Skip to main content

weave_core/
merge.rs

1use std::collections::{HashMap, HashSet};
2use std::io::Write;
3use std::process::Command;
4use std::sync::{mpsc, LazyLock};
5use std::time::Duration;
6
7use serde::Serialize;
8use sem_core::model::change::ChangeType;
9use sem_core::model::entity::SemanticEntity;
10use sem_core::model::identity::match_entities;
11use sem_core::parser::plugins::create_default_registry;
12use sem_core::parser::registry::ParserRegistry;
13
14/// Static parser registry shared across all merge operations.
15/// Avoids recreating 11 tree-sitter language parsers per merge call.
16static PARSER_REGISTRY: LazyLock<ParserRegistry> = LazyLock::new(create_default_registry);
17
18use crate::conflict::{classify_conflict, ConflictKind, EntityConflict, MarkerFormat, MergeStats};
19use crate::region::{extract_regions, EntityRegion, FileRegion};
20use crate::validate::SemanticWarning;
21use crate::reconstruct::reconstruct;
22
23/// How an individual entity was resolved during merge.
24#[derive(Debug, Clone, Serialize)]
25#[serde(rename_all = "snake_case")]
26pub enum ResolutionStrategy {
27    Unchanged,
28    OursOnly,
29    TheirsOnly,
30    ContentEqual,
31    DiffyMerged,
32    DecoratorMerged,
33    InnerMerged,
34    ConflictBothModified,
35    ConflictModifyDelete,
36    ConflictBothAdded,
37    ConflictRenameRename,
38    AddedOurs,
39    AddedTheirs,
40    Deleted,
41    Renamed { from: String, to: String },
42    Fallback,
43}
44
45/// Audit record for a single entity's merge resolution.
46#[derive(Debug, Clone, Serialize)]
47pub struct EntityAudit {
48    pub name: String,
49    #[serde(rename = "type")]
50    pub entity_type: String,
51    pub resolution: ResolutionStrategy,
52}
53
54/// Result of a merge operation.
55#[derive(Debug)]
56pub struct MergeResult {
57    pub content: String,
58    pub conflicts: Vec<EntityConflict>,
59    pub warnings: Vec<SemanticWarning>,
60    pub stats: MergeStats,
61    pub audit: Vec<EntityAudit>,
62}
63
64impl MergeResult {
65    pub fn is_clean(&self) -> bool {
66        self.conflicts.is_empty()
67            && !self.content.lines().any(|l| l.starts_with("<<<<<<< ours"))
68    }
69}
70
71/// The resolved content for a single entity after merging.
72#[derive(Debug, Clone)]
73pub enum ResolvedEntity {
74    /// Clean resolution — use this content.
75    Clean(EntityRegion),
76    /// Conflict — render conflict markers.
77    Conflict(EntityConflict),
78    /// Inner merge with per-member scoped conflicts.
79    /// Content already contains per-member conflict markers; emit as-is.
80    ScopedConflict {
81        content: String,
82        conflict: EntityConflict,
83    },
84    /// Entity was deleted.
85    Deleted,
86}
87
88/// Perform entity-level 3-way merge.
89///
90/// Falls back to line-level merge (via diffy) when:
91/// - No parser matches the file type
92/// - Parser returns 0 entities for non-empty content
93/// - File exceeds 1MB
94pub fn entity_merge(
95    base: &str,
96    ours: &str,
97    theirs: &str,
98    file_path: &str,
99) -> MergeResult {
100    entity_merge_fmt(base, ours, theirs, file_path, &MarkerFormat::default())
101}
102
103/// Perform entity-level 3-way merge with configurable marker format.
104pub fn entity_merge_fmt(
105    base: &str,
106    ours: &str,
107    theirs: &str,
108    file_path: &str,
109    marker_format: &MarkerFormat,
110) -> MergeResult {
111    let timeout_secs = std::env::var("WEAVE_TIMEOUT")
112        .ok()
113        .and_then(|v| v.parse::<u64>().ok())
114        .unwrap_or(5);
115
116    // Timeout: if entity merge takes too long, diffy is likely hitting
117    // pathological input. Fall back to git merge-file which always terminates.
118    let base_owned = base.to_string();
119    let ours_owned = ours.to_string();
120    let theirs_owned = theirs.to_string();
121    let path_owned = file_path.to_string();
122    let fmt_owned = marker_format.clone();
123
124    let (tx, rx) = mpsc::channel();
125    std::thread::spawn(move || {
126        let result = entity_merge_with_registry(&base_owned, &ours_owned, &theirs_owned, &path_owned, &PARSER_REGISTRY, &fmt_owned);
127        let _ = tx.send(result);
128    });
129
130    match rx.recv_timeout(Duration::from_secs(timeout_secs)) {
131        Ok(result) => result,
132        Err(_) => {
133            eprintln!("weave: merge timed out after {}s for {}, falling back to git merge-file", timeout_secs, file_path);
134            let mut stats = MergeStats::default();
135            stats.used_fallback = true;
136            git_merge_file(base, ours, theirs, &mut stats)
137        }
138    }
139}
140
141pub fn entity_merge_with_registry(
142    base: &str,
143    ours: &str,
144    theirs: &str,
145    file_path: &str,
146    registry: &ParserRegistry,
147    marker_format: &MarkerFormat,
148) -> MergeResult {
149    // Guard: if any input already contains conflict markers (e.g. AU/AA conflicts
150    // where git bakes markers into stage blobs), report as conflict immediately.
151    // We can't do a meaningful 3-way merge on pre-conflicted content.
152    if has_conflict_markers(base) || has_conflict_markers(ours) || has_conflict_markers(theirs) {
153        let mut stats = MergeStats::default();
154        stats.entities_conflicted = 1;
155        stats.used_fallback = true;
156        // Use whichever input has markers as the merged content (preserves
157        // the conflict for the user to resolve manually).
158        let content = if has_conflict_markers(ours) {
159            ours
160        } else if has_conflict_markers(theirs) {
161            theirs
162        } else {
163            base
164        };
165        let complexity = classify_conflict(Some(base), Some(ours), Some(theirs));
166        return MergeResult {
167            content: content.to_string(),
168            conflicts: vec![EntityConflict {
169                entity_name: "(file)".to_string(),
170                entity_type: "file".to_string(),
171                kind: ConflictKind::BothModified,
172                complexity,
173                ours_content: Some(ours.to_string()),
174                theirs_content: Some(theirs.to_string()),
175                base_content: Some(base.to_string()),
176            }],
177            warnings: vec![],
178            stats,
179            audit: vec![],
180        };
181    }
182
183    // Fast path: if ours == theirs, no merge needed
184    if ours == theirs {
185        return MergeResult {
186            content: ours.to_string(),
187            conflicts: vec![],
188            warnings: vec![],
189            stats: MergeStats::default(),
190            audit: vec![],
191        };
192    }
193
194    // Fast path: if base == ours, take theirs entirely
195    if base == ours {
196        return MergeResult {
197            content: theirs.to_string(),
198            conflicts: vec![],
199            warnings: vec![],
200            stats: MergeStats {
201                entities_theirs_only: 1,
202                ..Default::default()
203            },
204            audit: vec![],
205        };
206    }
207
208    // Fast path: if base == theirs, take ours entirely
209    if base == theirs {
210        return MergeResult {
211            content: ours.to_string(),
212            conflicts: vec![],
213            warnings: vec![],
214            stats: MergeStats {
215                entities_ours_only: 1,
216                ..Default::default()
217            },
218            audit: vec![],
219        };
220    }
221
222    // Binary file detection: if any version has null bytes, use git merge-file directly
223    if is_binary(base) || is_binary(ours) || is_binary(theirs) {
224        let mut stats = MergeStats::default();
225        stats.used_fallback = true;
226        return git_merge_file(base, ours, theirs, &mut stats);
227    }
228
229    // Large file fallback
230    if base.len() > 1_000_000 || ours.len() > 1_000_000 || theirs.len() > 1_000_000 {
231        return line_level_fallback(base, ours, theirs, file_path);
232    }
233
234    // If the file type isn't natively supported, the registry returns the fallback
235    // plugin (20-line chunks). Entity merge on arbitrary chunks produces WORSE
236    // results than line-level merge (confirmed on GitButler's .svelte files where
237    // chunk boundaries don't align with structural boundaries). So we skip entity
238    // merge entirely for fallback-plugin files and go straight to line-level merge.
239    let plugin = match registry.get_plugin(file_path) {
240        Some(p) if p.id() != "fallback" => p,
241        _ => return line_level_fallback(base, ours, theirs, file_path),
242    };
243
244    // Extract entities from all three versions. Keep unfiltered lists for inner merge
245    // (child entities provide tree-sitter-based method decomposition for classes).
246    let base_all = plugin.extract_entities(base, file_path);
247    let ours_all = plugin.extract_entities(ours, file_path);
248    let theirs_all = plugin.extract_entities(theirs, file_path);
249
250    // Filter out nested entities for top-level matching and region extraction
251    let base_entities = filter_nested_entities(base_all.clone());
252    let ours_entities = filter_nested_entities(ours_all.clone());
253    let theirs_entities = filter_nested_entities(theirs_all.clone());
254
255    // Fallback if parser returns nothing for non-empty content
256    if base_entities.is_empty() && !base.trim().is_empty() {
257        return line_level_fallback(base, ours, theirs, file_path);
258    }
259    // Allow empty entities if content is actually empty
260    if ours_entities.is_empty() && !ours.trim().is_empty() && theirs_entities.is_empty() && !theirs.trim().is_empty() {
261        return line_level_fallback(base, ours, theirs, file_path);
262    }
263
264    // Fallback if too many duplicate entity names. Entity matching is O(n*m) on
265    // same-named entities which can hang on files with many `var app = ...` etc.
266    if has_excessive_duplicates(&base_entities) || has_excessive_duplicates(&ours_entities) || has_excessive_duplicates(&theirs_entities) {
267        return line_level_fallback(base, ours, theirs, file_path);
268    }
269
270    // Extract regions from all three
271    let base_regions = extract_regions(base, &base_entities);
272    let ours_regions = extract_regions(ours, &ours_entities);
273    let theirs_regions = extract_regions(theirs, &theirs_entities);
274
275    // Build region content maps (entity_id → content from file lines, preserving
276    // surrounding syntax like `export` that sem-core's entity.content may strip)
277    let base_region_content = build_region_content_map(&base_regions);
278    let ours_region_content = build_region_content_map(&ours_regions);
279    let theirs_region_content = build_region_content_map(&theirs_regions);
280
281    // Match entities: base↔ours and base↔theirs
282    let ours_changes = match_entities(&base_entities, &ours_entities, file_path, None, None, None);
283    let theirs_changes = match_entities(&base_entities, &theirs_entities, file_path, None, None, None);
284
285    // Build lookup maps
286    let base_entity_map: HashMap<&str, &SemanticEntity> =
287        base_entities.iter().map(|e| (e.id.as_str(), e)).collect();
288    let ours_entity_map: HashMap<&str, &SemanticEntity> =
289        ours_entities.iter().map(|e| (e.id.as_str(), e)).collect();
290    let theirs_entity_map: HashMap<&str, &SemanticEntity> =
291        theirs_entities.iter().map(|e| (e.id.as_str(), e)).collect();
292
293    // Classify what happened to each entity in each branch
294    let mut ours_change_map: HashMap<String, ChangeType> = HashMap::new();
295    for change in &ours_changes.changes {
296        ours_change_map.insert(change.entity_id.clone(), change.change_type);
297    }
298    let mut theirs_change_map: HashMap<String, ChangeType> = HashMap::new();
299    for change in &theirs_changes.changes {
300        theirs_change_map.insert(change.entity_id.clone(), change.change_type);
301    }
302
303    // Detect renames using structural_hash (RefFilter / IntelliMerge-inspired).
304    // When one branch renames an entity, connect the old and new IDs so the merge
305    // treats it as the same entity rather than a delete+add.
306    let ours_rename_to_base = build_rename_map(&base_entities, &ours_entities);
307    let theirs_rename_to_base = build_rename_map(&base_entities, &theirs_entities);
308    // Reverse maps: base_id → renamed_id in that branch
309    let base_to_ours_rename: HashMap<String, String> = ours_rename_to_base
310        .iter()
311        .map(|(new, old)| (old.clone(), new.clone()))
312        .collect();
313    let base_to_theirs_rename: HashMap<String, String> = theirs_rename_to_base
314        .iter()
315        .map(|(new, old)| (old.clone(), new.clone()))
316        .collect();
317
318    // Collect all entity IDs across all versions
319    let mut all_entity_ids: Vec<String> = Vec::new();
320    let mut seen: HashSet<String> = HashSet::new();
321    // Track renamed IDs so we don't process them twice
322    let mut skip_ids: HashSet<String> = HashSet::new();
323    // The "new" IDs from renames should be skipped — they'll be handled via the base ID
324    for new_id in ours_rename_to_base.keys() {
325        skip_ids.insert(new_id.clone());
326    }
327    for new_id in theirs_rename_to_base.keys() {
328        skip_ids.insert(new_id.clone());
329    }
330
331    // Start with ours ordering (skeleton)
332    for entity in &ours_entities {
333        if skip_ids.contains(&entity.id) {
334            continue;
335        }
336        if seen.insert(entity.id.clone()) {
337            all_entity_ids.push(entity.id.clone());
338        }
339    }
340    // Add theirs-only entities
341    for entity in &theirs_entities {
342        if skip_ids.contains(&entity.id) {
343            continue;
344        }
345        if seen.insert(entity.id.clone()) {
346            all_entity_ids.push(entity.id.clone());
347        }
348    }
349    // Add base-only entities (deleted in both → skip, deleted in one → handled below)
350    for entity in &base_entities {
351        if seen.insert(entity.id.clone()) {
352            all_entity_ids.push(entity.id.clone());
353        }
354    }
355
356    let mut stats = MergeStats::default();
357    let mut conflicts: Vec<EntityConflict> = Vec::new();
358    let mut audit: Vec<EntityAudit> = Vec::new();
359    let mut resolved_entities: HashMap<String, ResolvedEntity> = HashMap::new();
360
361    // Detect rename/rename conflicts: same base entity renamed differently in both branches.
362    // These must be flagged before the entity resolution loop, which would otherwise silently
363    // pick ours and also include theirs as an unmatched entity.
364    let mut rename_conflict_ids: HashSet<String> = HashSet::new();
365    for (base_id, ours_new_id) in &base_to_ours_rename {
366        if let Some(theirs_new_id) = base_to_theirs_rename.get(base_id) {
367            if ours_new_id != theirs_new_id {
368                rename_conflict_ids.insert(base_id.clone());
369            }
370        }
371    }
372
373    for entity_id in &all_entity_ids {
374        // Handle rename/rename conflicts: both branches renamed this base entity differently
375        if rename_conflict_ids.contains(entity_id) {
376            let ours_new_id = &base_to_ours_rename[entity_id];
377            let theirs_new_id = &base_to_theirs_rename[entity_id];
378            let base_entity = base_entity_map.get(entity_id.as_str());
379            let ours_entity = ours_entity_map.get(ours_new_id.as_str());
380            let theirs_entity = theirs_entity_map.get(theirs_new_id.as_str());
381            let base_name = base_entity.map(|e| e.name.as_str()).unwrap_or(entity_id);
382            let ours_name = ours_entity.map(|e| e.name.as_str()).unwrap_or(ours_new_id);
383            let theirs_name = theirs_entity.map(|e| e.name.as_str()).unwrap_or(theirs_new_id);
384
385            let base_rc = base_entity.map(|e| base_region_content.get(e.id.as_str()).map(|s| s.to_string()).unwrap_or_else(|| e.content.clone()));
386            let ours_rc = ours_entity.map(|e| ours_region_content.get(e.id.as_str()).map(|s| s.to_string()).unwrap_or_else(|| e.content.clone()));
387            let theirs_rc = theirs_entity.map(|e| theirs_region_content.get(e.id.as_str()).map(|s| s.to_string()).unwrap_or_else(|| e.content.clone()));
388
389            stats.entities_conflicted += 1;
390            let conflict = EntityConflict {
391                entity_name: base_name.to_string(),
392                entity_type: base_entity.map(|e| e.entity_type.clone()).unwrap_or_default(),
393                kind: ConflictKind::RenameRename {
394                    base_name: base_name.to_string(),
395                    ours_name: ours_name.to_string(),
396                    theirs_name: theirs_name.to_string(),
397                },
398                complexity: crate::conflict::ConflictComplexity::Syntax,
399                ours_content: ours_rc,
400                theirs_content: theirs_rc,
401                base_content: base_rc,
402            };
403            conflicts.push(conflict.clone());
404            audit.push(EntityAudit {
405                name: base_name.to_string(),
406                entity_type: base_entity.map(|e| e.entity_type.clone()).unwrap_or_default(),
407                resolution: ResolutionStrategy::ConflictRenameRename,
408            });
409            let resolution = ResolvedEntity::Conflict(conflict);
410            resolved_entities.insert(entity_id.clone(), resolution.clone());
411            resolved_entities.insert(ours_new_id.clone(), resolution);
412            // Mark theirs renamed ID as Deleted so reconstruct doesn't emit the conflict twice
413            // (once from ours skeleton, once from theirs-only insertion)
414            resolved_entities.insert(theirs_new_id.clone(), ResolvedEntity::Deleted);
415            continue;
416        }
417
418        let in_base = base_entity_map.get(entity_id.as_str());
419        // Follow rename chains: if base entity was renamed in ours/theirs, use renamed version
420        let ours_id = base_to_ours_rename.get(entity_id.as_str()).map(|s| s.as_str()).unwrap_or(entity_id.as_str());
421        let theirs_id = base_to_theirs_rename.get(entity_id.as_str()).map(|s| s.as_str()).unwrap_or(entity_id.as_str());
422        let in_ours = ours_entity_map.get(ours_id).or_else(|| ours_entity_map.get(entity_id.as_str()));
423        let in_theirs = theirs_entity_map.get(theirs_id).or_else(|| theirs_entity_map.get(entity_id.as_str()));
424
425        let ours_change = ours_change_map.get(entity_id);
426        let theirs_change = theirs_change_map.get(entity_id);
427
428        let (resolution, strategy) = resolve_entity(
429            entity_id,
430            in_base,
431            in_ours,
432            in_theirs,
433            ours_change,
434            theirs_change,
435            &base_region_content,
436            &ours_region_content,
437            &theirs_region_content,
438            &base_all,
439            &ours_all,
440            &theirs_all,
441            &mut stats,
442            marker_format,
443        );
444
445        // Build audit entry from entity info
446        let entity_name = in_ours.map(|e| e.name.as_str())
447            .or_else(|| in_theirs.map(|e| e.name.as_str()))
448            .or_else(|| in_base.map(|e| e.name.as_str()))
449            .unwrap_or(entity_id)
450            .to_string();
451        let entity_type = in_ours.map(|e| e.entity_type.as_str())
452            .or_else(|| in_theirs.map(|e| e.entity_type.as_str()))
453            .or_else(|| in_base.map(|e| e.entity_type.as_str()))
454            .unwrap_or("")
455            .to_string();
456        audit.push(EntityAudit {
457            name: entity_name,
458            entity_type,
459            resolution: strategy,
460        });
461
462        match &resolution {
463            ResolvedEntity::Conflict(ref c) => conflicts.push(c.clone()),
464            ResolvedEntity::ScopedConflict { conflict, .. } => conflicts.push(conflict.clone()),
465            _ => {}
466        }
467
468        resolved_entities.insert(entity_id.clone(), resolution.clone());
469        // Also store under renamed IDs so reconstruct can find them
470        if let Some(ours_renamed_id) = base_to_ours_rename.get(entity_id.as_str()) {
471            resolved_entities.insert(ours_renamed_id.clone(), resolution.clone());
472        }
473        if let Some(theirs_renamed_id) = base_to_theirs_rename.get(entity_id.as_str()) {
474            resolved_entities.insert(theirs_renamed_id.clone(), resolution);
475        }
476    }
477
478    // Merge interstitial regions
479    let (merged_interstitials, interstitial_conflicts) =
480        merge_interstitials(&base_regions, &ours_regions, &theirs_regions, marker_format);
481    stats.entities_conflicted += interstitial_conflicts.len();
482    conflicts.extend(interstitial_conflicts);
483
484    // Reconstruct the file
485    let content = reconstruct(
486        &ours_regions,
487        &theirs_regions,
488        &theirs_entities,
489        &ours_entity_map,
490        &resolved_entities,
491        &merged_interstitials,
492        marker_format,
493    );
494
495    // Post-merge cleanup: remove duplicate lines and normalize blank lines
496    let content = post_merge_cleanup(&content);
497
498    // Post-merge parse validation: verify the merged result still parses correctly
499    // (MergeBot-inspired safety check — catch syntactically broken merges)
500    let mut warnings = vec![];
501    if conflicts.is_empty() && stats.entities_both_changed_merged > 0 {
502        let merged_entities = plugin.extract_entities(&content, file_path);
503        if merged_entities.is_empty() && !content.trim().is_empty() {
504            warnings.push(crate::validate::SemanticWarning {
505                entity_name: "(file)".to_string(),
506                entity_type: "file".to_string(),
507                file_path: file_path.to_string(),
508                kind: crate::validate::WarningKind::ParseFailedAfterMerge,
509                related: vec![],
510            });
511        }
512    }
513
514    let entity_result = MergeResult {
515        content,
516        conflicts,
517        warnings,
518        stats: stats.clone(),
519        audit,
520    };
521
522    // Floor: never produce more conflict markers than git merge-file.
523    // Entity merge can split one git conflict into multiple per-entity conflicts,
524    // or interstitial merges can produce conflicts not tracked in the conflicts vec.
525    let entity_markers = entity_result.content.lines().filter(|l| l.starts_with("<<<<<<<")).count();
526    if entity_markers > 0 {
527        let git_result = git_merge_file(base, ours, theirs, &mut stats);
528        let git_markers = git_result.content.lines().filter(|l| l.starts_with("<<<<<<<")).count();
529        if entity_markers > git_markers {
530            return git_result;
531        }
532    }
533
534    entity_result
535}
536
537fn resolve_entity(
538    _entity_id: &str,
539    in_base: Option<&&SemanticEntity>,
540    in_ours: Option<&&SemanticEntity>,
541    in_theirs: Option<&&SemanticEntity>,
542    _ours_change: Option<&ChangeType>,
543    _theirs_change: Option<&ChangeType>,
544    base_region_content: &HashMap<&str, &str>,
545    ours_region_content: &HashMap<&str, &str>,
546    theirs_region_content: &HashMap<&str, &str>,
547    base_all: &[SemanticEntity],
548    ours_all: &[SemanticEntity],
549    theirs_all: &[SemanticEntity],
550    stats: &mut MergeStats,
551    marker_format: &MarkerFormat,
552) -> (ResolvedEntity, ResolutionStrategy) {
553    // Helper: get region content (from file lines) for an entity, falling back to entity.content
554    let region_content = |entity: &SemanticEntity, map: &HashMap<&str, &str>| -> String {
555        map.get(entity.id.as_str()).map(|s| s.to_string()).unwrap_or_else(|| entity.content.clone())
556    };
557
558    match (in_base, in_ours, in_theirs) {
559        // Entity exists in all three versions
560        (Some(base), Some(ours), Some(theirs)) => {
561            // Check modification status via structural hash AND region content.
562            // Region content may differ even when structural hash is the same
563            // (e.g., doc comment added/changed but function body unchanged).
564            let base_rc_lazy = || region_content(base, base_region_content);
565            let ours_rc_lazy = || region_content(ours, ours_region_content);
566            let theirs_rc_lazy = || region_content(theirs, theirs_region_content);
567
568            let ours_modified = ours.content_hash != base.content_hash
569                || ours_rc_lazy() != base_rc_lazy();
570            let theirs_modified = theirs.content_hash != base.content_hash
571                || theirs_rc_lazy() != base_rc_lazy();
572
573            match (ours_modified, theirs_modified) {
574                (false, false) => {
575                    // Neither changed
576                    stats.entities_unchanged += 1;
577                    (ResolvedEntity::Clean(entity_to_region_with_content(ours, &region_content(ours, ours_region_content))), ResolutionStrategy::Unchanged)
578                }
579                (true, false) => {
580                    // Only ours changed
581                    stats.entities_ours_only += 1;
582                    (ResolvedEntity::Clean(entity_to_region_with_content(ours, &region_content(ours, ours_region_content))), ResolutionStrategy::OursOnly)
583                }
584                (false, true) => {
585                    // Only theirs changed
586                    stats.entities_theirs_only += 1;
587                    (ResolvedEntity::Clean(entity_to_region_with_content(theirs, &region_content(theirs, theirs_region_content))), ResolutionStrategy::TheirsOnly)
588                }
589                (true, true) => {
590                    // Both changed — try intra-entity merge
591                    if ours.content_hash == theirs.content_hash {
592                        // Same change in both — take ours
593                        stats.entities_both_changed_merged += 1;
594                        (ResolvedEntity::Clean(entity_to_region_with_content(ours, &region_content(ours, ours_region_content))), ResolutionStrategy::ContentEqual)
595                    } else {
596                        // Try diffy 3-way merge on region content (preserves full syntax)
597                        let base_rc = region_content(base, base_region_content);
598                        let ours_rc = region_content(ours, ours_region_content);
599                        let theirs_rc = region_content(theirs, theirs_region_content);
600
601                        // Whitespace-aware shortcut: if one side only changed
602                        // whitespace/formatting, take the other side's content changes.
603                        // This handles the common case where one agent reformats while
604                        // another makes semantic changes.
605                        if is_whitespace_only_diff(&base_rc, &ours_rc) {
606                            stats.entities_theirs_only += 1;
607                            return (ResolvedEntity::Clean(entity_to_region_with_content(theirs, &theirs_rc)), ResolutionStrategy::TheirsOnly);
608                        }
609                        if is_whitespace_only_diff(&base_rc, &theirs_rc) {
610                            stats.entities_ours_only += 1;
611                            return (ResolvedEntity::Clean(entity_to_region_with_content(ours, &ours_rc)), ResolutionStrategy::OursOnly);
612                        }
613
614                        match diffy_merge(&base_rc, &ours_rc, &theirs_rc) {
615                            Some(merged) => {
616                                stats.entities_both_changed_merged += 1;
617                                stats.resolved_via_diffy += 1;
618                                (ResolvedEntity::Clean(EntityRegion {
619                                    entity_id: ours.id.clone(),
620                                    entity_name: ours.name.clone(),
621                                    entity_type: ours.entity_type.clone(),
622                                    content: merged,
623                                    start_line: ours.start_line,
624                                    end_line: ours.end_line,
625                                }), ResolutionStrategy::DiffyMerged)
626                            }
627                            None => {
628                                // Strategy 1: decorator/annotation-aware merge
629                                // Decorators are unordered annotations — merge them commutatively
630                                if let Some(merged) = try_decorator_aware_merge(&base_rc, &ours_rc, &theirs_rc) {
631                                    stats.entities_both_changed_merged += 1;
632                                    stats.resolved_via_diffy += 1;
633                                    return (ResolvedEntity::Clean(EntityRegion {
634                                        entity_id: ours.id.clone(),
635                                        entity_name: ours.name.clone(),
636                                        entity_type: ours.entity_type.clone(),
637                                        content: merged,
638                                        start_line: ours.start_line,
639                                        end_line: ours.end_line,
640                                    }), ResolutionStrategy::DecoratorMerged);
641                                }
642
643                                // Strategy 2: inner entity merge for container types
644                                // (LastMerge insight: class members are unordered children)
645                                if is_container_entity_type(&ours.entity_type) {
646                                    let base_children = in_base
647                                        .map(|b| get_child_entities(b, base_all))
648                                        .unwrap_or_default();
649                                    let ours_children = get_child_entities(ours, ours_all);
650                                    let theirs_children = in_theirs
651                                        .map(|t| get_child_entities(t, theirs_all))
652                                        .unwrap_or_default();
653                                    let base_start = in_base.map(|b| b.start_line).unwrap_or(1);
654                                    let ours_start = ours.start_line;
655                                    let theirs_start = in_theirs.map(|t| t.start_line).unwrap_or(1);
656                                    if let Some(inner) = try_inner_entity_merge(
657                                        &base_rc, &ours_rc, &theirs_rc,
658                                        &base_children, &ours_children, &theirs_children,
659                                        base_start, ours_start, theirs_start,
660                                        marker_format,
661                                    ) {
662                                        if inner.has_conflicts {
663                                            // Inner merge produced per-member conflicts:
664                                            // content has scoped markers for just the conflicted
665                                            // members; clean members are merged normally.
666                                            stats.entities_conflicted += 1;
667                                            stats.resolved_via_inner_merge += 1;
668                                            let complexity = classify_conflict(Some(&base_rc), Some(&ours_rc), Some(&theirs_rc));
669                                            return (ResolvedEntity::ScopedConflict {
670                                                content: inner.content,
671                                                conflict: EntityConflict {
672                                                    entity_name: ours.name.clone(),
673                                                    entity_type: ours.entity_type.clone(),
674                                                    kind: ConflictKind::BothModified,
675                                                    complexity,
676                                                    ours_content: Some(ours_rc),
677                                                    theirs_content: Some(theirs_rc),
678                                                    base_content: Some(base_rc),
679                                                },
680                                            }, ResolutionStrategy::InnerMerged);
681                                        } else {
682                                            stats.entities_both_changed_merged += 1;
683                                            stats.resolved_via_inner_merge += 1;
684                                            return (ResolvedEntity::Clean(EntityRegion {
685                                                entity_id: ours.id.clone(),
686                                                entity_name: ours.name.clone(),
687                                                entity_type: ours.entity_type.clone(),
688                                                content: inner.content,
689                                                start_line: ours.start_line,
690                                                end_line: ours.end_line,
691                                            }), ResolutionStrategy::InnerMerged);
692                                        }
693                                    }
694                                }
695                                stats.entities_conflicted += 1;
696                                let complexity = classify_conflict(Some(&base_rc), Some(&ours_rc), Some(&theirs_rc));
697                                (ResolvedEntity::Conflict(EntityConflict {
698                                    entity_name: ours.name.clone(),
699                                    entity_type: ours.entity_type.clone(),
700                                    kind: ConflictKind::BothModified,
701                                    complexity,
702                                    ours_content: Some(ours_rc),
703                                    theirs_content: Some(theirs_rc),
704                                    base_content: Some(base_rc),
705                                }), ResolutionStrategy::ConflictBothModified)
706                            }
707                        }
708                    }
709                }
710            }
711        }
712
713        // Entity in base and ours, but not theirs → theirs deleted it
714        (Some(_base), Some(ours), None) => {
715            let ours_modified = ours.content_hash != _base.content_hash;
716            if ours_modified {
717                // Modify/delete conflict
718                stats.entities_conflicted += 1;
719                let ours_rc = region_content(ours, ours_region_content);
720                let base_rc = region_content(_base, base_region_content);
721                let complexity = classify_conflict(Some(&base_rc), Some(&ours_rc), None);
722                (ResolvedEntity::Conflict(EntityConflict {
723                    entity_name: ours.name.clone(),
724                    entity_type: ours.entity_type.clone(),
725                    kind: ConflictKind::ModifyDelete {
726                        modified_in_ours: true,
727                    },
728                    complexity,
729                    ours_content: Some(ours_rc),
730                    theirs_content: None,
731                    base_content: Some(base_rc),
732                }), ResolutionStrategy::ConflictModifyDelete)
733            } else {
734                // Theirs deleted, ours unchanged → accept deletion
735                stats.entities_deleted += 1;
736                (ResolvedEntity::Deleted, ResolutionStrategy::Deleted)
737            }
738        }
739
740        // Entity in base and theirs, but not ours → ours deleted it
741        (Some(_base), None, Some(theirs)) => {
742            let theirs_modified = theirs.content_hash != _base.content_hash;
743            if theirs_modified {
744                // Modify/delete conflict
745                stats.entities_conflicted += 1;
746                let theirs_rc = region_content(theirs, theirs_region_content);
747                let base_rc = region_content(_base, base_region_content);
748                let complexity = classify_conflict(Some(&base_rc), None, Some(&theirs_rc));
749                (ResolvedEntity::Conflict(EntityConflict {
750                    entity_name: theirs.name.clone(),
751                    entity_type: theirs.entity_type.clone(),
752                    kind: ConflictKind::ModifyDelete {
753                        modified_in_ours: false,
754                    },
755                    complexity,
756                    ours_content: None,
757                    theirs_content: Some(theirs_rc),
758                    base_content: Some(base_rc),
759                }), ResolutionStrategy::ConflictModifyDelete)
760            } else {
761                // Ours deleted, theirs unchanged → accept deletion
762                stats.entities_deleted += 1;
763                (ResolvedEntity::Deleted, ResolutionStrategy::Deleted)
764            }
765        }
766
767        // Entity only in ours (added by ours)
768        (None, Some(ours), None) => {
769            stats.entities_added_ours += 1;
770            (ResolvedEntity::Clean(entity_to_region_with_content(ours, &region_content(ours, ours_region_content))), ResolutionStrategy::AddedOurs)
771        }
772
773        // Entity only in theirs (added by theirs)
774        (None, None, Some(theirs)) => {
775            stats.entities_added_theirs += 1;
776            (ResolvedEntity::Clean(entity_to_region_with_content(theirs, &region_content(theirs, theirs_region_content))), ResolutionStrategy::AddedTheirs)
777        }
778
779        // Entity in both ours and theirs but not base (both added)
780        (None, Some(ours), Some(theirs)) => {
781            if ours.content_hash == theirs.content_hash {
782                // Same content added by both → take ours
783                stats.entities_added_ours += 1;
784                (ResolvedEntity::Clean(entity_to_region_with_content(ours, &region_content(ours, ours_region_content))), ResolutionStrategy::ContentEqual)
785            } else {
786                // Different content → conflict
787                stats.entities_conflicted += 1;
788                let ours_rc = region_content(ours, ours_region_content);
789                let theirs_rc = region_content(theirs, theirs_region_content);
790                let complexity = classify_conflict(None, Some(&ours_rc), Some(&theirs_rc));
791                (ResolvedEntity::Conflict(EntityConflict {
792                    entity_name: ours.name.clone(),
793                    entity_type: ours.entity_type.clone(),
794                    kind: ConflictKind::BothAdded,
795                    complexity,
796                    ours_content: Some(ours_rc),
797                    theirs_content: Some(theirs_rc),
798                    base_content: None,
799                }), ResolutionStrategy::ConflictBothAdded)
800            }
801        }
802
803        // Entity only in base (deleted by both)
804        (Some(_), None, None) => {
805            stats.entities_deleted += 1;
806            (ResolvedEntity::Deleted, ResolutionStrategy::Deleted)
807        }
808
809        // Should not happen
810        (None, None, None) => (ResolvedEntity::Deleted, ResolutionStrategy::Deleted),
811    }
812}
813
814fn entity_to_region_with_content(entity: &SemanticEntity, content: &str) -> EntityRegion {
815    EntityRegion {
816        entity_id: entity.id.clone(),
817        entity_name: entity.name.clone(),
818        entity_type: entity.entity_type.clone(),
819        content: content.to_string(),
820        start_line: entity.start_line,
821        end_line: entity.end_line,
822    }
823}
824
825/// Build a map from entity_id to region content (from file lines).
826/// This preserves surrounding syntax (like `export`) that sem-core's entity.content may strip.
827/// Returns borrowed references since regions live for the merge duration.
828fn build_region_content_map(regions: &[FileRegion]) -> HashMap<&str, &str> {
829    regions
830        .iter()
831        .filter_map(|r| match r {
832            FileRegion::Entity(e) => Some((e.entity_id.as_str(), e.content.as_str())),
833            _ => None,
834        })
835        .collect()
836}
837
/// Report whether `a` and `b` differ only in whitespace.
///
/// Each line is trimmed and blank lines are dropped before comparing, so
/// indentation changes, trailing whitespace and added/removed blank lines are
/// all ignored. Identical inputs also return `true`.
fn is_whitespace_only_diff(a: &str, b: &str) -> bool {
    /// Trimmed, non-blank lines of `text`, in order.
    fn significant_lines(text: &str) -> impl Iterator<Item = &str> {
        text.lines().map(str::trim).filter(|line| !line.is_empty())
    }

    a == b || significant_lines(a).eq(significant_lines(b))
}
848
/// Check if a line is a decorator or annotation.
///
/// A line counts as a decorator when, after trimming, it starts with `@` and the
/// identifier that follows is not a documentation tag (`@param`, `@return`,
/// `@returns`, `@type`, `@typedef`, `@see`).
///
/// The tag is compared as a whole word rather than as a raw prefix, so real
/// decorators whose names merely begin with a doc-tag word (for example
/// `@typing.overload`, `@typechecked`, `@parameterized.expand`) are still
/// recognised as decorators.
fn is_decorator_line(line: &str) -> bool {
    const DOC_TAGS: [&str; 6] = ["param", "return", "returns", "type", "typedef", "see"];

    let Some(after_at) = line.trim().strip_prefix('@') else {
        return false;
    };
    // The tag is the leading identifier run; anything after it (`.`, `(`, space, `{`)
    // is irrelevant to the doc-tag check.
    let tag_len = after_at
        .find(|c: char| !(c.is_alphanumeric() || c == '_'))
        .unwrap_or(after_at.len());
    let tag = &after_at[..tag_len];

    !DOC_TAGS.contains(&tag)
}
859
860/// Split content into (decorators, body) where decorators are leading @-prefixed lines.
861fn split_decorators(content: &str) -> (Vec<&str>, &str) {
862    let mut decorator_end = 0;
863    let mut byte_offset = 0;
864    for line in content.lines() {
865        if is_decorator_line(line) || line.trim().is_empty() {
866            decorator_end += 1;
867            byte_offset += line.len() + 1; // +1 for newline
868        } else {
869            break;
870        }
871    }
872    // Trim trailing empty lines from decorator section
873    let lines: Vec<&str> = content.lines().collect();
874    while decorator_end > 0 && lines.get(decorator_end - 1).map_or(false, |l| l.trim().is_empty()) {
875        byte_offset -= lines[decorator_end - 1].len() + 1;
876        decorator_end -= 1;
877    }
878    let decorators: Vec<&str> = lines[..decorator_end]
879        .iter()
880        .filter(|l| is_decorator_line(l))
881        .copied()
882        .collect();
883    let body = &content[byte_offset.min(content.len())..];
884    (decorators, body)
885}
886
887/// Try decorator-aware merge: when both sides add different decorators/annotations,
888/// merge them commutatively (like imports). Also try merging the bodies separately.
889///
890/// This handles the common pattern where one agent adds @cache and another adds @deprecated
891/// to the same function — they should both be preserved.
892fn try_decorator_aware_merge(base: &str, ours: &str, theirs: &str) -> Option<String> {
893    let (base_decorators, base_body) = split_decorators(base);
894    let (ours_decorators, ours_body) = split_decorators(ours);
895    let (theirs_decorators, theirs_body) = split_decorators(theirs);
896
897    // Only useful if at least one side has decorators
898    if ours_decorators.is_empty() && theirs_decorators.is_empty() {
899        return None;
900    }
901
902    // Merge bodies using diffy (or take unchanged side)
903    let merged_body = if base_body == ours_body && base_body == theirs_body {
904        base_body.to_string()
905    } else if base_body == ours_body {
906        theirs_body.to_string()
907    } else if base_body == theirs_body {
908        ours_body.to_string()
909    } else {
910        // Both changed body — try diffy on just the body
911        diffy_merge(base_body, ours_body, theirs_body)?
912    };
913
914    // Merge decorators commutatively (set union)
915    let base_set: HashSet<&str> = base_decorators.iter().copied().collect();
916    let ours_set: HashSet<&str> = ours_decorators.iter().copied().collect();
917    let theirs_set: HashSet<&str> = theirs_decorators.iter().copied().collect();
918
919    // Deletions
920    let ours_deleted: HashSet<&str> = base_set.difference(&ours_set).copied().collect();
921    let theirs_deleted: HashSet<&str> = base_set.difference(&theirs_set).copied().collect();
922
923    // Start with base decorators, remove deletions
924    let mut merged_decorators: Vec<&str> = base_decorators
925        .iter()
926        .filter(|d| !ours_deleted.contains(**d) && !theirs_deleted.contains(**d))
927        .copied()
928        .collect();
929
930    // Add new decorators from ours (not in base)
931    for d in &ours_decorators {
932        if !base_set.contains(d) && !merged_decorators.contains(d) {
933            merged_decorators.push(d);
934        }
935    }
936    // Add new decorators from theirs (not in base, not already added)
937    for d in &theirs_decorators {
938        if !base_set.contains(d) && !merged_decorators.contains(d) {
939            merged_decorators.push(d);
940        }
941    }
942
943    // Reconstruct
944    let mut result = String::new();
945    for d in &merged_decorators {
946        result.push_str(d);
947        result.push('\n');
948    }
949    result.push_str(&merged_body);
950
951    Some(result)
952}
953
954/// Try 3-way merge on text using diffy. Returns None if there are conflicts.
955fn diffy_merge(base: &str, ours: &str, theirs: &str) -> Option<String> {
956    let result = diffy::merge(base, ours, theirs);
957    match result {
958        Ok(merged) => Some(merged),
959        Err(_conflicted) => None,
960    }
961}
962
963/// Try 3-way merge using git merge-file. Returns None on conflict or error.
964/// This uses a different diff algorithm than diffy and can sometimes merge
965/// cases that diffy cannot (and vice versa).
966fn git_merge_string(base: &str, ours: &str, theirs: &str) -> Option<String> {
967    let dir = tempfile::tempdir().ok()?;
968    let base_path = dir.path().join("base");
969    let ours_path = dir.path().join("ours");
970    let theirs_path = dir.path().join("theirs");
971
972    std::fs::write(&base_path, base).ok()?;
973    std::fs::write(&ours_path, ours).ok()?;
974    std::fs::write(&theirs_path, theirs).ok()?;
975
976    let output = Command::new("git")
977        .arg("merge-file")
978        .arg("-p")
979        .arg(&ours_path)
980        .arg(&base_path)
981        .arg(&theirs_path)
982        .output()
983        .ok()?;
984
985    if output.status.success() {
986        String::from_utf8(output.stdout).ok()
987    } else {
988        None
989    }
990}
991
992/// Merge interstitial regions from all three versions.
993/// Uses commutative (set-based) merge for import blocks — inspired by
994/// LastMerge/Mergiraf's "unordered children" concept.
995/// Falls back to line-level 3-way merge for non-import content.
996fn merge_interstitials(
997    base_regions: &[FileRegion],
998    ours_regions: &[FileRegion],
999    theirs_regions: &[FileRegion],
1000    marker_format: &MarkerFormat,
1001) -> (HashMap<String, String>, Vec<EntityConflict>) {
1002    let base_map: HashMap<&str, &str> = base_regions
1003        .iter()
1004        .filter_map(|r| match r {
1005            FileRegion::Interstitial(i) => Some((i.position_key.as_str(), i.content.as_str())),
1006            _ => None,
1007        })
1008        .collect();
1009
1010    let ours_map: HashMap<&str, &str> = ours_regions
1011        .iter()
1012        .filter_map(|r| match r {
1013            FileRegion::Interstitial(i) => Some((i.position_key.as_str(), i.content.as_str())),
1014            _ => None,
1015        })
1016        .collect();
1017
1018    let theirs_map: HashMap<&str, &str> = theirs_regions
1019        .iter()
1020        .filter_map(|r| match r {
1021            FileRegion::Interstitial(i) => Some((i.position_key.as_str(), i.content.as_str())),
1022            _ => None,
1023        })
1024        .collect();
1025
1026    let mut all_keys: HashSet<&str> = HashSet::new();
1027    all_keys.extend(base_map.keys());
1028    all_keys.extend(ours_map.keys());
1029    all_keys.extend(theirs_map.keys());
1030
1031    let mut merged: HashMap<String, String> = HashMap::new();
1032    let mut interstitial_conflicts: Vec<EntityConflict> = Vec::new();
1033
1034    for key in all_keys {
1035        let base_content = base_map.get(key).copied().unwrap_or("");
1036        let ours_content = ours_map.get(key).copied().unwrap_or("");
1037        let theirs_content = theirs_map.get(key).copied().unwrap_or("");
1038
1039        // If all same, no merge needed
1040        if ours_content == theirs_content {
1041            merged.insert(key.to_string(), ours_content.to_string());
1042        } else if base_content == ours_content {
1043            merged.insert(key.to_string(), theirs_content.to_string());
1044        } else if base_content == theirs_content {
1045            merged.insert(key.to_string(), ours_content.to_string());
1046        } else {
1047            // Both changed — check if this is an import-heavy region
1048            if is_import_region(base_content)
1049                || is_import_region(ours_content)
1050                || is_import_region(theirs_content)
1051            {
1052                // Commutative merge: treat import lines as a set
1053                let result = merge_imports_commutatively(base_content, ours_content, theirs_content);
1054                merged.insert(key.to_string(), result);
1055            } else {
1056                // Regular line-level merge
1057                match diffy::merge(base_content, ours_content, theirs_content) {
1058                    Ok(m) => {
1059                        merged.insert(key.to_string(), m);
1060                    }
1061                    Err(_conflicted) => {
1062                        // Create a proper conflict instead of silently embedding
1063                        // raw conflict markers into the output.
1064                        let complexity = classify_conflict(
1065                            Some(base_content),
1066                            Some(ours_content),
1067                            Some(theirs_content),
1068                        );
1069                        let conflict = EntityConflict {
1070                            entity_name: key.to_string(),
1071                            entity_type: "interstitial".to_string(),
1072                            kind: ConflictKind::BothModified,
1073                            complexity,
1074                            ours_content: Some(ours_content.to_string()),
1075                            theirs_content: Some(theirs_content.to_string()),
1076                            base_content: Some(base_content.to_string()),
1077                        };
1078                        merged.insert(key.to_string(), conflict.to_conflict_markers(marker_format));
1079                        interstitial_conflicts.push(conflict);
1080                    }
1081                }
1082            }
1083        }
1084    }
1085
1086    (merged, interstitial_conflicts)
1087}
1088
1089/// Check if a region is predominantly import/use statements.
1090fn is_import_region(content: &str) -> bool {
1091    let lines: Vec<&str> = content
1092        .lines()
1093        .filter(|l| !l.trim().is_empty())
1094        .collect();
1095    if lines.is_empty() {
1096        return false;
1097    }
1098    let import_count = lines.iter().filter(|l| is_import_line(l)).count();
1099    // If >50% of non-empty lines are imports, treat as import region
1100    import_count * 2 > lines.len()
1101}
1102
1103/// Post-merge cleanup: remove consecutive duplicate lines and normalize blank lines.
1104///
1105/// Fixes two classes of merge artifacts:
1106/// 1. Duplicate lines/blocks that appear when both sides add the same content
1107///    (e.g. duplicate typedefs, forward declarations)
1108/// 2. Missing blank lines between entities or declarations, and excessive
1109///    blank lines (3+ consecutive) collapsed to 2
1110fn post_merge_cleanup(content: &str) -> String {
1111    let lines: Vec<&str> = content.lines().collect();
1112    let mut result: Vec<&str> = Vec::with_capacity(lines.len());
1113
1114    // Pass 1: Remove consecutive duplicate lines that look like declarations or imports.
1115    // Only dedup lines that are plausibly merge artifacts (imports, exports, forward decls).
1116    // Preserve intentional duplicates like repeated assertions, assignments, or data lines.
1117    for line in &lines {
1118        if line.trim().is_empty() {
1119            result.push(line);
1120            continue;
1121        }
1122        if let Some(prev) = result.last() {
1123            if !prev.trim().is_empty() && *prev == *line && looks_like_declaration(line) {
1124                continue; // skip consecutive exact duplicate of declaration-like line
1125            }
1126        }
1127        result.push(line);
1128    }
1129
1130    // Pass 2: Collapse 3+ consecutive blank lines to 2 (one separator blank line).
1131    let mut final_lines: Vec<&str> = Vec::with_capacity(result.len());
1132    let mut consecutive_blanks = 0;
1133    for line in &result {
1134        if line.trim().is_empty() {
1135            consecutive_blanks += 1;
1136            if consecutive_blanks <= 2 {
1137                final_lines.push(line);
1138            }
1139        } else {
1140            consecutive_blanks = 0;
1141            final_lines.push(line);
1142        }
1143    }
1144
1145    let mut out = final_lines.join("\n");
1146    if content.ends_with('\n') && !out.ends_with('\n') {
1147        out.push('\n');
1148    }
1149    out
1150}
1151
/// Heuristic: does this line look like an import/declaration that a merge could
/// plausibly have duplicated?
///
/// Matches on the trimmed line's leading keyword (import/from/use/export/
/// require/#include/typedef/using) or a `pub ...mod ` line. Anything else,
/// such as assertions, assignments, data initializers or struct fields,
/// returns `false` because it may be repeated on purpose.
fn looks_like_declaration(line: &str) -> bool {
    const DECLARATION_PREFIXES: [&str; 8] = [
        "import ",
        "from ",
        "use ",
        "export ",
        "require(",
        "#include",
        "typedef ",
        "using ",
    ];

    let text = line.trim();
    if DECLARATION_PREFIXES.iter().any(|prefix| text.starts_with(*prefix)) {
        return true;
    }
    text.starts_with("pub ") && text.contains("mod ")
}
1167
/// Recognise a top-level import/use/require statement.
///
/// Lines that begin with a space or tab are rejected outright, so conditional
/// imports nested in blocks such as `if TYPE_CHECKING:` are not picked up.
/// Otherwise the trimmed line must start with one of the known import keywords,
/// or be a `const ... require(` binding.
fn is_import_line(line: &str) -> bool {
    // Indented lines live inside some enclosing block; never treat them as imports.
    if matches!(line.chars().next(), Some(' ' | '\t')) {
        return false;
    }

    let stmt = line.trim();
    let keyword_import = [
        "import ",
        "from ",
        "use ",
        "require(",
        "package ",
        "#include ",
        "using ",
    ]
    .iter()
    .any(|prefix| stmt.starts_with(*prefix));

    keyword_import || (stmt.starts_with("const ") && stmt.contains("require("))
}
1187
1188/// Merge import blocks commutatively (as unordered sets), preserving grouping.
1189///
1190/// Splits imports into groups separated by blank lines. Merges within each group
1191/// commutatively. New imports from theirs go into the matching group (by source
1192/// prefix, e.g. "from collections" matches "from collections.abc") or the last group.
1193fn merge_imports_commutatively(base: &str, ours: &str, theirs: &str) -> String {
1194    let base_imports: HashSet<&str> = base.lines().filter(|l| is_import_line(l)).collect();
1195    let ours_imports: HashSet<&str> = ours.lines().filter(|l| is_import_line(l)).collect();
1196
1197    // Theirs deleted: in base but removed by theirs. Remove from ours output.
1198    let theirs_deleted: HashSet<&str> = base_imports.difference(
1199        &theirs.lines().filter(|l| is_import_line(l)).collect::<HashSet<&str>>()
1200    ).copied().collect();
1201
1202    // Theirs added: new in theirs, not in base, not already in ours
1203    let theirs_added: Vec<&str> = theirs
1204        .lines()
1205        .filter(|l| is_import_line(l) && !base_imports.contains(l) && !ours_imports.contains(l))
1206        .collect();
1207
1208    // Split ours into groups (separated by blank lines), preserving non-import lines
1209    let mut groups: Vec<Vec<&str>> = Vec::new();
1210    let mut current_group: Vec<&str> = Vec::new();
1211    let mut non_import_lines: Vec<(usize, &str)> = Vec::new(); // (group_idx, line)
1212
1213    for line in ours.lines() {
1214        if line.trim().is_empty() {
1215            if !current_group.is_empty() {
1216                groups.push(current_group);
1217                current_group = Vec::new();
1218            }
1219            // Track blank lines for reconstruction
1220            non_import_lines.push((groups.len(), line));
1221        } else if is_import_line(line) {
1222            if theirs_deleted.contains(line) {
1223                continue;
1224            }
1225            current_group.push(line);
1226        } else {
1227            // Non-import, non-blank line (comment, etc.)
1228            current_group.push(line);
1229        }
1230    }
1231    if !current_group.is_empty() {
1232        groups.push(current_group);
1233    }
1234
1235    // For each theirs addition, find the best matching group by source prefix
1236    for add in &theirs_added {
1237        let prefix = import_source_prefix(add);
1238        let mut best_group = if groups.is_empty() { 0 } else { groups.len() - 1 };
1239        for (i, group) in groups.iter().enumerate() {
1240            if group.iter().any(|l| {
1241                is_import_line(l) && import_source_prefix(l) == prefix
1242            }) {
1243                best_group = i;
1244                break;
1245            }
1246        }
1247        if best_group < groups.len() {
1248            groups[best_group].push(add);
1249        } else {
1250            groups.push(vec![add]);
1251        }
1252    }
1253
1254    // Reconstruct with blank line separators between groups
1255    let mut result_lines: Vec<&str> = Vec::new();
1256    for (i, group) in groups.iter().enumerate() {
1257        if i > 0 {
1258            result_lines.push("");
1259        }
1260        result_lines.extend(group);
1261    }
1262
1263    let mut result = result_lines.join("\n");
1264    // Preserve trailing newlines from ours
1265    let ours_trailing = ours.len() - ours.trim_end_matches('\n').len();
1266    let result_trailing = result.len() - result.trim_end_matches('\n').len();
1267    for _ in result_trailing..ours_trailing {
1268        result.push('\n');
1269    }
1270    result
1271}
1272
/// Extract the source/module key of an import line, used to match imports to groups.
///
/// * `from X import Y` yields `X` (the first whitespace-delimited token after `from `)
/// * `import ... 'Y'` / `import ... "Y"` yields `Y` (text between the first two quote characters)
/// * `use X::Y;` yields `X` (the first path segment, with any trailing `;` removed)
/// * anything else yields the trimmed line itself
///
/// e.g. "from collections import OrderedDict" -> "collections"
///      "import React from 'react'" -> "react"
///      "use std::collections::HashMap;" -> "std"
fn import_source_prefix(line: &str) -> &str {
    fn is_quote(c: char) -> bool {
        matches!(c, '\'' | '"')
    }

    let stmt = line.trim();

    // Python-style: the module name follows `from`.
    if let Some(module_and_rest) = stmt.strip_prefix("from ") {
        return module_and_rest.split_whitespace().next().unwrap_or("");
    }

    // JS/TS-style: the source is the quoted string, if a complete one exists.
    if stmt.starts_with("import ") {
        let quoted = stmt.find(is_quote).and_then(|open| {
            let tail = &stmt[open + 1..];
            tail.find(is_quote).map(|close| &tail[..close])
        });
        if let Some(source) = quoted {
            return source;
        }
    }

    // Rust-style: the first path segment; otherwise fall back to the whole line.
    match stmt.strip_prefix("use ") {
        Some(path) => path.split("::").next().unwrap_or("").trim_end_matches(';'),
        None => stmt,
    }
}
1298
1299/// Fallback to line-level 3-way merge when entity extraction isn't possible.
1300///
1301/// Uses Sesame-inspired separator preprocessing (arXiv:2407.18888) to get
1302/// finer-grained alignment before line-level merge. Inserts newlines around
1303/// syntactic separators ({, }, ;) so that changes in different code blocks
1304/// align independently, reducing spurious conflicts.
1305///
1306/// Sesame expansion is skipped for data formats (JSON, YAML, TOML, lock files)
1307/// where `{`, `}`, `;` are structural content rather than code separators.
1308/// Expanding them destroys alignment and produces far more conflicts (confirmed
1309/// on GitButler: YAML went from 68 git markers to 192 weave markers with Sesame).
1310fn line_level_fallback(base: &str, ours: &str, theirs: &str, file_path: &str) -> MergeResult {
1311    let mut stats = MergeStats::default();
1312    stats.used_fallback = true;
1313
1314    // Skip Sesame preprocessing for data formats where {/}/; are content, not separators
1315    let skip = skip_sesame(file_path);
1316
1317    if skip {
1318        // Use git merge-file for data formats so we match git's output exactly.
1319        // diffy::merge uses a different diff algorithm that can produce more
1320        // conflict markers on structured data like lock files.
1321        return git_merge_file(base, ours, theirs, &mut stats);
1322    }
1323
1324    // Try Sesame expansion + diffy first, then compare against git merge-file.
1325    // Use whichever produces fewer conflict markers so we're never worse than git.
1326    let base_expanded = expand_separators(base);
1327    let ours_expanded = expand_separators(ours);
1328    let theirs_expanded = expand_separators(theirs);
1329
1330    let sesame_result = match diffy::merge(&base_expanded, &ours_expanded, &theirs_expanded) {
1331        Ok(merged) => {
1332            let content = collapse_separators(&merged, base);
1333            Some(MergeResult {
1334                content: post_merge_cleanup(&content),
1335                conflicts: vec![],
1336                warnings: vec![],
1337                stats: stats.clone(),
1338                audit: vec![],
1339            })
1340        }
1341        Err(_) => {
1342            // Sesame expansion conflicted, try plain diffy
1343            match diffy::merge(base, ours, theirs) {
1344                Ok(merged) => Some(MergeResult {
1345                    content: merged,
1346                    conflicts: vec![],
1347                    warnings: vec![],
1348                    stats: stats.clone(),
1349                    audit: vec![],
1350                }),
1351                Err(conflicted) => {
1352                    let _markers = conflicted.lines().filter(|l| l.starts_with("<<<<<<<")).count();
1353                    let mut s = stats.clone();
1354                    s.entities_conflicted = 1;
1355                    Some(MergeResult {
1356                        content: conflicted,
1357                        conflicts: vec![EntityConflict {
1358                            entity_name: "(file)".to_string(),
1359                            entity_type: "file".to_string(),
1360                            kind: ConflictKind::BothModified,
1361                            complexity: classify_conflict(Some(base), Some(ours), Some(theirs)),
1362                            ours_content: Some(ours.to_string()),
1363                            theirs_content: Some(theirs.to_string()),
1364                            base_content: Some(base.to_string()),
1365                        }],
1366                        warnings: vec![],
1367                        stats: s,
1368                        audit: vec![],
1369                    })
1370                }
1371            }
1372        }
1373    };
1374
1375    // Get git merge-file result as our floor
1376    let git_result = git_merge_file(base, ours, theirs, &mut stats);
1377
1378    // Compare: use sesame result only if it has fewer or equal markers
1379    match sesame_result {
1380        Some(sesame) if sesame.conflicts.is_empty() && !git_result.conflicts.is_empty() => {
1381            // Sesame resolved cleanly, git didn't: use sesame
1382            sesame
1383        }
1384        Some(sesame) if !sesame.conflicts.is_empty() && !git_result.conflicts.is_empty() => {
1385            // Both conflicted: use whichever has fewer markers
1386            let sesame_markers = sesame.content.lines().filter(|l| l.starts_with("<<<<<<<")).count();
1387            let git_markers = git_result.content.lines().filter(|l| l.starts_with("<<<<<<<")).count();
1388            if sesame_markers <= git_markers { sesame } else { git_result }
1389        }
1390        _ => git_result,
1391    }
1392}
1393
1394/// Shell out to `git merge-file` for an exact match with git's line-level merge.
1395///
1396/// We use this instead of `diffy::merge` for data formats (lock files, JSON, YAML, TOML)
1397/// where weave can't improve on git. `diffy` uses a different diff algorithm that can
1398/// produce more conflict markers on structured data (e.g. 22 markers vs git's 19 on uv.lock).
1399fn git_merge_file(base: &str, ours: &str, theirs: &str, stats: &mut MergeStats) -> MergeResult {
1400    let dir = match tempfile::tempdir() {
1401        Ok(d) => d,
1402        Err(_) => return diffy_fallback(base, ours, theirs, stats),
1403    };
1404
1405    let base_path = dir.path().join("base");
1406    let ours_path = dir.path().join("ours");
1407    let theirs_path = dir.path().join("theirs");
1408
1409    let write_ok = (|| -> std::io::Result<()> {
1410        std::fs::File::create(&base_path)?.write_all(base.as_bytes())?;
1411        std::fs::File::create(&ours_path)?.write_all(ours.as_bytes())?;
1412        std::fs::File::create(&theirs_path)?.write_all(theirs.as_bytes())?;
1413        Ok(())
1414    })();
1415
1416    if write_ok.is_err() {
1417        return diffy_fallback(base, ours, theirs, stats);
1418    }
1419
1420    // git merge-file writes result to the first file (ours) in place
1421    let output = Command::new("git")
1422        .arg("merge-file")
1423        .arg("-p") // print to stdout instead of modifying ours in place
1424        .arg(&ours_path)
1425        .arg(&base_path)
1426        .arg(&theirs_path)
1427        .output();
1428
1429    match output {
1430        Ok(result) => {
1431            let content = String::from_utf8_lossy(&result.stdout).into_owned();
1432            if result.status.success() {
1433                // Exit 0 = clean merge
1434                MergeResult {
1435                    content: post_merge_cleanup(&content),
1436                    conflicts: vec![],
1437                    warnings: vec![],
1438                    stats: stats.clone(),
1439                    audit: vec![],
1440                }
1441            } else {
1442                // Exit >0 = conflicts (exit code = number of conflicts)
1443                stats.entities_conflicted = 1;
1444                MergeResult {
1445                    content,
1446                    conflicts: vec![EntityConflict {
1447                        entity_name: "(file)".to_string(),
1448                        entity_type: "file".to_string(),
1449                        kind: ConflictKind::BothModified,
1450                        complexity: classify_conflict(Some(base), Some(ours), Some(theirs)),
1451                        ours_content: Some(ours.to_string()),
1452                        theirs_content: Some(theirs.to_string()),
1453                        base_content: Some(base.to_string()),
1454                    }],
1455                    warnings: vec![],
1456                    stats: stats.clone(),
1457                    audit: vec![],
1458                }
1459            }
1460        }
1461        // git not available, fall back to diffy
1462        Err(_) => diffy_fallback(base, ours, theirs, stats),
1463    }
1464}
1465
1466/// Fallback to diffy::merge when git merge-file is unavailable.
1467fn diffy_fallback(base: &str, ours: &str, theirs: &str, stats: &mut MergeStats) -> MergeResult {
1468    match diffy::merge(base, ours, theirs) {
1469        Ok(merged) => {
1470            let content = post_merge_cleanup(&merged);
1471            MergeResult {
1472                content,
1473                conflicts: vec![],
1474                warnings: vec![],
1475                stats: stats.clone(),
1476                audit: vec![],
1477            }
1478        }
1479        Err(conflicted) => {
1480            stats.entities_conflicted = 1;
1481            MergeResult {
1482                content: conflicted,
1483                conflicts: vec![EntityConflict {
1484                    entity_name: "(file)".to_string(),
1485                    entity_type: "file".to_string(),
1486                    kind: ConflictKind::BothModified,
1487                    complexity: classify_conflict(Some(base), Some(ours), Some(theirs)),
1488                    ours_content: Some(ours.to_string()),
1489                    theirs_content: Some(theirs.to_string()),
1490                    base_content: Some(base.to_string()),
1491                }],
1492                warnings: vec![],
1493                stats: stats.clone(),
1494                audit: vec![],
1495            }
1496        }
1497    }
1498}
1499
1500/// Filter out entities that are nested inside other entities.
1501///
1502/// When a class contains methods which contain local variables, sem-core may extract
1503/// all of them as entities. But for merge purposes, nested entities are part of their
1504/// parent — we handle them via inner entity merge. Keeping them causes false conflicts
1505/// (e.g. two methods both declaring `const user` would appear as BothAdded).
1506/// Check if entity list has too many duplicate names, which causes matching to hang.
1507fn has_excessive_duplicates(entities: &[SemanticEntity]) -> bool {
1508    let threshold = std::env::var("WEAVE_MAX_DUPLICATES")
1509        .ok()
1510        .and_then(|v| v.parse::<usize>().ok())
1511        .unwrap_or(10);
1512    let mut counts: HashMap<&str, usize> = HashMap::new();
1513    for e in entities {
1514        *counts.entry(&e.name).or_default() += 1;
1515    }
1516    counts.values().any(|&c| c >= threshold)
1517}
1518
1519/// Filter out entities that are nested inside other entities.
1520/// O(n log n) via sort + stack, replacing the previous O(n^2) approach.
1521fn filter_nested_entities(mut entities: Vec<SemanticEntity>) -> Vec<SemanticEntity> {
1522    if entities.len() <= 1 {
1523        return entities;
1524    }
1525
1526    // Sort by start_line ASC, then by end_line DESC (widest span first).
1527    // A parent entity always appears before its children in this order.
1528    entities.sort_by(|a, b| {
1529        a.start_line.cmp(&b.start_line).then(b.end_line.cmp(&a.end_line))
1530    });
1531
1532    // Stack-based filter: track the end_line of the current outermost entity.
1533    let mut result: Vec<SemanticEntity> = Vec::with_capacity(entities.len());
1534    let mut max_end: usize = 0;
1535
1536    for entity in entities {
1537        if entity.start_line > max_end || max_end == 0 {
1538            // Not nested: new top-level entity
1539            max_end = entity.end_line;
1540            result.push(entity);
1541        } else if entity.start_line == result.last().map_or(0, |e| e.start_line)
1542            && entity.end_line == result.last().map_or(0, |e| e.end_line)
1543        {
1544            // Exact same span (e.g. decorated_definition wrapping function_definition)
1545            result.push(entity);
1546        }
1547        // else: strictly nested, skip
1548    }
1549
1550    result
1551}
1552
1553/// Get child entities of a parent, sorted by start line.
1554fn get_child_entities<'a>(
1555    parent: &SemanticEntity,
1556    all_entities: &'a [SemanticEntity],
1557) -> Vec<&'a SemanticEntity> {
1558    let mut children: Vec<&SemanticEntity> = all_entities
1559        .iter()
1560        .filter(|e| e.parent_id.as_deref() == Some(&parent.id))
1561        .collect();
1562    children.sort_by_key(|e| e.start_line);
1563    children
1564}
1565
1566/// Compute a body hash for rename detection: the entity content with the entity
1567/// name replaced at word boundaries by a placeholder, so entities with identical
1568/// bodies but different names produce the same hash.
1569///
1570/// Uses word-boundary matching to avoid partial replacements (e.g. replacing
1571/// "get" inside "getAll"). Works across all languages since it operates on
1572/// the content string, not language-specific AST features.
1573fn body_hash(entity: &SemanticEntity) -> u64 {
1574    use std::collections::hash_map::DefaultHasher;
1575    use std::hash::{Hash, Hasher};
1576    let normalized = replace_at_word_boundaries(&entity.content, &entity.name, "__ENTITY__");
1577    let mut hasher = DefaultHasher::new();
1578    normalized.hash(&mut hasher);
1579    hasher.finish()
1580}
1581
/// Replace `needle` with `replacement` only at word boundaries.
///
/// A match is replaced when the character immediately before it and the character
/// immediately after it (if any) are not identifier characters. Identifier characters
/// are `_`, ASCII letters and digits, and any non-ASCII alphanumeric character, so
/// `caf` is not replaced inside `café`.
///
/// Scanning is left to right. After a replacement the scan resumes right after the
/// match; a rejected match advances by a single character. An empty `needle` returns
/// `content` unchanged.
fn replace_at_word_boundaries(content: &str, needle: &str, replacement: &str) -> String {
    /// Identifier test on whole characters; ASCII is delegated to `is_ident_char`.
    fn is_word_char(c: char) -> bool {
        if c.is_ascii() {
            is_ident_char(c as u8)
        } else {
            c.is_alphanumeric()
        }
    }

    if needle.is_empty() {
        return content.to_string();
    }

    let mut result = String::with_capacity(content.len());
    // Character of `content` that precedes `rest`, or None at the very start.
    let mut prev: Option<char> = None;
    let mut rest = content;

    while let Some(ch) = rest.chars().next() {
        if let Some(tail) = rest.strip_prefix(needle) {
            let before_ok = !prev.is_some_and(is_word_char);
            let after_ok = !tail.chars().next().is_some_and(is_word_char);
            if before_ok && after_ok {
                result.push_str(replacement);
                // Boundaries are judged against the original text, so the character
                // preceding the next position is the last character of the needle.
                prev = needle.chars().next_back();
                rest = tail;
                continue;
            }
        }
        result.push(ch);
        prev = Some(ch);
        rest = &rest[ch.len_utf8()..];
    }

    result
}

/// Whether `b` is an ASCII identifier byte: a letter, a digit, or `_`.
fn is_ident_char(b: u8) -> bool {
    b.is_ascii_alphanumeric() || b == b'_'
}
1626
1627/// Build a rename map from new_id → base_id using confidence-scored matching.
1628///
1629/// Detects when an entity in the branch has the same body as an entity
1630/// in base but a different name/ID, indicating it was renamed.
1631/// Uses body_hash (name-stripped content hash) and structural_hash with
1632/// confidence scoring to resolve ambiguous matches correctly.
1633fn build_rename_map(
1634    base_entities: &[SemanticEntity],
1635    branch_entities: &[SemanticEntity],
1636) -> HashMap<String, String> {
1637    let mut rename_map: HashMap<String, String> = HashMap::new();
1638
1639    let base_ids: HashSet<&str> = base_entities.iter().map(|e| e.id.as_str()).collect();
1640
1641    // Build body_hash → base entities (multiple can have same hash)
1642    let mut base_by_body: HashMap<u64, Vec<&SemanticEntity>> = HashMap::new();
1643    for entity in base_entities {
1644        base_by_body.entry(body_hash(entity)).or_default().push(entity);
1645    }
1646
1647    // Also keep structural_hash index as fallback
1648    let mut base_by_structural: HashMap<&str, Vec<&SemanticEntity>> = HashMap::new();
1649    for entity in base_entities {
1650        if let Some(ref sh) = entity.structural_hash {
1651            base_by_structural.entry(sh.as_str()).or_default().push(entity);
1652        }
1653    }
1654
1655    // Collect all candidate (branch_entity, base_entity, confidence) triples
1656    struct RenameCandidate<'a> {
1657        branch: &'a SemanticEntity,
1658        base: &'a SemanticEntity,
1659        confidence: f64,
1660    }
1661    let mut candidates: Vec<RenameCandidate> = Vec::new();
1662
1663    for branch_entity in branch_entities {
1664        if base_ids.contains(branch_entity.id.as_str()) {
1665            continue;
1666        }
1667
1668        let bh = body_hash(branch_entity);
1669
1670        // Body hash matches
1671        if let Some(base_entities_for_hash) = base_by_body.get(&bh) {
1672            for &base_entity in base_entities_for_hash {
1673                let same_type = base_entity.entity_type == branch_entity.entity_type;
1674                let same_parent = base_entity.parent_id == branch_entity.parent_id;
1675                let confidence = match (same_type, same_parent) {
1676                    (true, true) => 0.95,
1677                    (true, false) => 0.8,
1678                    (false, _) => 0.6,
1679                };
1680                candidates.push(RenameCandidate { branch: branch_entity, base: base_entity, confidence });
1681            }
1682        }
1683
1684        // Structural hash fallback (lower confidence)
1685        if let Some(ref sh) = branch_entity.structural_hash {
1686            if let Some(base_entities_for_sh) = base_by_structural.get(sh.as_str()) {
1687                for &base_entity in base_entities_for_sh {
1688                    // Skip if already covered by body hash match
1689                    if candidates.iter().any(|c| c.branch.id == branch_entity.id && c.base.id == base_entity.id) {
1690                        continue;
1691                    }
1692                    candidates.push(RenameCandidate { branch: branch_entity, base: base_entity, confidence: 0.6 });
1693                }
1694            }
1695        }
1696    }
1697
1698    // Sort by confidence descending, assign greedily
1699    candidates.sort_by(|a, b| b.confidence.partial_cmp(&a.confidence).unwrap_or(std::cmp::Ordering::Equal));
1700
1701    let mut used_base_ids: HashSet<String> = HashSet::new();
1702    let mut used_branch_ids: HashSet<String> = HashSet::new();
1703
1704    for candidate in &candidates {
1705        if candidate.confidence < 0.6 {
1706            break;
1707        }
1708        if used_base_ids.contains(&candidate.base.id) || used_branch_ids.contains(&candidate.branch.id) {
1709            continue;
1710        }
1711        // Don't rename if the base entity's ID still exists in branch (it wasn't actually renamed)
1712        let base_id_in_branch = branch_entities.iter().any(|e| e.id == candidate.base.id);
1713        if base_id_in_branch {
1714            continue;
1715        }
1716        rename_map.insert(candidate.branch.id.clone(), candidate.base.id.clone());
1717        used_base_ids.insert(candidate.base.id.clone());
1718        used_branch_ids.insert(candidate.branch.id.clone());
1719    }
1720
1721    rename_map
1722}
1723
/// Whether `entity_type` names a container kind that may benefit from inner entity merge.
///
/// The comparison is exact and case-sensitive against a fixed list of type names.
fn is_container_entity_type(entity_type: &str) -> bool {
    const CONTAINER_TYPES: &[&str] = &[
        "class",
        "interface",
        "enum",
        "impl",
        "trait",
        "module",
        "impl_item",
        "trait_item",
        "struct",
        "union",
        "namespace",
        "struct_item",
        "struct_specifier",
        "variable",
        "export",
    ];
    CONTAINER_TYPES.contains(&entity_type)
}
1733
/// One named member (method, field, variant, ...) cut out of a container body.
///
/// Inner entity merge matches chunks across base/ours/theirs by `name` and merges
/// their `content` independently.
#[derive(Clone, Debug)]
struct MemberChunk {
    /// Name used to match this member across the three versions.
    name: String,
    /// Complete text of the member, body included.
    content: String,
}
1742
/// Outcome of merging a container member by member.
struct InnerMergeResult {
    /// Reassembled container text; contains per-member conflict markers when
    /// `has_conflicts` is true.
    content: String,
    /// True when at least one member could not be merged automatically.
    has_conflicts: bool,
}
1750
1751/// Convert sem-core child entities to MemberChunks for inner merge.
1752///
1753/// Uses child entity line positions to extract content from the container text,
1754/// including any leading decorators/annotations that tree-sitter attaches as
1755/// sibling nodes rather than part of the method node.
1756fn children_to_chunks(
1757    children: &[&SemanticEntity],
1758    container_content: &str,
1759    container_start_line: usize,
1760) -> Vec<MemberChunk> {
1761    if children.is_empty() {
1762        return Vec::new();
1763    }
1764
1765    let lines: Vec<&str> = container_content.lines().collect();
1766    let mut chunks = Vec::new();
1767
1768    for (i, child) in children.iter().enumerate() {
1769        let child_start_idx = child.start_line.saturating_sub(container_start_line);
1770        // +1 because end_line is inclusive but we need an exclusive upper bound for slicing
1771        let child_end_idx = child.end_line.saturating_sub(container_start_line) + 1;
1772
1773        if child_end_idx > lines.len() + 1 || child_start_idx >= lines.len() {
1774            // Position out of range, fall back to entity content
1775            chunks.push(MemberChunk {
1776                name: child.name.clone(),
1777                content: child.content.clone(),
1778            });
1779            continue;
1780        }
1781        let child_end_idx = child_end_idx.min(lines.len());
1782
1783        // Determine the earliest line we can claim (after previous child's end, or body start)
1784        let floor = if i > 0 {
1785            children[i - 1].end_line.saturating_sub(container_start_line) + 1
1786        } else {
1787            // First child: start after the container header line (the `{` or `:` line)
1788            // Find the line containing `{` or ending with `:`
1789            let header_end = lines
1790                .iter()
1791                .position(|l| l.contains('{') || l.trim().ends_with(':'))
1792                .map(|p| p + 1)
1793                .unwrap_or(0);
1794            header_end
1795        };
1796
1797        // Scan backwards from child_start_idx to include decorators/annotations/comments
1798        let mut content_start = child_start_idx;
1799        while content_start > floor {
1800            let prev = content_start - 1;
1801            let trimmed = lines[prev].trim();
1802            if trimmed.starts_with('@')
1803                || trimmed.starts_with("#[")
1804                || trimmed.starts_with("//")
1805                || trimmed.starts_with("///")
1806                || trimmed.starts_with("/**")
1807                || trimmed.starts_with("* ")
1808                || trimmed == "*/"
1809            {
1810                content_start = prev;
1811            } else if trimmed.is_empty() && content_start > floor + 1 {
1812                // Allow one blank line between decorator and method
1813                content_start = prev;
1814            } else {
1815                break;
1816            }
1817        }
1818
1819        // Skip leading blank lines
1820        while content_start < child_start_idx && lines[content_start].trim().is_empty() {
1821            content_start += 1;
1822        }
1823
1824        let chunk_content: String = lines[content_start..child_end_idx].join("\n");
1825        chunks.push(MemberChunk {
1826            name: child.name.clone(),
1827            content: chunk_content,
1828        });
1829    }
1830
1831    chunks
1832}
1833
1834/// Generate a scoped conflict marker for a single member within a container merge.
1835fn scoped_conflict_marker(
1836    name: &str,
1837    ours: Option<&str>,
1838    theirs: Option<&str>,
1839    ours_deleted: bool,
1840    theirs_deleted: bool,
1841    fmt: &MarkerFormat,
1842) -> String {
1843    let open = "<".repeat(fmt.marker_length);
1844    let sep = "=".repeat(fmt.marker_length);
1845    let close = ">".repeat(fmt.marker_length);
1846
1847    let o = ours.unwrap_or("");
1848    let t = theirs.unwrap_or("");
1849
1850    // Narrow conflict markers to just the differing lines
1851    let ours_lines: Vec<&str> = o.lines().collect();
1852    let theirs_lines: Vec<&str> = t.lines().collect();
1853    let (prefix_len, suffix_len) = if ours.is_some() && theirs.is_some() {
1854        crate::conflict::narrow_conflict_lines(&ours_lines, &theirs_lines)
1855    } else {
1856        (0, 0)
1857    };
1858    let has_narrowing = prefix_len > 0 || suffix_len > 0;
1859    let ours_mid = &ours_lines[prefix_len..ours_lines.len() - suffix_len];
1860    let theirs_mid = &theirs_lines[prefix_len..theirs_lines.len() - suffix_len];
1861
1862    let mut out = String::new();
1863
1864    // Emit common prefix as clean text
1865    if has_narrowing {
1866        for line in &ours_lines[..prefix_len] {
1867            out.push_str(line);
1868            out.push('\n');
1869        }
1870    }
1871
1872    // Opening marker
1873    if fmt.enhanced {
1874        if ours_deleted {
1875            out.push_str(&format!("{} ours ({} deleted)\n", open, name));
1876        } else {
1877            out.push_str(&format!("{} ours ({})\n", open, name));
1878        }
1879    } else {
1880        out.push_str(&format!("{} ours\n", open));
1881    }
1882
1883    // Ours content (narrowed or full)
1884    if ours.is_some() {
1885        if has_narrowing {
1886            for line in ours_mid {
1887                out.push_str(line);
1888                out.push('\n');
1889            }
1890        } else {
1891            out.push_str(o);
1892            if !o.ends_with('\n') {
1893                out.push('\n');
1894            }
1895        }
1896    }
1897
1898    // Separator
1899    out.push_str(&format!("{}\n", sep));
1900
1901    // Theirs content (narrowed or full)
1902    if theirs.is_some() {
1903        if has_narrowing {
1904            for line in theirs_mid {
1905                out.push_str(line);
1906                out.push('\n');
1907            }
1908        } else {
1909            out.push_str(t);
1910            if !t.ends_with('\n') {
1911                out.push('\n');
1912            }
1913        }
1914    }
1915
1916    // Closing marker
1917    if fmt.enhanced {
1918        if theirs_deleted {
1919            out.push_str(&format!("{} theirs ({} deleted)", close, name));
1920        } else {
1921            out.push_str(&format!("{} theirs ({})", close, name));
1922        }
1923    } else {
1924        out.push_str(&format!("{} theirs", close));
1925    }
1926
1927    // Emit common suffix as clean text
1928    if has_narrowing {
1929        for line in &ours_lines[ours_lines.len() - suffix_len..] {
1930            out.push_str(line);
1931            out.push('\n');
1932        }
1933    }
1934
1935    out
1936}
1937
1938/// Try recursive inner entity merge for container types (classes, impls, etc.).
1939///
1940/// Inspired by LastMerge (arXiv:2507.19687): class members are "unordered children" —
1941/// reordering them is not a conflict. We chunk the class body into members, match by
1942/// name, and merge each member independently.
1943///
1944/// Returns Some(result) if chunking succeeded, None if we can't parse the container.
1945/// The result may contain per-member conflict markers (scoped conflicts).
1946fn try_inner_entity_merge(
1947    base: &str,
1948    ours: &str,
1949    theirs: &str,
1950    base_children: &[&SemanticEntity],
1951    ours_children: &[&SemanticEntity],
1952    theirs_children: &[&SemanticEntity],
1953    base_start_line: usize,
1954    ours_start_line: usize,
1955    theirs_start_line: usize,
1956    marker_format: &MarkerFormat,
1957) -> Option<InnerMergeResult> {
1958    // Try sem-core child entities first (tree-sitter-accurate boundaries),
1959    // fall back to indentation heuristic if children aren't available.
1960    // When children_to_chunks produces chunks, try indentation as a fallback
1961    // if the tree-sitter chunks lead to conflicts (the indentation heuristic
1962    // can include trailing context that helps diffy merge adjacent changes).
1963    let use_children = !ours_children.is_empty() || !theirs_children.is_empty();
1964    let (base_chunks, ours_chunks, theirs_chunks) = if use_children {
1965        (
1966            children_to_chunks(base_children, base, base_start_line),
1967            children_to_chunks(ours_children, ours, ours_start_line),
1968            children_to_chunks(theirs_children, theirs, theirs_start_line),
1969        )
1970    } else {
1971        (
1972            extract_member_chunks(base)?,
1973            extract_member_chunks(ours)?,
1974            extract_member_chunks(theirs)?,
1975        )
1976    };
1977
1978    // Need at least 1 member to attempt inner merge
1979    // (Even single-member containers benefit from decorator-aware merge)
1980    if base_chunks.is_empty() && ours_chunks.is_empty() && theirs_chunks.is_empty() {
1981        return None;
1982    }
1983
1984    // Build name → content maps
1985    let base_map: HashMap<&str, &str> = base_chunks
1986        .iter()
1987        .map(|c| (c.name.as_str(), c.content.as_str()))
1988        .collect();
1989    let ours_map: HashMap<&str, &str> = ours_chunks
1990        .iter()
1991        .map(|c| (c.name.as_str(), c.content.as_str()))
1992        .collect();
1993    let theirs_map: HashMap<&str, &str> = theirs_chunks
1994        .iter()
1995        .map(|c| (c.name.as_str(), c.content.as_str()))
1996        .collect();
1997
1998    // Collect all member names
1999    let mut all_names: Vec<String> = Vec::new();
2000    let mut seen: HashSet<String> = HashSet::new();
2001    // Use ours ordering as skeleton
2002    for chunk in &ours_chunks {
2003        if seen.insert(chunk.name.clone()) {
2004            all_names.push(chunk.name.clone());
2005        }
2006    }
2007    // Add theirs-only members
2008    for chunk in &theirs_chunks {
2009        if seen.insert(chunk.name.clone()) {
2010            all_names.push(chunk.name.clone());
2011        }
2012    }
2013
2014    // Extract header/footer (class declaration line and closing brace)
2015    let (ours_header, ours_footer) = extract_container_wrapper(ours)?;
2016
2017    let mut merged_members: Vec<String> = Vec::new();
2018    let mut has_conflict = false;
2019
2020    for name in &all_names {
2021        let in_base = base_map.get(name.as_str());
2022        let in_ours = ours_map.get(name.as_str());
2023        let in_theirs = theirs_map.get(name.as_str());
2024
2025        match (in_base, in_ours, in_theirs) {
2026            // In all three
2027            (Some(b), Some(o), Some(t)) => {
2028                if o == t {
2029                    merged_members.push(o.to_string());
2030                } else if b == o {
2031                    merged_members.push(t.to_string());
2032                } else if b == t {
2033                    merged_members.push(o.to_string());
2034                } else {
2035                    // Both changed differently: try diffy, then git merge-file, then decorator merge
2036                    if let Some(merged) = diffy_merge(b, o, t) {
2037                        merged_members.push(merged);
2038                    } else if let Some(merged) = git_merge_string(b, o, t) {
2039                        merged_members.push(merged);
2040                    } else if let Some(merged) = try_decorator_aware_merge(b, o, t) {
2041                        merged_members.push(merged);
2042                    } else {
2043                        // Emit per-member conflict markers
2044                        has_conflict = true;
2045                        merged_members.push(scoped_conflict_marker(name, Some(o), Some(t), false, false, marker_format));
2046                    }
2047                }
2048            }
2049            // Deleted by theirs, ours unchanged or not in base
2050            (Some(b), Some(o), None) => {
2051                if *b == *o {
2052                    // Ours unchanged, theirs deleted → accept deletion
2053                } else {
2054                    // Ours modified, theirs deleted → per-member conflict
2055                    has_conflict = true;
2056                    merged_members.push(scoped_conflict_marker(name, Some(o), None, false, true, marker_format));
2057                }
2058            }
2059            // Deleted by ours, theirs unchanged or not in base
2060            (Some(b), None, Some(t)) => {
2061                if *b == *t {
2062                    // Theirs unchanged, ours deleted → accept deletion
2063                } else {
2064                    // Theirs modified, ours deleted → per-member conflict
2065                    has_conflict = true;
2066                    merged_members.push(scoped_conflict_marker(name, None, Some(t), true, false, marker_format));
2067                }
2068            }
2069            // Added by ours only
2070            (None, Some(o), None) => {
2071                merged_members.push(o.to_string());
2072            }
2073            // Added by theirs only
2074            (None, None, Some(t)) => {
2075                merged_members.push(t.to_string());
2076            }
2077            // Added by both with different content
2078            (None, Some(o), Some(t)) => {
2079                if o == t {
2080                    merged_members.push(o.to_string());
2081                } else {
2082                    has_conflict = true;
2083                    merged_members.push(scoped_conflict_marker(name, Some(o), Some(t), false, false, marker_format));
2084                }
2085            }
2086            // Deleted by both
2087            (Some(_), None, None) => {}
2088            (None, None, None) => {}
2089        }
2090    }
2091
2092    // Reconstruct: header + merged members + footer
2093    let mut result = String::new();
2094    result.push_str(ours_header);
2095    if !ours_header.ends_with('\n') {
2096        result.push('\n');
2097    }
2098
2099    // Detect if members are single-line (fields, variants) vs multi-line (methods)
2100    let has_multiline_members = merged_members.iter().any(|m| m.contains('\n'));
2101
2102    for (i, member) in merged_members.iter().enumerate() {
2103        result.push_str(member);
2104        if !member.ends_with('\n') {
2105            result.push('\n');
2106        }
2107        // Add blank line between multi-line members (methods) but not single-line (fields, variants)
2108        if i < merged_members.len() - 1 && has_multiline_members && !member.ends_with("\n\n") {
2109            result.push('\n');
2110        }
2111    }
2112
2113    result.push_str(ours_footer);
2114    if !ours_footer.ends_with('\n') && ours.ends_with('\n') {
2115        result.push('\n');
2116    }
2117
2118    // If children_to_chunks led to conflicts, retry with indentation heuristic.
2119    // The indentation approach includes trailing blank lines in chunks, giving
2120    // diffy more context to merge adjacent changes from different branches.
2121    if has_conflict && use_children {
2122        if let (Some(bc), Some(oc), Some(tc)) = (
2123            extract_member_chunks(base),
2124            extract_member_chunks(ours),
2125            extract_member_chunks(theirs),
2126        ) {
2127            if !bc.is_empty() || !oc.is_empty() || !tc.is_empty() {
2128                let fallback = try_inner_merge_with_chunks(
2129                    &bc, &oc, &tc, ours, ours_header, ours_footer,
2130                    has_multiline_members, marker_format,
2131                );
2132                if let Some(fb) = fallback {
2133                    if !fb.has_conflicts {
2134                        return Some(fb);
2135                    }
2136                }
2137            }
2138        }
2139    }
2140
2141    Some(InnerMergeResult {
2142        content: result,
2143        has_conflicts: has_conflict,
2144    })
2145}
2146
2147/// Inner merge helper using pre-extracted chunks. Used for indentation-heuristic fallback.
2148fn try_inner_merge_with_chunks(
2149    base_chunks: &[MemberChunk],
2150    ours_chunks: &[MemberChunk],
2151    theirs_chunks: &[MemberChunk],
2152    ours: &str,
2153    ours_header: &str,
2154    ours_footer: &str,
2155    has_multiline_hint: bool,
2156    marker_format: &MarkerFormat,
2157) -> Option<InnerMergeResult> {
2158    let base_map: HashMap<&str, &str> = base_chunks.iter().map(|c| (c.name.as_str(), c.content.as_str())).collect();
2159    let ours_map: HashMap<&str, &str> = ours_chunks.iter().map(|c| (c.name.as_str(), c.content.as_str())).collect();
2160    let theirs_map: HashMap<&str, &str> = theirs_chunks.iter().map(|c| (c.name.as_str(), c.content.as_str())).collect();
2161
2162    let mut all_names: Vec<String> = Vec::new();
2163    let mut seen: HashSet<String> = HashSet::new();
2164    for chunk in ours_chunks {
2165        if seen.insert(chunk.name.clone()) {
2166            all_names.push(chunk.name.clone());
2167        }
2168    }
2169    for chunk in theirs_chunks {
2170        if seen.insert(chunk.name.clone()) {
2171            all_names.push(chunk.name.clone());
2172        }
2173    }
2174
2175    let mut merged_members: Vec<String> = Vec::new();
2176    let mut has_conflict = false;
2177
2178    for name in &all_names {
2179        let in_base = base_map.get(name.as_str());
2180        let in_ours = ours_map.get(name.as_str());
2181        let in_theirs = theirs_map.get(name.as_str());
2182
2183        match (in_base, in_ours, in_theirs) {
2184            (Some(b), Some(o), Some(t)) => {
2185                if o == t {
2186                    merged_members.push(o.to_string());
2187                } else if b == o {
2188                    merged_members.push(t.to_string());
2189                } else if b == t {
2190                    merged_members.push(o.to_string());
2191                } else if let Some(merged) = diffy_merge(b, o, t) {
2192                    merged_members.push(merged);
2193                } else if let Some(merged) = git_merge_string(b, o, t) {
2194                    merged_members.push(merged);
2195                } else {
2196                    has_conflict = true;
2197                    merged_members.push(scoped_conflict_marker(name, Some(o), Some(t), false, false, marker_format));
2198                }
2199            }
2200            (Some(b), Some(o), None) => {
2201                if *b != *o { merged_members.push(o.to_string()); }
2202            }
2203            (Some(b), None, Some(t)) => {
2204                if *b != *t { merged_members.push(t.to_string()); }
2205            }
2206            (None, Some(o), None) => merged_members.push(o.to_string()),
2207            (None, None, Some(t)) => merged_members.push(t.to_string()),
2208            (None, Some(o), Some(t)) => {
2209                if o == t {
2210                    merged_members.push(o.to_string());
2211                } else {
2212                    has_conflict = true;
2213                    merged_members.push(scoped_conflict_marker(name, Some(o), Some(t), false, false, marker_format));
2214                }
2215            }
2216            (Some(_), None, None) | (None, None, None) => {}
2217        }
2218    }
2219
2220    let has_multiline_members = has_multiline_hint || merged_members.iter().any(|m| m.contains('\n'));
2221    let mut result = String::new();
2222    result.push_str(ours_header);
2223    if !ours_header.ends_with('\n') { result.push('\n'); }
2224    for (i, member) in merged_members.iter().enumerate() {
2225        result.push_str(member);
2226        if !member.ends_with('\n') { result.push('\n'); }
2227        if i < merged_members.len() - 1 && has_multiline_members && !member.ends_with("\n\n") {
2228            result.push('\n');
2229        }
2230    }
2231    result.push_str(ours_footer);
2232    if !ours_footer.ends_with('\n') && ours.ends_with('\n') { result.push('\n'); }
2233
2234    Some(InnerMergeResult {
2235        content: result,
2236        has_conflicts: has_conflict,
2237    })
2238}
2239
/// Extract the header (class declaration) and footer (closing brace) from a container.
/// Supports both brace-delimited (JS/TS/Java/Rust/C) and indentation-based (Python) containers.
///
/// Returns `(header, footer)` as slices of `content`:
/// - Python style (some line starts with `class `/`def ` and ends with `:`, and no
///   line contains `{`): the header runs through the first line ending in `:`,
///   and the footer is empty.
/// - Brace style: the header runs through the first line containing `{`, and the
///   footer starts at the last line that is exactly `}` or `};`.
///
/// Returns `None` when `content` has fewer than two lines or the required
/// header/footer line cannot be found.
///
/// Offsets are taken from the real start of each line, so `\r\n` line endings
/// are sliced correctly and never split a multi-byte character.
fn extract_container_wrapper(content: &str) -> Option<(&str, &str)> {
    let lines: Vec<&str> = content.lines().collect();
    if lines.len() < 2 {
        return None;
    }

    // Byte offset at which each line begins. `split_inclusive` keeps the line
    // terminator, so the running sum is exact for both `\n` and `\r\n`.
    let line_starts: Vec<usize> = content
        .split_inclusive('\n')
        .scan(0usize, |next, raw| {
            let start = *next;
            *next += raw.len();
            Some(start)
        })
        .collect();
    // An index past the last line maps to the end of the content.
    let offset_of = |idx: usize| line_starts.get(idx).copied().unwrap_or(content.len());

    // Check if this is a Python-style container (ends with `:` instead of `{`)
    let is_python_style = lines.iter().any(|l| {
        let trimmed = l.trim();
        (trimmed.starts_with("class ") || trimmed.starts_with("def "))
            && trimmed.ends_with(':')
    }) && !lines.iter().any(|l| l.contains('{'));

    if is_python_style {
        // Python: header is the `class Foo:` line, no footer
        let header_end = lines.iter().position(|l| l.trim().ends_with(':'))?;
        let header = &content[..offset_of(header_end + 1)];
        // No closing brace in Python, so the footer is an empty slice at the end.
        let footer = &content[content.len()..];
        Some((header, footer))
    } else {
        // Brace-delimited: header up to `{`, footer from last `}`
        let header_end = lines.iter().position(|l| l.contains('{'))?;
        let footer_start = lines
            .iter()
            .rposition(|l| matches!(l.trim(), "}" | "};"))?;

        let header = &content[..offset_of(header_end + 1)];
        let footer = &content[offset_of(footer_start)..];
        Some((header, footer))
    }
}
2289
2290/// Extract named member chunks from a container body.
2291///
2292/// Identifies member boundaries by indentation: members start at the first
2293/// indentation level inside the container. Each member extends until the next
2294/// member starts or the container closes.
2295fn extract_member_chunks(content: &str) -> Option<Vec<MemberChunk>> {
2296    let lines: Vec<&str> = content.lines().collect();
2297    if lines.len() < 2 {
2298        return None;
2299    }
2300
2301    // Check if Python-style (indentation-based)
2302    let is_python_style = lines.iter().any(|l| {
2303        let trimmed = l.trim();
2304        (trimmed.starts_with("class ") || trimmed.starts_with("def "))
2305            && trimmed.ends_with(':')
2306    }) && !lines.iter().any(|l| l.contains('{'));
2307
2308    // Find the body range
2309    let body_start = if is_python_style {
2310        lines.iter().position(|l| l.trim().ends_with(':'))? + 1
2311    } else {
2312        lines.iter().position(|l| l.contains('{'))? + 1
2313    };
2314    let body_end = if is_python_style {
2315        // Python: body extends to end of content
2316        lines.len()
2317    } else {
2318        lines.iter().rposition(|l| {
2319            let trimmed = l.trim();
2320            trimmed == "}" || trimmed == "};"
2321        })?
2322    };
2323
2324    if body_start >= body_end {
2325        return None;
2326    }
2327
2328    // Determine member indentation level by looking at first non-empty body line
2329    let member_indent = lines[body_start..body_end]
2330        .iter()
2331        .find(|l| !l.trim().is_empty())
2332        .map(|l| l.len() - l.trim_start().len())?;
2333
2334    let mut chunks: Vec<MemberChunk> = Vec::new();
2335    let mut current_chunk_lines: Vec<&str> = Vec::new();
2336    let mut current_name: Option<String> = None;
2337
2338    for line in &lines[body_start..body_end] {
2339        let trimmed = line.trim();
2340        if trimmed.is_empty() {
2341            // Blank lines: if we have a current chunk, include them
2342            if current_name.is_some() {
2343                // Only include if not trailing blanks
2344                current_chunk_lines.push(line);
2345            }
2346            continue;
2347        }
2348
2349        let indent = line.len() - line.trim_start().len();
2350
2351        // Is this a new member declaration at the member indent level?
2352        // Exclude closing braces, comments, and decorators/annotations
2353        if indent == member_indent
2354            && !trimmed.starts_with("//")
2355            && !trimmed.starts_with("/*")
2356            && !trimmed.starts_with("*")
2357            && !trimmed.starts_with("#")
2358            && !trimmed.starts_with("@")
2359            && trimmed != "}"
2360            && trimmed != "};"
2361            && trimmed != ","
2362        {
2363            // Save previous chunk
2364            if let Some(name) = current_name.take() {
2365                // Trim trailing blank lines
2366                while current_chunk_lines.last().map_or(false, |l| l.trim().is_empty()) {
2367                    current_chunk_lines.pop();
2368                }
2369                if !current_chunk_lines.is_empty() {
2370                    chunks.push(MemberChunk {
2371                        name,
2372                        content: current_chunk_lines.join("\n"),
2373                    });
2374                }
2375                current_chunk_lines.clear();
2376            }
2377
2378            // Start new chunk — extract member name
2379            let name = extract_member_name(trimmed);
2380            current_name = Some(name);
2381            current_chunk_lines.push(line);
2382        } else if current_name.is_some() {
2383            // Continuation of current member (body lines, nested blocks)
2384            current_chunk_lines.push(line);
2385        } else {
2386            // Content before first member (decorators, comments for first member)
2387            // Attach to next member
2388            current_chunk_lines.push(line);
2389        }
2390    }
2391
2392    // Save last chunk
2393    if let Some(name) = current_name {
2394        while current_chunk_lines.last().map_or(false, |l| l.trim().is_empty()) {
2395            current_chunk_lines.pop();
2396        }
2397        if !current_chunk_lines.is_empty() {
2398            chunks.push(MemberChunk {
2399                name,
2400                content: current_chunk_lines.join("\n"),
2401            });
2402        }
2403    }
2404
2405    if chunks.is_empty() {
2406        None
2407    } else {
2408        Some(chunks)
2409    }
2410}
2411
/// Extract a member name from a declaration line.
///
/// Resolution order:
/// 1. Go method with receiver (`func (c *Calculator) Add(`): the identifier just
///    before the first `(` that follows the receiver's closing `)`.
/// 2. Any declaration containing `(`: the identifier immediately before the
///    first `(` (Java `public int add(`, Rust `pub fn add(`, Python `def add(`, ...).
/// 3. Fields/properties/variants: leading modifier keywords are stripped and the
///    first identifier is taken.
///
/// A leading Rust restricted-visibility qualifier (`pub(crate)`, `pub(super)`,
/// `pub(self)`, `pub(in path)`) is removed before steps 2 and 3. Otherwise its
/// parenthesis would be mistaken for a parameter list and every such member
/// would be named `pub`.
///
/// If no identifier can be found, the first 20 characters of the trimmed line
/// are returned so the caller still gets a non-empty key.
fn extract_member_name(line: &str) -> String {
    /// Identifier characters as understood by this heuristic.
    fn is_ident_char(c: char) -> bool {
        c.is_alphanumeric() || c == '_'
    }

    /// The run of identifier characters at the very end of `text` (may be empty).
    fn trailing_identifier(text: &str) -> &str {
        let prefix_len = text.trim_end_matches(is_ident_char).len();
        &text[prefix_len..]
    }

    /// Drop a leading `pub(crate)` / `pub(super)` / `pub(self)` / `pub(in ...)`.
    /// Anything else that merely starts with `pub(` (e.g. a method named `pub`)
    /// is returned unchanged.
    fn strip_restricted_visibility(decl: &str) -> &str {
        let Some(rest) = decl.strip_prefix("pub(") else {
            return decl;
        };
        let Some(close) = rest.find(')') else {
            return decl;
        };
        let scope = rest[..close].trim();
        if matches!(scope, "crate" | "super" | "self") || scope.starts_with("in ") {
            rest[close + 1..].trim_start()
        } else {
            decl
        }
    }

    let trimmed = line.trim();

    // Go method receiver: `func (c *Calculator) Add(` -> skip receiver, find name before second `(`
    if trimmed.starts_with("func ") && trimmed.get(5..6) == Some("(") {
        if let Some(recv_close) = trimmed.find(')') {
            let after_recv = &trimmed[recv_close + 1..];
            if let Some(paren_pos) = after_recv.find('(') {
                let name = trailing_identifier(after_recv[..paren_pos].trim());
                if !name.is_empty() {
                    return name.to_string();
                }
            }
        }
    }

    let decl = strip_restricted_visibility(trimmed);

    // Strategy 1: For method/function declarations with parentheses,
    // the name is the identifier immediately before `(`.
    if let Some(paren_pos) = decl.find('(') {
        let name = trailing_identifier(decl[..paren_pos].trim_end());
        if !name.is_empty() {
            return name.to_string();
        }
    }

    // Strategy 2: For fields/properties/variants without parens,
    // strip keywords and take the first identifier. Each keyword is tried once,
    // in this order.
    let mut s = decl;
    for keyword in &[
        "export ", "public ", "private ", "protected ", "static ",
        "abstract ", "async ", "override ", "readonly ",
        "pub ", "fn ", "def ", "get ", "set ",
    ] {
        if let Some(rest) = s.strip_prefix(keyword) {
            s = rest;
        }
    }
    if let Some(rest) = s.strip_prefix("fn ") {
        s = rest;
    }

    let name: String = s.chars().take_while(|c| is_ident_char(*c)).collect();

    if name.is_empty() {
        trimmed.chars().take(20).collect()
    } else {
        name
    }
}
2484
/// Check if content looks binary (contains null bytes in first 8KB).
fn is_binary(content: &str) -> bool {
    content.as_bytes().iter().take(8192).any(|&b| b == 0)
}

/// Check if content already contains git conflict markers.
/// This happens with AU/AA conflicts where git stores markers in stage blobs.
///
/// Both an opening (`<<<<<<<`) and a closing (`>>>>>>>`) marker must be present
/// somewhere in the content; their order and position are not checked.
fn has_conflict_markers(content: &str) -> bool {
    content.contains("<<<<<<<") && content.contains(">>>>>>>")
}

/// Returns true for data/config and markup/document file formats where Sesame
/// separator expansion (`{`, `}`, `;`) is counterproductive because those chars
/// are structural content rather than code block separators.
///
/// The decision is made purely on the lower-cased path suffix.
///
/// Note: template files like .svelte/.vue are NOT included here because their
/// embedded `<script>` sections contain real code where Sesame helps.
fn skip_sesame(file_path: &str) -> bool {
    let path_lower = file_path.to_lowercase();
    let extensions = [
        // Data/config formats
        ".json", ".yaml", ".yml", ".toml", ".lock", ".xml", ".csv", ".tsv",
        ".ini", ".cfg", ".conf", ".properties", ".env",
        // Markup/document formats
        ".md", ".markdown", ".txt", ".rst", ".svg", ".html", ".htm",
    ];
    extensions.iter().any(|ext| path_lower.ends_with(ext))
}
2513
/// Expand syntactic separators into separate lines for finer merge alignment.
/// Inspired by Sesame (arXiv:2407.18888): isolating separators lets line-based
/// merge tools see block boundaries as independent change units.
///
/// Every `{`, `}` and `;` found outside a string literal is placed on its own
/// line: a newline is inserted before it (unless the output is empty or already
/// ends with one) and another newline after it. String literals are recognised
/// by their opening delimiter (`"`, `'` or `` ` ``) and run until the same
/// delimiter reappears; inside a literal a backslash protects the following
/// character. All other characters are copied through unchanged.
fn expand_separators(content: &str) -> String {
    let mut expanded = String::with_capacity(content.len() * 2);
    // Delimiter of the string literal we are currently inside, if any.
    let mut open_quote: Option<char> = None;
    let mut escape_next = false;

    for ch in content.chars() {
        if escape_next {
            // Character following a backslash inside a literal: copy verbatim.
            expanded.push(ch);
            escape_next = false;
            continue;
        }

        match open_quote {
            Some(delimiter) => {
                if ch == '\\' {
                    escape_next = true;
                } else if ch == delimiter {
                    open_quote = None;
                }
                expanded.push(ch);
            }
            None => match ch {
                '"' | '\'' | '`' => {
                    open_quote = Some(ch);
                    expanded.push(ch);
                }
                '{' | '}' | ';' => {
                    if !expanded.is_empty() && !expanded.ends_with('\n') {
                        expanded.push('\n');
                    }
                    expanded.push(ch);
                    expanded.push('\n');
                }
                _ => expanded.push(ch),
            },
        }
    }

    expanded
}
2563
/// Normalise merged output after separator expansion.
///
/// Each line of `merged` is re-emitted followed by a single `\n` (so `\r\n`
/// endings become `\n` and a missing final newline is added), and any run of
/// blank lines at the very end is reduced so the result ends with exactly one
/// newline. Empty input yields an empty string.
///
/// NOTE(review): separator-only lines (`{`, `}`, `;`) are currently emitted
/// unchanged; they are NOT re-joined with the preceding line. The previous
/// implementation contained a join branch guarded by "output does not end with
/// a newline", which can never hold because every emitted line is
/// newline-terminated. That unreachable branch has been removed without
/// changing the output. `_base` is accepted for interface compatibility and is
/// not used; real collapsing guided by the base formatting is not implemented.
fn collapse_separators(merged: &str, _base: &str) -> String {
    let mut result = String::with_capacity(merged.len() + 1);

    for line in merged.lines() {
        result.push_str(line);
        result.push('\n');
    }

    // Trim any trailing extra newlines to match original style
    while result.ends_with("\n\n") {
        result.pop();
    }

    result
}
2609
2610#[cfg(test)]
2611mod tests {
2612    use super::*;
2613
2614    #[test]
2615    fn test_replace_at_word_boundaries() {
2616        // Should replace standalone occurrences
2617        assert_eq!(replace_at_word_boundaries("fn get() {}", "get", "__E__"), "fn __E__() {}");
2618        // Should NOT replace inside longer identifiers
2619        assert_eq!(replace_at_word_boundaries("fn getAll() {}", "get", "__E__"), "fn getAll() {}");
2620        assert_eq!(replace_at_word_boundaries("fn _get() {}", "get", "__E__"), "fn _get() {}");
2621        // Should replace multiple standalone occurrences
2622        assert_eq!(
2623            replace_at_word_boundaries("pub enum Source { Source }", "Source", "__E__"),
2624            "pub enum __E__ { __E__ }"
2625        );
2626        // Should not replace substring at start/end of identifiers
2627        assert_eq!(
2628            replace_at_word_boundaries("SourceManager isSource", "Source", "__E__"),
2629            "SourceManager isSource"
2630        );
2631        // Should handle multi-byte UTF-8 characters (emojis) without panicking
2632        assert_eq!(
2633            replace_at_word_boundaries("❌ get ✅", "get", "__E__"),
2634            "❌ __E__ ✅"
2635        );
2636        assert_eq!(
2637            replace_at_word_boundaries("fn 名前() { get }", "get", "__E__"),
2638            "fn 名前() { __E__ }"
2639        );
2640        // Emoji-only content with no needle match should pass through unchanged
2641        assert_eq!(
2642            replace_at_word_boundaries("🎉🚀✨", "get", "__E__"),
2643            "🎉🚀✨"
2644        );
2645    }
2646
2647    #[test]
2648    fn test_fast_path_identical() {
2649        let content = "hello world";
2650        let result = entity_merge(content, content, content, "test.ts");
2651        assert!(result.is_clean());
2652        assert_eq!(result.content, content);
2653    }
2654
2655    #[test]
2656    fn test_fast_path_only_ours_changed() {
2657        let base = "hello";
2658        let ours = "hello world";
2659        let result = entity_merge(base, ours, base, "test.ts");
2660        assert!(result.is_clean());
2661        assert_eq!(result.content, ours);
2662    }
2663
2664    #[test]
2665    fn test_fast_path_only_theirs_changed() {
2666        let base = "hello";
2667        let theirs = "hello world";
2668        let result = entity_merge(base, base, theirs, "test.ts");
2669        assert!(result.is_clean());
2670        assert_eq!(result.content, theirs);
2671    }
2672
2673    #[test]
2674    fn test_different_functions_no_conflict() {
2675        // Core value prop: two agents add different functions to the same file
2676        let base = r#"export function existing() {
2677    return 1;
2678}
2679"#;
2680        let ours = r#"export function existing() {
2681    return 1;
2682}
2683
2684export function agentA() {
2685    return "added by agent A";
2686}
2687"#;
2688        let theirs = r#"export function existing() {
2689    return 1;
2690}
2691
2692export function agentB() {
2693    return "added by agent B";
2694}
2695"#;
2696        let result = entity_merge(base, ours, theirs, "test.ts");
2697        assert!(
2698            result.is_clean(),
2699            "Should auto-resolve: different functions added. Conflicts: {:?}",
2700            result.conflicts
2701        );
2702        assert!(
2703            result.content.contains("agentA"),
2704            "Should contain agentA function"
2705        );
2706        assert!(
2707            result.content.contains("agentB"),
2708            "Should contain agentB function"
2709        );
2710    }
2711
2712    #[test]
2713    fn test_same_function_modified_by_both_conflict() {
2714        let base = r#"export function shared() {
2715    return "original";
2716}
2717"#;
2718        let ours = r#"export function shared() {
2719    return "modified by ours";
2720}
2721"#;
2722        let theirs = r#"export function shared() {
2723    return "modified by theirs";
2724}
2725"#;
2726        let result = entity_merge(base, ours, theirs, "test.ts");
2727        // This should be a conflict since both modified the same function incompatibly
2728        assert!(
2729            !result.is_clean(),
2730            "Should conflict when both modify same function differently"
2731        );
2732        assert_eq!(result.conflicts.len(), 1);
2733        assert_eq!(result.conflicts[0].entity_name, "shared");
2734    }
2735
2736    #[test]
2737    fn test_fallback_for_unknown_filetype() {
2738        // Non-adjacent changes should merge cleanly with line-level merge
2739        let base = "line 1\nline 2\nline 3\nline 4\nline 5\n";
2740        let ours = "line 1 modified\nline 2\nline 3\nline 4\nline 5\n";
2741        let theirs = "line 1\nline 2\nline 3\nline 4\nline 5 modified\n";
2742        let result = entity_merge(base, ours, theirs, "test.xyz");
2743        assert!(
2744            result.is_clean(),
2745            "Non-adjacent changes should merge cleanly. Conflicts: {:?}",
2746            result.conflicts,
2747        );
2748    }
2749
2750    #[test]
2751    fn test_line_level_fallback() {
2752        // Non-adjacent changes merge cleanly in 3-way merge
2753        let base = "a\nb\nc\nd\ne\n";
2754        let ours = "A\nb\nc\nd\ne\n";
2755        let theirs = "a\nb\nc\nd\nE\n";
2756        let result = line_level_fallback(base, ours, theirs, "test.rs");
2757        assert!(result.is_clean());
2758        assert!(result.stats.used_fallback);
2759        assert_eq!(result.content, "A\nb\nc\nd\nE\n");
2760    }
2761
2762    #[test]
2763    fn test_line_level_fallback_conflict() {
2764        // Same line changed differently → conflict
2765        let base = "a\nb\nc\n";
2766        let ours = "X\nb\nc\n";
2767        let theirs = "Y\nb\nc\n";
2768        let result = line_level_fallback(base, ours, theirs, "test.rs");
2769        assert!(!result.is_clean());
2770        assert!(result.stats.used_fallback);
2771    }
2772
2773    #[test]
2774    fn test_expand_separators() {
2775        let code = "function foo() { return 1; }";
2776        let expanded = expand_separators(code);
2777        // Separators should be on their own lines
2778        assert!(expanded.contains("{\n"), "Opening brace should have newline after");
2779        assert!(expanded.contains(";\n"), "Semicolons should have newline after");
2780        assert!(expanded.contains("\n}"), "Closing brace should have newline before");
2781    }
2782
2783    #[test]
2784    fn test_expand_separators_preserves_strings() {
2785        let code = r#"let x = "hello { world };";"#;
2786        let expanded = expand_separators(code);
2787        // Separators inside strings should NOT be expanded
2788        assert!(
2789            expanded.contains("\"hello { world };\""),
2790            "Separators in strings should be preserved: {}",
2791            expanded
2792        );
2793    }
2794
2795    #[test]
2796    fn test_is_import_region() {
2797        assert!(is_import_region("import foo from 'foo';\nimport bar from 'bar';\n"));
2798        assert!(is_import_region("use std::io;\nuse std::fs;\n"));
2799        assert!(!is_import_region("let x = 1;\nlet y = 2;\n"));
2800        // Mixed: 1 import + 2 non-imports → not import region
2801        assert!(!is_import_region("import foo from 'foo';\nlet x = 1;\nlet y = 2;\n"));
2802        // Empty → not import region
2803        assert!(!is_import_region(""));
2804    }
2805
2806    #[test]
2807    fn test_is_import_line() {
2808        // JS/TS
2809        assert!(is_import_line("import foo from 'foo';"));
2810        assert!(is_import_line("import { bar } from 'bar';"));
2811        assert!(is_import_line("from typing import List"));
2812        // Rust
2813        assert!(is_import_line("use std::io::Read;"));
2814        // C/C++
2815        assert!(is_import_line("#include <stdio.h>"));
2816        // Node require
2817        assert!(is_import_line("const fs = require('fs');"));
2818        // Not imports
2819        assert!(!is_import_line("let x = 1;"));
2820        assert!(!is_import_line("function foo() {}"));
2821    }
2822
2823    #[test]
2824    fn test_commutative_import_merge_both_add_different() {
2825        // The key scenario: both branches add different imports
2826        let base = "import a from 'a';\nimport b from 'b';\n";
2827        let ours = "import a from 'a';\nimport b from 'b';\nimport c from 'c';\n";
2828        let theirs = "import a from 'a';\nimport b from 'b';\nimport d from 'd';\n";
2829        let result = merge_imports_commutatively(base, ours, theirs);
2830        assert!(result.contains("import a from 'a';"));
2831        assert!(result.contains("import b from 'b';"));
2832        assert!(result.contains("import c from 'c';"));
2833        assert!(result.contains("import d from 'd';"));
2834    }
2835
2836    #[test]
2837    fn test_commutative_import_merge_one_removes() {
2838        // Ours removes an import, theirs keeps it → removed
2839        let base = "import a from 'a';\nimport b from 'b';\nimport c from 'c';\n";
2840        let ours = "import a from 'a';\nimport c from 'c';\n";
2841        let theirs = "import a from 'a';\nimport b from 'b';\nimport c from 'c';\n";
2842        let result = merge_imports_commutatively(base, ours, theirs);
2843        assert!(result.contains("import a from 'a';"));
2844        assert!(!result.contains("import b from 'b';"), "Removed import should stay removed");
2845        assert!(result.contains("import c from 'c';"));
2846    }
2847
2848    #[test]
2849    fn test_commutative_import_merge_both_add_same() {
2850        // Both add the same import → should appear only once
2851        let base = "import a from 'a';\n";
2852        let ours = "import a from 'a';\nimport b from 'b';\n";
2853        let theirs = "import a from 'a';\nimport b from 'b';\n";
2854        let result = merge_imports_commutatively(base, ours, theirs);
2855        let count = result.matches("import b from 'b';").count();
2856        assert_eq!(count, 1, "Duplicate import should be deduplicated");
2857    }
2858
2859    #[test]
2860    fn test_inner_entity_merge_different_methods() {
2861        // Two agents modify different methods in the same class
2862        // This would normally conflict with diffy because the changes are adjacent
2863        let base = r#"export class Calculator {
2864    add(a: number, b: number): number {
2865        return a + b;
2866    }
2867
2868    subtract(a: number, b: number): number {
2869        return a - b;
2870    }
2871}
2872"#;
2873        let ours = r#"export class Calculator {
2874    add(a: number, b: number): number {
2875        // Added logging
2876        console.log("adding", a, b);
2877        return a + b;
2878    }
2879
2880    subtract(a: number, b: number): number {
2881        return a - b;
2882    }
2883}
2884"#;
2885        let theirs = r#"export class Calculator {
2886    add(a: number, b: number): number {
2887        return a + b;
2888    }
2889
2890    subtract(a: number, b: number): number {
2891        // Added validation
2892        if (b > a) throw new Error("negative");
2893        return a - b;
2894    }
2895}
2896"#;
2897        let result = entity_merge(base, ours, theirs, "test.ts");
2898        assert!(
2899            result.is_clean(),
2900            "Different methods modified should auto-merge via inner entity merge. Conflicts: {:?}",
2901            result.conflicts,
2902        );
2903        assert!(result.content.contains("console.log"), "Should contain ours changes");
2904        assert!(result.content.contains("negative"), "Should contain theirs changes");
2905    }
2906
2907    #[test]
2908    fn test_inner_entity_merge_both_add_different_methods() {
2909        // Both branches add different methods to the same class
2910        let base = r#"export class Calculator {
2911    add(a: number, b: number): number {
2912        return a + b;
2913    }
2914}
2915"#;
2916        let ours = r#"export class Calculator {
2917    add(a: number, b: number): number {
2918        return a + b;
2919    }
2920
2921    multiply(a: number, b: number): number {
2922        return a * b;
2923    }
2924}
2925"#;
2926        let theirs = r#"export class Calculator {
2927    add(a: number, b: number): number {
2928        return a + b;
2929    }
2930
2931    divide(a: number, b: number): number {
2932        return a / b;
2933    }
2934}
2935"#;
2936        let result = entity_merge(base, ours, theirs, "test.ts");
2937        assert!(
2938            result.is_clean(),
2939            "Both adding different methods should auto-merge. Conflicts: {:?}",
2940            result.conflicts,
2941        );
2942        assert!(result.content.contains("multiply"), "Should contain ours's new method");
2943        assert!(result.content.contains("divide"), "Should contain theirs's new method");
2944    }
2945
2946    #[test]
2947    fn test_inner_entity_merge_same_method_modified_still_conflicts() {
2948        // Both modify the same method differently → should still conflict
2949        let base = r#"export class Calculator {
2950    add(a: number, b: number): number {
2951        return a + b;
2952    }
2953
2954    subtract(a: number, b: number): number {
2955        return a - b;
2956    }
2957}
2958"#;
2959        let ours = r#"export class Calculator {
2960    add(a: number, b: number): number {
2961        return a + b + 1;
2962    }
2963
2964    subtract(a: number, b: number): number {
2965        return a - b;
2966    }
2967}
2968"#;
2969        let theirs = r#"export class Calculator {
2970    add(a: number, b: number): number {
2971        return a + b + 2;
2972    }
2973
2974    subtract(a: number, b: number): number {
2975        return a - b;
2976    }
2977}
2978"#;
2979        let result = entity_merge(base, ours, theirs, "test.ts");
2980        assert!(
2981            !result.is_clean(),
2982            "Both modifying same method differently should still conflict"
2983        );
2984    }
2985
2986    #[test]
2987    fn test_extract_member_chunks() {
2988        let class_body = r#"export class Foo {
2989    bar() {
2990        return 1;
2991    }
2992
2993    baz() {
2994        return 2;
2995    }
2996}
2997"#;
2998        let chunks = extract_member_chunks(class_body).unwrap();
2999        assert_eq!(chunks.len(), 2, "Should find 2 members, found {:?}", chunks.iter().map(|c| &c.name).collect::<Vec<_>>());
3000        assert_eq!(chunks[0].name, "bar");
3001        assert_eq!(chunks[1].name, "baz");
3002    }
3003
3004    #[test]
3005    fn test_extract_member_name() {
3006        assert_eq!(extract_member_name("add(a, b) {"), "add");
3007        assert_eq!(extract_member_name("fn add(&self, a: i32) -> i32 {"), "add");
3008        assert_eq!(extract_member_name("def add(self, a, b):"), "add");
3009        assert_eq!(extract_member_name("public static getValue(): number {"), "getValue");
3010        assert_eq!(extract_member_name("async fetchData() {"), "fetchData");
3011    }
3012
3013    #[test]
3014    fn test_commutative_import_merge_rust_use() {
3015        let base = "use std::io;\nuse std::fs;\n";
3016        let ours = "use std::io;\nuse std::fs;\nuse std::path::Path;\n";
3017        let theirs = "use std::io;\nuse std::fs;\nuse std::collections::HashMap;\n";
3018        let result = merge_imports_commutatively(base, ours, theirs);
3019        assert!(result.contains("use std::path::Path;"));
3020        assert!(result.contains("use std::collections::HashMap;"));
3021        assert!(result.contains("use std::io;"));
3022        assert!(result.contains("use std::fs;"));
3023    }
3024
3025    #[test]
3026    fn test_is_whitespace_only_diff_true() {
3027        // Same content, different indentation
3028        assert!(is_whitespace_only_diff(
3029            "    return 1;\n    return 2;\n",
3030            "      return 1;\n      return 2;\n"
3031        ));
3032        // Same content, extra blank lines
3033        assert!(is_whitespace_only_diff(
3034            "return 1;\nreturn 2;\n",
3035            "return 1;\n\nreturn 2;\n"
3036        ));
3037    }
3038
3039    #[test]
3040    fn test_is_whitespace_only_diff_false() {
3041        // Different content
3042        assert!(!is_whitespace_only_diff(
3043            "    return 1;\n",
3044            "    return 2;\n"
3045        ));
3046        // Added code
3047        assert!(!is_whitespace_only_diff(
3048            "return 1;\n",
3049            "return 1;\nconsole.log('x');\n"
3050        ));
3051    }
3052
3053    #[test]
3054    fn test_ts_interface_both_add_different_fields() {
3055        let base = "interface Config {\n    name: string;\n}\n";
3056        let ours = "interface Config {\n    name: string;\n    age: number;\n}\n";
3057        let theirs = "interface Config {\n    name: string;\n    email: string;\n}\n";
3058        let result = entity_merge(base, ours, theirs, "test.ts");
3059        eprintln!("TS interface: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
3060        eprintln!("Content: {:?}", result.content);
3061        assert!(
3062            result.is_clean(),
3063            "Both adding different fields to TS interface should merge. Conflicts: {:?}",
3064            result.conflicts,
3065        );
3066        assert!(result.content.contains("age"));
3067        assert!(result.content.contains("email"));
3068    }
3069
3070    #[test]
3071    fn test_rust_enum_both_add_different_variants() {
3072        let base = "enum Color {\n    Red,\n    Blue,\n}\n";
3073        let ours = "enum Color {\n    Red,\n    Blue,\n    Green,\n}\n";
3074        let theirs = "enum Color {\n    Red,\n    Blue,\n    Yellow,\n}\n";
3075        let result = entity_merge(base, ours, theirs, "test.rs");
3076        eprintln!("Rust enum: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
3077        eprintln!("Content: {:?}", result.content);
3078        assert!(
3079            result.is_clean(),
3080            "Both adding different enum variants should merge. Conflicts: {:?}",
3081            result.conflicts,
3082        );
3083        assert!(result.content.contains("Green"));
3084        assert!(result.content.contains("Yellow"));
3085    }
3086
3087    #[test]
3088    fn test_python_both_add_different_decorators() {
3089        // Both add different decorators to the same function
3090        let base = "def foo():\n    return 1\n\ndef bar():\n    return 2\n";
3091        let ours = "@cache\ndef foo():\n    return 1\n\ndef bar():\n    return 2\n";
3092        let theirs = "@deprecated\ndef foo():\n    return 1\n\ndef bar():\n    return 2\n";
3093        let result = entity_merge(base, ours, theirs, "test.py");
3094        assert!(
3095            result.is_clean(),
3096            "Both adding different decorators should merge. Conflicts: {:?}",
3097            result.conflicts,
3098        );
3099        assert!(result.content.contains("@cache"));
3100        assert!(result.content.contains("@deprecated"));
3101        assert!(result.content.contains("def foo()"));
3102    }
3103
3104    #[test]
3105    fn test_decorator_plus_body_change() {
3106        // One adds decorator, other modifies body — should merge both
3107        let base = "def foo():\n    return 1\n";
3108        let ours = "@cache\ndef foo():\n    return 1\n";
3109        let theirs = "def foo():\n    return 42\n";
3110        let result = entity_merge(base, ours, theirs, "test.py");
3111        assert!(
3112            result.is_clean(),
3113            "Decorator + body change should merge. Conflicts: {:?}",
3114            result.conflicts,
3115        );
3116        assert!(result.content.contains("@cache"));
3117        assert!(result.content.contains("return 42"));
3118    }
3119
3120    #[test]
3121    fn test_ts_class_decorator_merge() {
3122        // TypeScript decorators on class methods — both add different decorators
3123        let base = "class Foo {\n    bar() {\n        return 1;\n    }\n}\n";
3124        let ours = "class Foo {\n    @Injectable()\n    bar() {\n        return 1;\n    }\n}\n";
3125        let theirs = "class Foo {\n    @Deprecated()\n    bar() {\n        return 1;\n    }\n}\n";
3126        let result = entity_merge(base, ours, theirs, "test.ts");
3127        assert!(
3128            result.is_clean(),
3129            "Both adding different decorators to same method should merge. Conflicts: {:?}",
3130            result.conflicts,
3131        );
3132        assert!(result.content.contains("@Injectable()"));
3133        assert!(result.content.contains("@Deprecated()"));
3134        assert!(result.content.contains("bar()"));
3135    }
3136
3137    #[test]
3138    fn test_non_adjacent_intra_function_changes() {
3139        let base = r#"export function process(data: any) {
3140    const validated = validate(data);
3141    const transformed = transform(validated);
3142    const saved = save(transformed);
3143    return saved;
3144}
3145"#;
3146        let ours = r#"export function process(data: any) {
3147    const validated = validate(data);
3148    const transformed = transform(validated);
3149    const saved = save(transformed);
3150    console.log("saved", saved);
3151    return saved;
3152}
3153"#;
3154        let theirs = r#"export function process(data: any) {
3155    console.log("input", data);
3156    const validated = validate(data);
3157    const transformed = transform(validated);
3158    const saved = save(transformed);
3159    return saved;
3160}
3161"#;
3162        let result = entity_merge(base, ours, theirs, "test.ts");
3163        assert!(
3164            result.is_clean(),
3165            "Non-adjacent changes within same function should merge via diffy. Conflicts: {:?}",
3166            result.conflicts,
3167        );
3168        assert!(result.content.contains("console.log(\"saved\""));
3169        assert!(result.content.contains("console.log(\"input\""));
3170    }
3171
3172    #[test]
3173    fn test_method_reordering_with_modification() {
3174        // Agent A reorders methods in class, Agent B modifies one method
3175        // Inner entity merge matches by name, so reordering should be transparent
3176        let base = r#"class Service {
3177    getUser(id: string) {
3178        return db.find(id);
3179    }
3180
3181    createUser(data: any) {
3182        return db.create(data);
3183    }
3184
3185    deleteUser(id: string) {
3186        return db.delete(id);
3187    }
3188}
3189"#;
3190        // Ours: reorder methods (move deleteUser before createUser)
3191        let ours = r#"class Service {
3192    getUser(id: string) {
3193        return db.find(id);
3194    }
3195
3196    deleteUser(id: string) {
3197        return db.delete(id);
3198    }
3199
3200    createUser(data: any) {
3201        return db.create(data);
3202    }
3203}
3204"#;
3205        // Theirs: modify getUser
3206        let theirs = r#"class Service {
3207    getUser(id: string) {
3208        console.log("fetching", id);
3209        return db.find(id);
3210    }
3211
3212    createUser(data: any) {
3213        return db.create(data);
3214    }
3215
3216    deleteUser(id: string) {
3217        return db.delete(id);
3218    }
3219}
3220"#;
3221        let result = entity_merge(base, ours, theirs, "test.ts");
3222        eprintln!("Method reorder: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
3223        eprintln!("Content:\n{}", result.content);
3224        assert!(
3225            result.is_clean(),
3226            "Method reordering + modification should merge. Conflicts: {:?}",
3227            result.conflicts,
3228        );
3229        assert!(result.content.contains("console.log(\"fetching\""), "Should contain theirs modification");
3230        assert!(result.content.contains("deleteUser"), "Should have deleteUser");
3231        assert!(result.content.contains("createUser"), "Should have createUser");
3232    }
3233
3234    #[test]
3235    fn test_doc_comment_plus_body_change() {
3236        // One side adds JSDoc comment, other modifies function body
3237        // Doc comments are part of the entity region — they should merge with body changes
3238        let base = r#"export function calculate(a: number, b: number): number {
3239    return a + b;
3240}
3241"#;
3242        let ours = r#"/**
3243 * Calculate the sum of two numbers.
3244 * @param a - First number
3245 * @param b - Second number
3246 */
3247export function calculate(a: number, b: number): number {
3248    return a + b;
3249}
3250"#;
3251        let theirs = r#"export function calculate(a: number, b: number): number {
3252    const result = a + b;
3253    console.log("result:", result);
3254    return result;
3255}
3256"#;
3257        let result = entity_merge(base, ours, theirs, "test.ts");
3258        eprintln!("Doc comment + body: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
3259        eprintln!("Content:\n{}", result.content);
3260        // This tests whether weave can merge doc comment additions with body changes
3261    }
3262
3263    #[test]
3264    fn test_both_add_different_guard_clauses() {
3265        // Both add different guard clauses at the start of a function
3266        let base = r#"export function processOrder(order: Order): Result {
3267    const total = calculateTotal(order);
3268    return { success: true, total };
3269}
3270"#;
3271        let ours = r#"export function processOrder(order: Order): Result {
3272    if (!order) throw new Error("Order required");
3273    const total = calculateTotal(order);
3274    return { success: true, total };
3275}
3276"#;
3277        let theirs = r#"export function processOrder(order: Order): Result {
3278    if (order.items.length === 0) throw new Error("Empty order");
3279    const total = calculateTotal(order);
3280    return { success: true, total };
3281}
3282"#;
3283        let result = entity_merge(base, ours, theirs, "test.ts");
3284        eprintln!("Guard clauses: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
3285        eprintln!("Content:\n{}", result.content);
3286        // Both add at same position — diffy may struggle since they're at the same insertion point
3287    }
3288
3289    #[test]
3290    fn test_both_modify_different_enum_variants() {
3291        // One modifies a variant's value, other adds new variants
3292        let base = r#"enum Status {
3293    Active = "active",
3294    Inactive = "inactive",
3295    Pending = "pending",
3296}
3297"#;
3298        let ours = r#"enum Status {
3299    Active = "active",
3300    Inactive = "disabled",
3301    Pending = "pending",
3302}
3303"#;
3304        let theirs = r#"enum Status {
3305    Active = "active",
3306    Inactive = "inactive",
3307    Pending = "pending",
3308    Deleted = "deleted",
3309}
3310"#;
3311        let result = entity_merge(base, ours, theirs, "test.ts");
3312        eprintln!("Enum modify+add: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
3313        eprintln!("Content:\n{}", result.content);
3314        assert!(
3315            result.is_clean(),
3316            "Modify variant + add new variant should merge. Conflicts: {:?}",
3317            result.conflicts,
3318        );
3319        assert!(result.content.contains("\"disabled\""), "Should have modified Inactive");
3320        assert!(result.content.contains("Deleted"), "Should have new Deleted variant");
3321    }
3322
3323    #[test]
3324    fn test_config_object_field_additions() {
3325        // Both add different fields to a config object (exported const)
3326        let base = r#"export const config = {
3327    timeout: 5000,
3328    retries: 3,
3329};
3330"#;
3331        let ours = r#"export const config = {
3332    timeout: 5000,
3333    retries: 3,
3334    maxConnections: 10,
3335};
3336"#;
3337        let theirs = r#"export const config = {
3338    timeout: 5000,
3339    retries: 3,
3340    logLevel: "info",
3341};
3342"#;
3343        let result = entity_merge(base, ours, theirs, "test.ts");
3344        eprintln!("Config fields: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
3345        eprintln!("Content:\n{}", result.content);
3346        // This tests whether inner entity merge handles object literals
3347        // (it probably won't since object fields aren't extracted as members the same way)
3348    }
3349
3350    #[test]
3351    fn test_rust_impl_block_both_add_methods() {
3352        // Both add different methods to a Rust impl block
3353        let base = r#"impl Calculator {
3354    fn add(&self, a: i32, b: i32) -> i32 {
3355        a + b
3356    }
3357}
3358"#;
3359        let ours = r#"impl Calculator {
3360    fn add(&self, a: i32, b: i32) -> i32 {
3361        a + b
3362    }
3363
3364    fn multiply(&self, a: i32, b: i32) -> i32 {
3365        a * b
3366    }
3367}
3368"#;
3369        let theirs = r#"impl Calculator {
3370    fn add(&self, a: i32, b: i32) -> i32 {
3371        a + b
3372    }
3373
3374    fn divide(&self, a: i32, b: i32) -> i32 {
3375        a / b
3376    }
3377}
3378"#;
3379        let result = entity_merge(base, ours, theirs, "test.rs");
3380        eprintln!("Rust impl: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
3381        eprintln!("Content:\n{}", result.content);
3382        assert!(
3383            result.is_clean(),
3384            "Both adding methods to Rust impl should merge. Conflicts: {:?}",
3385            result.conflicts,
3386        );
3387        assert!(result.content.contains("multiply"), "Should have multiply");
3388        assert!(result.content.contains("divide"), "Should have divide");
3389    }
3390
3391    #[test]
3392    fn test_rust_doc_comment_plus_body_change() {
3393        // One side adds Rust doc comment, other modifies body
3394        // Comment bundling ensures the doc comment is part of the entity
3395        let base = r#"fn add(a: i32, b: i32) -> i32 {
3396    a + b
3397}
3398
3399fn subtract(a: i32, b: i32) -> i32 {
3400    a - b
3401}
3402"#;
3403        let ours = r#"/// Adds two numbers together.
3404fn add(a: i32, b: i32) -> i32 {
3405    a + b
3406}
3407
3408fn subtract(a: i32, b: i32) -> i32 {
3409    a - b
3410}
3411"#;
3412        let theirs = r#"fn add(a: i32, b: i32) -> i32 {
3413    a + b
3414}
3415
3416fn subtract(a: i32, b: i32) -> i32 {
3417    a - b - 1
3418}
3419"#;
3420        let result = entity_merge(base, ours, theirs, "test.rs");
3421        assert!(
3422            result.is_clean(),
3423            "Rust doc comment + body change should merge. Conflicts: {:?}",
3424            result.conflicts,
3425        );
3426        assert!(result.content.contains("/// Adds two numbers"), "Should have ours doc comment");
3427        assert!(result.content.contains("a - b - 1"), "Should have theirs body change");
3428    }
3429
3430    #[test]
3431    fn test_both_add_different_doc_comments() {
3432        // Both add doc comments to different functions — should merge cleanly
3433        let base = r#"fn add(a: i32, b: i32) -> i32 {
3434    a + b
3435}
3436
3437fn subtract(a: i32, b: i32) -> i32 {
3438    a - b
3439}
3440"#;
3441        let ours = r#"/// Adds two numbers.
3442fn add(a: i32, b: i32) -> i32 {
3443    a + b
3444}
3445
3446fn subtract(a: i32, b: i32) -> i32 {
3447    a - b
3448}
3449"#;
3450        let theirs = r#"fn add(a: i32, b: i32) -> i32 {
3451    a + b
3452}
3453
3454/// Subtracts b from a.
3455fn subtract(a: i32, b: i32) -> i32 {
3456    a - b
3457}
3458"#;
3459        let result = entity_merge(base, ours, theirs, "test.rs");
3460        assert!(
3461            result.is_clean(),
3462            "Both adding doc comments to different functions should merge. Conflicts: {:?}",
3463            result.conflicts,
3464        );
3465        assert!(result.content.contains("/// Adds two numbers"), "Should have add's doc comment");
3466        assert!(result.content.contains("/// Subtracts b from a"), "Should have subtract's doc comment");
3467    }
3468
3469    #[test]
3470    fn test_go_import_block_both_add_different() {
3471        // Go uses import (...) blocks — both add different imports
3472        let base = "package main\n\nimport (\n\t\"fmt\"\n\t\"os\"\n)\n\nfunc main() {\n\tfmt.Println(\"hello\")\n}\n";
3473        let ours = "package main\n\nimport (\n\t\"fmt\"\n\t\"os\"\n\t\"strings\"\n)\n\nfunc main() {\n\tfmt.Println(\"hello\")\n}\n";
3474        let theirs = "package main\n\nimport (\n\t\"fmt\"\n\t\"os\"\n\t\"io\"\n)\n\nfunc main() {\n\tfmt.Println(\"hello\")\n}\n";
3475        let result = entity_merge(base, ours, theirs, "main.go");
3476        eprintln!("Go import block: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
3477        eprintln!("Content:\n{}", result.content);
3478        // This tests whether Go import blocks (a single entity) get inner-merged
3479    }
3480
3481    #[test]
3482    fn test_python_class_both_add_methods() {
3483        // Python class — both add different methods
3484        let base = "class Calculator:\n    def add(self, a, b):\n        return a + b\n";
3485        let ours = "class Calculator:\n    def add(self, a, b):\n        return a + b\n\n    def multiply(self, a, b):\n        return a * b\n";
3486        let theirs = "class Calculator:\n    def add(self, a, b):\n        return a + b\n\n    def divide(self, a, b):\n        return a / b\n";
3487        let result = entity_merge(base, ours, theirs, "test.py");
3488        eprintln!("Python class: clean={}, conflicts={:?}", result.is_clean(), result.conflicts);
3489        eprintln!("Content:\n{}", result.content);
3490        assert!(
3491            result.is_clean(),
3492            "Both adding methods to Python class should merge. Conflicts: {:?}",
3493            result.conflicts,
3494        );
3495        assert!(result.content.contains("multiply"), "Should have multiply");
3496        assert!(result.content.contains("divide"), "Should have divide");
3497    }
3498
3499    #[test]
3500    fn test_interstitial_conflict_not_silently_embedded() {
3501        // Regression test: when interstitial content between entities has a
3502        // both-modified conflict, merge_interstitials must report it as a real
3503        // conflict instead of silently embedding raw diffy markers and claiming
3504        // is_clean=true.
3505        //
3506        // Scenario: a barrel export file (index.ts) with comments between
3507        // export statements. Both sides modify the SAME interstitial comment
3508        // block differently. The exports are the entities; the comment between
3509        // them is interstitial content that goes through merge_interstitials
3510        // → diffy, which cannot auto-merge conflicting edits.
3511        let base = r#"export { alpha } from "./alpha";
3512
3513// Section: data utilities
3514// TODO: add more exports here
3515
3516export { beta } from "./beta";
3517"#;
3518        let ours = r#"export { alpha } from "./alpha";
3519
3520// Section: data utilities (sorting)
3521// Sorting helpers for list views
3522
3523export { beta } from "./beta";
3524"#;
3525        let theirs = r#"export { alpha } from "./alpha";
3526
3527// Section: data utilities (filtering)
3528// Filtering helpers for search views
3529
3530export { beta } from "./beta";
3531"#;
3532        let result = entity_merge(base, ours, theirs, "index.ts");
3533
3534        // The key assertions:
3535        // 1. If the content has conflict markers, is_clean() MUST be false
3536        let has_markers = result.content.contains("<<<<<<<") || result.content.contains(">>>>>>>");
3537        if has_markers {
3538            assert!(
3539                !result.is_clean(),
3540                "BUG: is_clean()=true but merged content has conflict markers!\n\
3541                 stats: {}\nconflicts: {:?}\ncontent:\n{}",
3542                result.stats, result.conflicts, result.content
3543            );
3544            assert!(
3545                result.stats.entities_conflicted > 0,
3546                "entities_conflicted should be > 0 when markers are present"
3547            );
3548        }
3549
3550        // 2. If it was resolved cleanly, no markers should exist
3551        if result.is_clean() {
3552            assert!(
3553                !has_markers,
3554                "Clean merge should not contain conflict markers!\ncontent:\n{}",
3555                result.content
3556            );
3557        }
3558    }
3559
3560    #[test]
3561    fn test_pre_conflicted_input_not_treated_as_clean() {
3562        // Regression test for AU/AA conflicts: git can store conflict markers
3563        // directly into stage blobs. Weave must not return is_clean=true.
3564        let base = "";
3565        let theirs = "";
3566        let ours = r#"/**
3567 * MIT License
3568 */
3569
3570<<<<<<<< HEAD:src/lib/exports/index.ts
3571export { renderDocToBuffer } from "./doc-exporter";
3572export type { ExportOptions, ExportMetadata, RenderContext } from "./types";
3573========
3574export * from "./editor";
3575export * from "./types";
3576>>>>>>>> feature:packages/core/src/editor/index.ts
3577"#;
3578        let result = entity_merge(base, ours, theirs, "index.ts");
3579
3580        assert!(
3581            !result.is_clean(),
3582            "Pre-conflicted input must not be reported as clean!\n\
3583             stats: {}\nconflicts: {:?}",
3584            result.stats, result.conflicts,
3585        );
3586        assert!(result.stats.entities_conflicted > 0);
3587        assert!(!result.conflicts.is_empty());
3588    }
3589
3590    #[test]
3591    fn test_multi_line_signature_classified_as_syntax() {
3592        // Multi-line parameter list: changing a param should be Syntax, not Functional
3593        let base = "function process(\n    a: number,\n    b: string\n) {\n    return a;\n}\n";
3594        let ours = "function process(\n    a: number,\n    b: string,\n    c: boolean\n) {\n    return a;\n}\n";
3595        let theirs = "function process(\n    a: number,\n    b: number\n) {\n    return a;\n}\n";
3596        let complexity = crate::conflict::classify_conflict(Some(base), Some(ours), Some(theirs));
3597        assert_eq!(
3598            complexity,
3599            crate::conflict::ConflictComplexity::Syntax,
3600            "Multi-line signature change should be classified as Syntax, got {:?}",
3601            complexity
3602        );
3603    }
3604
3605    #[test]
3606    fn test_grouped_import_merge_preserves_groups() {
3607        let base = "import os\nimport sys\n\nfrom collections import OrderedDict\nfrom typing import List\n";
3608        let ours = "import os\nimport sys\nimport json\n\nfrom collections import OrderedDict\nfrom typing import List\n";
3609        let theirs = "import os\nimport sys\n\nfrom collections import OrderedDict\nfrom collections import defaultdict\nfrom typing import List\n";
3610        let result = merge_imports_commutatively(base, ours, theirs);
3611        // json should be in the first group (stdlib), defaultdict in the second (collections)
3612        let lines: Vec<&str> = result.lines().collect();
3613        let json_idx = lines.iter().position(|l| l.contains("json"));
3614        let blank_idx = lines.iter().position(|l| l.trim().is_empty());
3615        let defaultdict_idx = lines.iter().position(|l| l.contains("defaultdict"));
3616        assert!(json_idx.is_some(), "json import should be present");
3617        assert!(blank_idx.is_some(), "blank line separator should be present");
3618        assert!(defaultdict_idx.is_some(), "defaultdict import should be present");
3619        // json should come before the blank line, defaultdict after
3620        assert!(json_idx.unwrap() < blank_idx.unwrap(), "json should be in first group");
3621        assert!(defaultdict_idx.unwrap() > blank_idx.unwrap(), "defaultdict should be in second group");
3622    }
3623
3624    #[test]
3625    fn test_configurable_duplicate_threshold() {
3626        // Create entities with 15 same-name entities
3627        let entities: Vec<SemanticEntity> = (0..15).map(|i| SemanticEntity {
3628            id: format!("test::function::test_{}", i),
3629            file_path: "test.ts".to_string(),
3630            entity_type: "function".to_string(),
3631            name: "test".to_string(),
3632            parent_id: None,
3633            content: format!("function test() {{ return {}; }}", i),
3634            content_hash: format!("hash_{}", i),
3635            structural_hash: None,
3636            start_line: i * 3 + 1,
3637            end_line: i * 3 + 3,
3638            metadata: None,
3639        }).collect();
3640        // Default threshold (10): should trigger
3641        assert!(has_excessive_duplicates(&entities));
3642        // Set threshold to 20: should not trigger
3643        std::env::set_var("WEAVE_MAX_DUPLICATES", "20");
3644        assert!(!has_excessive_duplicates(&entities));
3645        std::env::remove_var("WEAVE_MAX_DUPLICATES");
3646    }
3647}