Skip to main content

aura_merge/
lib.rs

//! # aura-merge
//!
//! Structured 3-way merge engine that picks a strategy per file type:
//!
//! - **Code files** — line-level 3-way merge, plus scaffold extraction (the
//!   non-function portions) as a primitive for function-level merging.
//! - **JSON** — key-level deep merge with conflict reporting.
//! - **`.env`** — key-level 2-way merge with conflict reporting.
//! - **YAML / TOML / Text / Markdown** — line-level 3-way merge.
//! - **Binary and ignored files** — reported as unmergeable.
//!
//! JSON handling depends on `serde_json`; everything else uses only the
//! standard library.
//!
//! Originally extracted from [Aura](https://auravcs.com), the semantic
//! version control engine.
15
16use std::collections::{BTreeMap, HashMap};
17
18// ─── File Type Detection ───────────────────────────────────────────────────
19
/// Coarse classification of a path, used to pick a merge strategy.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum FileType {
    /// Source code: `.rs`, `.py`, `.ts`, `.js`, `.go`, `.java`, etc.
    Code,
    /// `.json`
    Json,
    /// `.yaml`, `.yml`
    Yaml,
    /// `.toml`
    Toml,
    /// `.env`, `.env.*`
    Env,
    /// `.md`, `.txt`, `.csv`, `.html`, `.css`, and anything unrecognised.
    Text,
    /// Images, fonts, archives, compiled artifacts, etc.
    Binary,
    /// Lockfiles and build/dependency directories such as `node_modules`.
    Ignored,
}
31
32pub fn detect_file_type(path: &str) -> FileType {
33    let lower = path.to_lowercase();
34
35    // Ignored files — never sync
36    if lower.contains("node_modules/")
37        || lower.contains("target/")
38        || lower.contains(".git/")
39        || lower.contains(".aura/")
40        || lower.contains("__pycache__/")
41        || lower.contains(".next/")
42        || lower.contains("dist/")
43        || lower.contains("build/")
44    {
45        return FileType::Ignored;
46    }
47
48    // Lockfiles — never auto-merge
49    if lower.ends_with("cargo.lock")
50        || lower.ends_with("package-lock.json")
51        || lower.ends_with("yarn.lock")
52        || lower.ends_with("pnpm-lock.yaml")
53        || lower.ends_with("poetry.lock")
54        || lower.ends_with("gemfile.lock")
55    {
56        return FileType::Ignored;
57    }
58
59    // Binary
60    if lower.ends_with(".png")
61        || lower.ends_with(".jpg")
62        || lower.ends_with(".jpeg")
63        || lower.ends_with(".gif")
64        || lower.ends_with(".ico")
65        || lower.ends_with(".woff")
66        || lower.ends_with(".woff2")
67        || lower.ends_with(".ttf")
68        || lower.ends_with(".eot")
69        || lower.ends_with(".zip")
70        || lower.ends_with(".tar")
71        || lower.ends_with(".gz")
72        || lower.ends_with(".exe")
73        || lower.ends_with(".dll")
74        || lower.ends_with(".so")
75        || lower.ends_with(".dylib")
76        || lower.ends_with(".pdf")
77    {
78        return FileType::Binary;
79    }
80
81    // Structured formats
82    if lower.ends_with(".json") {
83        return FileType::Json;
84    }
85    if lower.ends_with(".yaml") || lower.ends_with(".yml") {
86        return FileType::Yaml;
87    }
88    if lower.ends_with(".toml") {
89        return FileType::Toml;
90    }
91    if lower.ends_with(".env") || lower.contains(".env.") {
92        return FileType::Env;
93    }
94
95    // Code (AST-parseable)
96    if lower.ends_with(".rs")
97        || lower.ends_with(".py")
98        || lower.ends_with(".ts")
99        || lower.ends_with(".tsx")
100        || lower.ends_with(".js")
101        || lower.ends_with(".jsx")
102        || lower.ends_with(".go")
103        || lower.ends_with(".java")
104        || lower.ends_with(".cs")
105        || lower.ends_with(".rb")
106        || lower.ends_with(".cpp")
107        || lower.ends_with(".cc")
108        || lower.ends_with(".c")
109        || lower.ends_with(".h")
110        || lower.ends_with(".hpp")
111        || lower.ends_with(".php")
112        || lower.ends_with(".swift")
113        || lower.ends_with(".kt")
114    {
115        return FileType::Code;
116    }
117
118    // Everything else = text
119    FileType::Text
120}
121
122// ─── Merge Results ─────────────────────────────────────────────────────────
123
/// Outcome of a merge attempt.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum MergeResult {
    /// Clean merge — no conflicts. Carries the merged content.
    Merged(String),
    /// The merge produced output but some parts need manual resolution.
    Conflicts {
        /// Best-effort merged content (may contain conflict markers).
        merged: String,
        /// Number of conflicts; equals `conflict_details.len()`.
        conflict_count: usize,
        /// One entry per conflict, in the order they were detected.
        conflict_details: Vec<ConflictDetail>,
    },
    /// Both sides are identical — no changes needed.
    Identical,
    /// Cannot merge (binary, parse failure, etc). Carries the reason.
    CannotMerge(String),
}

/// A single conflicting location together with both competing values.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ConflictDetail {
    /// Where the conflict is: key path for JSON, key for env files,
    /// line number(s) for text.
    pub location: String,
    /// The value on the local ("ours") side.
    pub local_value: String,
    /// The value on the remote ("theirs") side.
    pub remote_value: String,
}
146
147// ─── JSON Deep Merge ───────────────────────────────────────────────────────
148
149/// Merge two JSON values. base = common ancestor (if known), local = ours, remote = theirs.
150/// If no base, treat it as a 2-way merge (conflicts when same key differs).
151pub fn merge_json(local: &str, remote: &str, base: Option<&str>) -> MergeResult {
152    let local_val: serde_json::Value = match serde_json::from_str(local) {
153        Ok(v) => v,
154        Err(e) => return MergeResult::CannotMerge(format!("Local JSON parse error: {}", e)),
155    };
156    let remote_val: serde_json::Value = match serde_json::from_str(remote) {
157        Ok(v) => v,
158        Err(e) => return MergeResult::CannotMerge(format!("Remote JSON parse error: {}", e)),
159    };
160
161    if local_val == remote_val {
162        return MergeResult::Identical;
163    }
164
165    let base_val = base.and_then(|b| serde_json::from_str(b).ok());
166    let mut conflicts = Vec::new();
167    let merged = deep_merge_value(
168        &local_val,
169        &remote_val,
170        base_val.as_ref(),
171        "$",
172        &mut conflicts,
173    );
174
175    if let Ok(pretty) = serde_json::to_string_pretty(&merged) {
176        if conflicts.is_empty() {
177            MergeResult::Merged(pretty)
178        } else {
179            MergeResult::Conflicts {
180                merged: pretty,
181                conflict_count: conflicts.len(),
182                conflict_details: conflicts,
183            }
184        }
185    } else {
186        MergeResult::CannotMerge("Failed to serialize merged JSON".to_string())
187    }
188}
189
190fn deep_merge_value(
191    local: &serde_json::Value,
192    remote: &serde_json::Value,
193    base: Option<&serde_json::Value>,
194    path: &str,
195    conflicts: &mut Vec<ConflictDetail>,
196) -> serde_json::Value {
197    use serde_json::Value;
198
199    match (local, remote) {
200        // Both objects — merge keys
201        (Value::Object(l), Value::Object(r)) => {
202            let base_obj = base.and_then(|b| b.as_object());
203            let mut merged = serde_json::Map::new();
204
205            // All keys from both sides
206            let mut all_keys: Vec<String> = l.keys().chain(r.keys()).cloned().collect();
207            all_keys.sort();
208            all_keys.dedup();
209
210            for key in &all_keys {
211                let l_val = l.get(key);
212                let r_val = r.get(key);
213                let b_val = base_obj.and_then(|b| b.get(key));
214                let child_path = format!("{}.{}", path, key);
215
216                match (l_val, r_val) {
217                    (Some(lv), Some(rv)) => {
218                        if lv == rv {
219                            // Same value — no conflict
220                            merged.insert(key.clone(), lv.clone());
221                        } else if let Some(bv) = b_val {
222                            // 3-way: check who changed from base
223                            if lv == bv {
224                                // Local unchanged, remote changed → take remote
225                                merged.insert(key.clone(), rv.clone());
226                            } else if rv == bv {
227                                // Remote unchanged, local changed → take local
228                                merged.insert(key.clone(), lv.clone());
229                            } else {
230                                // Both changed from base — recurse or conflict
231                                let result =
232                                    deep_merge_value(lv, rv, Some(bv), &child_path, conflicts);
233                                merged.insert(key.clone(), result);
234                            }
235                        } else {
236                            // No base — 2-way merge, recurse for objects, conflict for scalars
237                            let result = deep_merge_value(lv, rv, None, &child_path, conflicts);
238                            merged.insert(key.clone(), result);
239                        }
240                    }
241                    (Some(lv), None) => {
242                        // Only in local
243                        if b_val.is_some() {
244                            // Was in base, remote deleted it — take remote's deletion
245                            // (don't include the key)
246                        } else {
247                            merged.insert(key.clone(), lv.clone());
248                        }
249                    }
250                    (None, Some(rv)) => {
251                        // Only in remote
252                        if b_val.is_some() {
253                            // Was in base, local deleted it — take local's deletion
254                        } else {
255                            merged.insert(key.clone(), rv.clone());
256                        }
257                    }
258                    (None, None) => unreachable!(),
259                }
260            }
261
262            Value::Object(merged)
263        }
264        // Both arrays — use remote if different (no element-level merge for now)
265        (Value::Array(_), Value::Array(_)) => {
266            if local == remote {
267                local.clone()
268            } else if let Some(bv) = base {
269                if local == bv {
270                    remote.clone()
271                } else if remote == bv {
272                    local.clone()
273                } else {
274                    // Both changed array — conflict, take local + note
275                    conflicts.push(ConflictDetail {
276                        location: path.to_string(),
277                        local_value: serde_json::to_string(local).unwrap_or_default(),
278                        remote_value: serde_json::to_string(remote).unwrap_or_default(),
279                    });
280                    local.clone()
281                }
282            } else {
283                conflicts.push(ConflictDetail {
284                    location: path.to_string(),
285                    local_value: serde_json::to_string(local).unwrap_or_default(),
286                    remote_value: serde_json::to_string(remote).unwrap_or_default(),
287                });
288                local.clone()
289            }
290        }
291        // Scalar conflict
292        _ => {
293            if local == remote {
294                local.clone()
295            } else {
296                conflicts.push(ConflictDetail {
297                    location: path.to_string(),
298                    local_value: serde_json::to_string(local).unwrap_or_default(),
299                    remote_value: serde_json::to_string(remote).unwrap_or_default(),
300                });
301                local.clone() // default to local on conflict
302            }
303        }
304    }
305}
306
307// ─── Line-Level Text Merge ─────────────────────────────────────────────────
308
309/// 3-way line-level merge for text files (markdown, CSS, HTML, plain text).
310/// If no base, falls back to 2-way merge.
311pub fn merge_text(local: &str, remote: &str, base: Option<&str>) -> MergeResult {
312    if local == remote {
313        return MergeResult::Identical;
314    }
315
316    let local_lines: Vec<&str> = local.lines().collect();
317    let remote_lines: Vec<&str> = remote.lines().collect();
318
319    if let Some(base_text) = base {
320        // 3-way merge
321        let base_lines: Vec<&str> = base_text.lines().collect();
322        merge_text_3way(&base_lines, &local_lines, &remote_lines)
323    } else {
324        // 2-way merge — find common lines, diff the rest
325        merge_text_2way(&local_lines, &remote_lines)
326    }
327}
328
329fn merge_text_3way(base: &[&str], local: &[&str], remote: &[&str]) -> MergeResult {
330    // Compute which lines each side added/removed vs base
331    let _local_diff = diff_lines(base, local);
332    let _remote_diff = diff_lines(base, remote);
333
334    let mut merged = Vec::new();
335    let mut conflicts = Vec::new();
336    let max_len = base.len().max(local.len()).max(remote.len());
337
338    let mut bi = 0;
339    let mut li = 0;
340    let mut ri = 0;
341
342    while bi < base.len() || li < local.len() || ri < remote.len() {
343        let b_line = base.get(bi).copied();
344        let l_line = local.get(li).copied();
345        let r_line = remote.get(ri).copied();
346
347        match (b_line, l_line, r_line) {
348            (Some(b), Some(l), Some(r)) if b == l && b == r => {
349                // All same — no change
350                merged.push(l.to_string());
351                bi += 1;
352                li += 1;
353                ri += 1;
354            }
355            (Some(b), Some(l), Some(r)) if b == l && b != r => {
356                // Remote changed this line, local didn't → take remote
357                merged.push(r.to_string());
358                bi += 1;
359                li += 1;
360                ri += 1;
361            }
362            (Some(b), Some(l), Some(r)) if b != l && b == r => {
363                // Local changed this line, remote didn't → take local
364                merged.push(l.to_string());
365                bi += 1;
366                li += 1;
367                ri += 1;
368            }
369            (Some(b), Some(l), Some(r)) if b != l && b != r => {
370                if l == r {
371                    // Both made the same change
372                    merged.push(l.to_string());
373                } else {
374                    // True conflict — both changed differently
375                    conflicts.push(ConflictDetail {
376                        location: format!("line {}", bi + 1),
377                        local_value: l.to_string(),
378                        remote_value: r.to_string(),
379                    });
380                    merged.push("<<<<<<< LOCAL".to_string());
381                    merged.push(l.to_string());
382                    merged.push("=======".to_string());
383                    merged.push(r.to_string());
384                    merged.push(">>>>>>> REMOTE".to_string());
385                }
386                bi += 1;
387                li += 1;
388                ri += 1;
389            }
390            // Handle insertions/deletions at end
391            (None, Some(l), Some(r)) => {
392                if l == r {
393                    merged.push(l.to_string());
394                    li += 1;
395                    ri += 1;
396                } else {
397                    merged.push(l.to_string());
398                    merged.push(r.to_string());
399                    li += 1;
400                    ri += 1;
401                }
402            }
403            (None, Some(l), None) => {
404                merged.push(l.to_string());
405                li += 1;
406            }
407            (None, None, Some(r)) => {
408                merged.push(r.to_string());
409                ri += 1;
410            }
411            (Some(_), Some(l), None) => {
412                // Remote deleted, local kept
413                merged.push(l.to_string());
414                bi += 1;
415                li += 1;
416            }
417            (Some(_), None, Some(r)) => {
418                // Local deleted, remote kept
419                merged.push(r.to_string());
420                bi += 1;
421                ri += 1;
422            }
423            (Some(_), None, None) => {
424                // Both deleted
425                bi += 1;
426            }
427            _ => break,
428        }
429
430        if bi > max_len + 100 {
431            break;
432        } // safety
433    }
434
435    let result = merged.join("\n");
436    if conflicts.is_empty() {
437        MergeResult::Merged(result)
438    } else {
439        MergeResult::Conflicts {
440            merged: result,
441            conflict_count: conflicts.len(),
442            conflict_details: conflicts,
443        }
444    }
445}
446
447fn merge_text_2way(local: &[&str], remote: &[&str]) -> MergeResult {
448    // Simple 2-way: find common prefix/suffix, diff the middle
449    let mut conflicts = Vec::new();
450
451    // Common prefix
452    let prefix_len = local
453        .iter()
454        .zip(remote.iter())
455        .take_while(|(a, b)| a == b)
456        .count();
457
458    // Common suffix (from the end)
459    let suffix_len = local
460        .iter()
461        .rev()
462        .zip(remote.iter().rev())
463        .take_while(|(a, b)| a == b)
464        .count();
465
466    let l_middle = &local[prefix_len..local.len().saturating_sub(suffix_len)];
467    let r_middle = &remote[prefix_len..remote.len().saturating_sub(suffix_len)];
468
469    if l_middle.is_empty() && r_middle.is_empty() {
470        return MergeResult::Identical;
471    }
472
473    // Build merged: prefix + both middles (conflict if both non-empty) + suffix
474    let mut merged: Vec<String> = local[..prefix_len].iter().map(|s| s.to_string()).collect();
475
476    if l_middle.is_empty() {
477        // Only remote has changes
478        merged.extend(r_middle.iter().map(|s| s.to_string()));
479    } else if r_middle.is_empty() {
480        // Only local has changes
481        merged.extend(l_middle.iter().map(|s| s.to_string()));
482    } else {
483        // Both have changes in the middle — conflict
484        conflicts.push(ConflictDetail {
485            location: format!("lines {}-{}", prefix_len + 1, prefix_len + l_middle.len()),
486            local_value: l_middle.join("\n"),
487            remote_value: r_middle.join("\n"),
488        });
489        merged.push("<<<<<<< LOCAL".to_string());
490        merged.extend(l_middle.iter().map(|s| s.to_string()));
491        merged.push("=======".to_string());
492        merged.extend(r_middle.iter().map(|s| s.to_string()));
493        merged.push(">>>>>>> REMOTE".to_string());
494    }
495
496    let suffix_start = local.len().saturating_sub(suffix_len);
497    merged.extend(local[suffix_start..].iter().map(|s| s.to_string()));
498
499    let result = merged.join("\n");
500    if conflicts.is_empty() {
501        MergeResult::Merged(result)
502    } else {
503        MergeResult::Conflicts {
504            merged: result,
505            conflict_count: conflicts.len(),
506            conflict_details: conflicts,
507        }
508    }
509}
510
/// Return every line of `modified` that does not occur anywhere in `base`,
/// paired with its 0-based index in `modified`.
///
/// This is a set difference, not a positional diff: a line that merely moved
/// is not reported, and removed lines are not reported at all.
fn diff_lines<'a>(base: &[&'a str], modified: &[&'a str]) -> Vec<(usize, &'a str)> {
    // Index base once so each membership test is O(1) rather than a scan.
    let known: std::collections::HashSet<&'a str> = base.iter().copied().collect();

    modified
        .iter()
        .copied()
        .enumerate()
        .filter(|&(_, line)| !known.contains(line))
        .collect()
}
520
521// ─── Key-Value Merge (ENV files) ───────────────────────────────────────────
522
523/// Merge .env files by key. Same key with different values = conflict.
524pub fn merge_env(local: &str, remote: &str) -> MergeResult {
525    if local == remote {
526        return MergeResult::Identical;
527    }
528
529    let local_map = parse_env(local);
530    let remote_map = parse_env(remote);
531    let mut merged = BTreeMap::new();
532    let mut conflicts = Vec::new();
533
534    // All keys
535    let mut all_keys: Vec<&String> = local_map.keys().chain(remote_map.keys()).collect();
536    all_keys.sort();
537    all_keys.dedup();
538
539    for key in all_keys {
540        match (local_map.get(key), remote_map.get(key)) {
541            (Some(lv), Some(rv)) => {
542                if lv == rv {
543                    merged.insert(key.clone(), lv.clone());
544                } else {
545                    conflicts.push(ConflictDetail {
546                        location: key.clone(),
547                        local_value: lv.clone(),
548                        remote_value: rv.clone(),
549                    });
550                    merged.insert(key.clone(), lv.clone()); // default to local
551                }
552            }
553            (Some(lv), None) => {
554                merged.insert(key.clone(), lv.clone());
555            }
556            (None, Some(rv)) => {
557                merged.insert(key.clone(), rv.clone());
558            }
559            (None, None) => {}
560        }
561    }
562
563    let result: String = merged
564        .iter()
565        .map(|(k, v)| format!("{}={}", k, v))
566        .collect::<Vec<_>>()
567        .join("\n");
568
569    if conflicts.is_empty() {
570        MergeResult::Merged(result)
571    } else {
572        MergeResult::Conflicts {
573            merged: result,
574            conflict_count: conflicts.len(),
575            conflict_details: conflicts,
576        }
577    }
578}
579
/// Parse `.env`-style content into a key → value map.
///
/// Blank lines and lines whose first non-whitespace character is `#` are
/// skipped. Each remaining line is split at the first `=`: the key is
/// trimmed, the value is kept verbatim. A line without `=` becomes a key with
/// an empty value. When a key repeats, the last occurrence wins.
fn parse_env(content: &str) -> HashMap<String, String> {
    let mut entries = HashMap::new();

    for line in content.lines() {
        let stripped = line.trim();
        if stripped.is_empty() || stripped.starts_with('#') {
            continue;
        }

        let (key, value) = match line.split_once('=') {
            Some((key, value)) => (key, value),
            None => (line, ""),
        };
        entries.insert(key.trim().to_string(), value.to_string());
    }

    entries
}
592
593// ─── Scaffold Extraction ───────────────────────────────────────────────────
594
/// Extract the "scaffold" of a code file — everything OUTSIDE function bodies.
///
/// `functions` is a list of `(name, body)` pairs. For each pair, the first
/// occurrence of `body` in the source is replaced with the placeholder
/// `/* __AURA_FN_PLACEHOLDER::<name> */`. Bodies that do not occur in the
/// source (including ones already swallowed by a longer, enclosing body) are
/// skipped, as are empty bodies — an empty string would otherwise "match" at
/// offset 0 and inject a placeholder for nothing.
///
/// Returns the scaffold text; the input is not modified.
pub fn extract_scaffold(source: &str, functions: &[(String, String)]) -> String {
    let mut scaffold = source.to_string();

    // Longest bodies first, so an enclosing function is replaced before any
    // function nested inside it. The sort is stable: equal lengths keep
    // their input order.
    let mut by_length: Vec<&(String, String)> =
        functions.iter().filter(|f| !f.1.is_empty()).collect();
    by_length.sort_by_key(|f| std::cmp::Reverse(f.1.len()));

    for (name, body) in by_length {
        if let Some(pos) = scaffold.find(body.as_str()) {
            let placeholder = format!("/* __AURA_FN_PLACEHOLDER::{} */", name);
            scaffold.replace_range(pos..pos + body.len(), &placeholder);
        }
    }

    scaffold
}
613
/// Re-insert function bodies into a scaffold.
///
/// For each `(name, body)` pair, in order, every occurrence of
/// `/* __AURA_FN_PLACEHOLDER::<name> */` in the text built so far is replaced
/// with `body`. Placeholders with no matching pair are left in place.
pub fn apply_scaffold(scaffold: &str, functions: &[(String, String)]) -> String {
    functions
        .iter()
        .fold(scaffold.to_string(), |text, (name, body)| {
            let marker = format!("/* __AURA_FN_PLACEHOLDER::{} */", name);
            text.replace(&marker, body)
        })
}
623
624// ─── Unified Merge Interface ───────────────────────────────────────────────
625
626/// Merge any two file versions based on file type.
627pub fn merge_file(path: &str, local: &str, remote: &str, base: Option<&str>) -> MergeResult {
628    let file_type = detect_file_type(path);
629
630    match file_type {
631        FileType::Json => merge_json(local, remote, base),
632        FileType::Env => merge_env(local, remote),
633        FileType::Yaml | FileType::Toml => {
634            // For YAML/TOML, use line-level merge (structural merge is future work)
635            merge_text(local, remote, base)
636        }
637        FileType::Text | FileType::Code => merge_text(local, remote, base),
638        FileType::Binary => MergeResult::CannotMerge("Binary file — cannot merge".to_string()),
639        FileType::Ignored => MergeResult::CannotMerge("Ignored file — should not sync".to_string()),
640    }
641}
642
#[cfg(test)]
mod tests {
    use super::*;

    /// One-sided changes to different keys are both applied without conflict.
    #[test]
    fn test_json_merge_no_conflict() {
        let base = r#"{"port": 3000, "debug": true, "name": "app"}"#;
        let local = r#"{"port": 3001, "debug": true, "name": "app"}"#;
        let remote = r#"{"port": 3000, "debug": false, "name": "app"}"#;

        let merged = match merge_json(local, remote, Some(base)) {
            MergeResult::Merged(text) => text,
            other => panic!("Expected Merged, got {:?}", other),
        };

        let v: serde_json::Value = serde_json::from_str(&merged).unwrap();
        assert_eq!(v["port"], 3001); // local changed
        assert_eq!(v["debug"], false); // remote changed
        assert_eq!(v["name"], "app"); // unchanged
    }

    /// Without a base, differing values for the same key conflict.
    #[test]
    fn test_json_merge_conflict() {
        let local = r#"{"port": 3001}"#;
        let remote = r#"{"port": 8080}"#;

        let conflict_count = match merge_json(local, remote, None) {
            MergeResult::Conflicts { conflict_count, .. } => conflict_count,
            other => panic!("Expected Conflicts, got {:?}", other),
        };

        assert_eq!(conflict_count, 1);
    }

    /// Keys unique to either side survive; a shared key that differs conflicts.
    #[test]
    fn test_env_merge() {
        let local = "PORT=3000\nDEBUG=true\nNEW_LOCAL=yes";
        let remote = "PORT=3000\nDEBUG=false\nNEW_REMOTE=yes";

        let (conflict_count, merged) = match merge_env(local, remote) {
            MergeResult::Conflicts {
                conflict_count,
                merged,
                ..
            } => (conflict_count, merged),
            other => panic!("Expected Conflicts, got {:?}", other),
        };

        assert_eq!(conflict_count, 1); // DEBUG differs
        assert!(merged.contains("NEW_LOCAL=yes"));
        assert!(merged.contains("NEW_REMOTE=yes"));
    }

    /// Edits to different lines are both kept in a 3-way text merge.
    #[test]
    fn test_text_merge_3way() {
        let base = "line1\nline2\nline3";
        let local = "line1\nLOCAL CHANGE\nline3";
        let remote = "line1\nline2\nREMOTE CHANGE";

        let merged = match merge_text(local, remote, Some(base)) {
            MergeResult::Merged(text) => text,
            other => panic!("Expected Merged, got {:?}", other),
        };

        assert!(merged.contains("LOCAL CHANGE"));
        assert!(merged.contains("REMOTE CHANGE"));
    }
}