Skip to main content

cargo_rail/split/
engine.rs

1use crate::cargo::{CargoTransform, TransformContext};
2use crate::config::{SplitMode, WorkspaceMode};
3use crate::error::{GitError, RailError, RailResult, ResultExt};
4use crate::git::mappings::MappingStore;
5use crate::git::{CommitInfo, SystemGit};
6use crate::progress;
7use crate::utils;
8use crate::workspace::WorkspaceContext;
9use crate::workspace::files::{AuxiliaryFiles, ProjectFiles};
10use glob::Pattern;
11use rayon::prelude::*;
12use std::collections::HashMap;
13use std::path::{Path, PathBuf};
14use std::sync::Arc;
15
/// Configuration for a single split operation.
///
/// Passed by reference to `SplitEngine::split`, which reads every field below.
pub struct SplitConfig {
  /// Name of the crate being split; also used to name the mapping store and
  /// handed to the Cargo.toml transform.
  pub crate_name: String,
  /// Paths to crate directories in monorepo.
  /// `split` uses the first entry to discover project files, so this should
  /// contain at least one path.
  pub crate_paths: Vec<PathBuf>,
  /// Split mode (single or combined)
  pub mode: SplitMode,
  /// Workspace mode (standalone or workspace)
  pub workspace_mode: WorkspaceMode,
  /// Target repository path (created and `git init`-ed if it does not exist yet)
  pub target_repo_path: PathBuf,
  /// Branch name for split repo; this is the branch pushed to the remote
  pub branch: String,
  /// Remote repository URL. When present, non-empty and not a local path,
  /// `split` pushes the result to it as `origin`.
  pub remote_url: Option<String>,
  /// Additional files/directories to include (glob patterns, resolved relative
  /// to the workspace root)
  pub include: Vec<String>,
  /// Files/directories to exclude (glob patterns). In the code visible here
  /// these are only applied to files matched by `include`.
  pub exclude: Vec<String>,
}
37
/// Pre-fetched files for a commit: (file_path, content)
///
/// Each entry is one file as returned by `collect_tree_files`, with its
/// content held as raw bytes.
type PrefetchedFiles = Vec<(PathBuf, Vec<u8>)>;
40
/// Maximum number of commits to prefetch at once
/// This bounds memory usage to O(window_size × avg_commit_size) instead of O(total_commits × avg_commit_size)
/// For a typical crate with ~1-2MB of files, 50 commits uses ~50-100MB max
const PREFETCH_WINDOW_SIZE: usize = 50;
45
/// Parameters for recreating a commit in the target repository
///
/// Bundles everything `recreate_commit_in_target` needs for one source commit.
struct RecreateCommitParams<'a> {
  /// Source (monorepo) commit to recreate
  commit: &'a CommitInfo,
  /// Crate directories whose files are collected; in single mode the matching
  /// one is stripped from each file path
  crate_paths: &'a [PathBuf],
  /// Root of the target repository the files are written into
  target_repo_path: &'a Path,
  /// Crate name handed to the Cargo.toml transform
  crate_name: &'a str,
  /// Split mode; decides whether paths are flattened to the repo root or preserved
  mode: &'a SplitMode,
  /// Workspace mode; together with `mode` decides whether the target has a workspace
  workspace_mode: &'a WorkspaceMode,
  /// Source-SHA to target-SHA mappings, used to translate parent commits
  mapping_store: &'a MappingStore,
  /// Target SHA of the most recently recreated commit; used as the parent
  /// when none of the source commit's parents has a mapping
  last_recreated_sha: Option<&'a str>,
  /// Pre-fetched files (if available from parallel prefetch)
  prefetched_files: Option<&'a PrefetchedFiles>,
}
59
/// Parameters for creating a git commit
///
/// Consumed by `create_git_commit`, which feeds them to `git commit-tree`.
struct CommitParams<'a> {
  /// Repository in which the git commands are run
  repo_path: &'a Path,
  /// Commit message (passed via `-m`)
  message: &'a str,
  /// Exported as `GIT_AUTHOR_NAME`
  author_name: &'a str,
  /// Exported as `GIT_AUTHOR_EMAIL`
  author_email: &'a str,
  /// Exported as `GIT_COMMITTER_NAME`
  committer_name: &'a str,
  /// Exported as `GIT_COMMITTER_EMAIL`
  committer_email: &'a str,
  /// Timestamp used for both the author and committer date, always written
  /// with a `+0000` offset
  timestamp: i64,
  /// Parent commits in the target repository (each passed via `-p`)
  parent_shas: &'a [String],
}
71
/// Split engine - extracts crates with full history
///
/// Deterministic git splitting: same input = same commit SHAs
/// Uses WorkspaceContext for git and cargo operations - no duplicate loads.
pub struct SplitEngine<'a> {
  /// Borrowed workspace context providing the workspace root, git access and
  /// cargo metadata
  ctx: &'a WorkspaceContext,
  /// Cargo.toml transformer, built once from the context's cargo metadata
  transform: CargoTransform,
}
80
81impl<'a> SplitEngine<'a> {
82  /// Create a new split engine from workspace context
83  pub fn new(ctx: &'a WorkspaceContext) -> RailResult<Self> {
84    // Build CargoTransform from context's metadata
85    let transformer = CargoTransform::new(ctx.cargo.metadata().clone());
86
87    Ok(Self {
88      ctx,
89      transform: transformer,
90    })
91  }
92
93  /// Check if a file path should be excluded based on glob patterns
94  fn should_exclude(path: &str, exclude_patterns: &[Pattern]) -> bool {
95    for pattern in exclude_patterns {
96      if pattern.matches(path) {
97        return true;
98      }
99    }
100    false
101  }
102
103  /// Compile glob patterns from string slices
104  fn compile_patterns(patterns: &[String]) -> Vec<Pattern> {
105    patterns.iter().filter_map(|p| Pattern::new(p).ok()).collect()
106  }
107
108  /// Find additional files to include based on include patterns
109  fn find_included_files(workspace_root: &Path, include_patterns: &[String]) -> RailResult<Vec<PathBuf>> {
110    use std::collections::HashSet;
111    let mut included = HashSet::new();
112
113    if include_patterns.is_empty() {
114      return Ok(Vec::new());
115    }
116
117    // Use glob to find files matching include patterns
118    for pattern_str in include_patterns {
119      let full_pattern = workspace_root.join(pattern_str);
120      let glob_pattern = full_pattern.to_string_lossy();
121
122      if let Ok(paths) = glob::glob(&glob_pattern) {
123        for path_result in paths.flatten() {
124          if path_result.is_file() {
125            // Skip .git directory contents
126            let path_str = path_result.to_string_lossy();
127            if path_str.contains("/.git/") || path_str.contains("\\.git\\") {
128              continue;
129            }
130
131            // Get relative path
132            if let Ok(rel) = path_result.strip_prefix(workspace_root) {
133              included.insert(rel.to_path_buf());
134            }
135          }
136        }
137      }
138    }
139
140    Ok(included.into_iter().collect())
141  }
142
143  /// Walk commit history and filter commits that touch the given paths
144  /// Returns commits in chronological order (oldest first)
145  fn walk_filtered_history(&self, paths: &[PathBuf]) -> RailResult<Vec<CommitInfo>> {
146    progress!("   Walking commit history to find commits touching crate...");
147
148    // Use batched git command for all paths at once (much faster than N separate calls)
149    let filtered_commits = self.ctx.git.git().get_commits_touching_paths(paths, None, "HEAD")?;
150
151    progress!(
152      "   Found {} total commits that touch the crate paths",
153      filtered_commits.len()
154    );
155
156    Ok(filtered_commits)
157  }
158
159  /// Prefetch files for multiple commits in parallel
160  ///
161  /// This significantly speeds up split operations on large repositories by
162  /// reading file contents for many commits concurrently while the sequential
163  /// commit recreation happens.
164  ///
165  /// Returns a HashMap from commit SHA to its prefetched files.
166  fn prefetch_commit_files(&self, commits: &[CommitInfo], crate_paths: &[PathBuf]) -> HashMap<String, PrefetchedFiles> {
167    // Use rayon to prefetch files in parallel
168    // Each commit's file collection is independent, so this is safe
169    let git = self.ctx.git.git();
170    let paths_arc = Arc::new(crate_paths.to_vec());
171
172    commits
173      .par_iter()
174      .filter_map(|commit| {
175        let paths = Arc::clone(&paths_arc);
176        let mut all_files = Vec::new();
177
178        for crate_path in paths.iter() {
179          match git.collect_tree_files(&commit.sha, crate_path) {
180            Ok(files) => all_files.extend(files),
181            Err(_) => {
182              // If we can't collect files, skip this commit in prefetch
183              // The main loop will handle it appropriately
184              return None;
185            }
186          }
187        }
188
189        Some((commit.sha.clone(), all_files))
190      })
191      .collect()
192  }
193
194  /// Apply Cargo.toml transformation to a manifest file
195  /// Returns Ok(()) if transform succeeded or file doesn't exist
196  ///
197  /// # Arguments
198  /// * `manifest_path` - Path to the Cargo.toml to transform
199  /// * `crate_name` - Name of the crate being transformed
200  /// * `target_has_workspace` - Whether target repo will have a workspace structure
201  ///   - true: keep `[lints] workspace = true` (Combined + Workspace mode)
202  ///   - false: resolve `[lints]` to actual values (Single or Combined + Standalone mode)
203  fn apply_manifest_transform(
204    &self,
205    manifest_path: &Path,
206    crate_name: &str,
207    target_has_workspace: bool,
208  ) -> RailResult<()> {
209    if !manifest_path.exists() {
210      return Ok(());
211    }
212
213    let content = std::fs::read_to_string(manifest_path)?;
214    let context = TransformContext {
215      crate_name: crate_name.to_string(),
216      workspace_root: self.ctx.workspace_root().to_path_buf(),
217      target_has_workspace,
218    };
219    let transformed = self.transform.transform_to_split(&content, &context)?;
220    std::fs::write(manifest_path, transformed)?;
221    Ok(())
222  }
223
224  /// Recreate a commit in the target repository with transforms applied
225  /// Returns the new commit SHA, or None if the commit should be skipped
226  /// (e.g., when files were deleted at this commit - "dirty history")
227  fn recreate_commit_in_target(&self, params: &RecreateCommitParams) -> RailResult<Option<String>> {
228    // Use pre-fetched files if available, otherwise collect them now
229    let all_files: Vec<(PathBuf, Vec<u8>)> = if let Some(prefetched) = params.prefetched_files {
230      prefetched.clone()
231    } else {
232      let mut files = Vec::new();
233      for crate_path in params.crate_paths {
234        let collected = self.ctx.git.git().collect_tree_files(&params.commit.sha, crate_path)?;
235        files.extend(collected);
236      }
237      files
238    };
239
240    // Handle "dirty history" - commits where the path was deleted or didn't exist yet
241    // This commonly happens when:
242    // - A crate was temporarily removed and later restored
243    // - Files were moved/renamed in a way that deleted the old path
244    // - The crate didn't exist at the start of the filtered history
245    if all_files.is_empty() {
246      return Ok(None);
247    }
248
249    // Write files to target repo, applying transforms
250    for (file_path, content_bytes) in &all_files {
251      let target_path = match params.mode {
252        SplitMode::Single => {
253          // For single mode, move files to root (strip crate path prefix)
254          let mut relative = file_path.clone();
255          for crate_path in params.crate_paths {
256            if let Ok(stripped) = file_path.strip_prefix(crate_path) {
257              relative = stripped.to_path_buf();
258              break;
259            }
260          }
261          params.target_repo_path.join(relative)
262        }
263        SplitMode::Combined => {
264          // For combined mode, preserve paths
265          params.target_repo_path.join(file_path)
266        }
267      };
268
269      // Create parent directories
270      if let Some(parent) = target_path.parent() {
271        std::fs::create_dir_all(parent)?;
272      }
273
274      // Write file content
275      std::fs::write(&target_path, content_bytes)?;
276
277      // Apply Cargo.toml transformation if applicable
278      if file_path.file_name() == Some(std::ffi::OsStr::new("Cargo.toml")) {
279        // Determine if target will have a workspace structure:
280        // - Single mode: always standalone (no workspace)
281        // - Combined + Standalone: no workspace
282        // - Combined + Workspace: has workspace
283        let target_has_workspace =
284          *params.mode == SplitMode::Combined && *params.workspace_mode == WorkspaceMode::Workspace;
285        self.apply_manifest_transform(&target_path, params.crate_name, target_has_workspace)?;
286      }
287    }
288
289    // Create commit using git command for determinism
290    // Map parent SHAs from monorepo to split repo
291    let mut mapped_parents: Vec<String> = params
292      .commit
293      .parent_shas
294      .iter()
295      .filter_map(|parent_sha| params.mapping_store.get_mapping(parent_sha).ok().flatten())
296      .collect();
297
298    // If no mapped parents (because original parents were filtered out),
299    // use the last recreated commit as parent to maintain linear history
300    if mapped_parents.is_empty()
301      && let Some(ref sha) = params.last_recreated_sha
302    {
303      mapped_parents.push(sha.to_string());
304    }
305
306    let sha = self.create_git_commit(&CommitParams {
307      repo_path: params.target_repo_path,
308      message: &params.commit.message,
309      author_name: &params.commit.author,
310      author_email: &params.commit.author_email,
311      committer_name: &params.commit.committer,
312      committer_email: &params.commit.committer_email,
313      timestamp: params.commit.timestamp,
314      parent_shas: &mapped_parents,
315    })?;
316    Ok(Some(sha))
317  }
318
319  /// Create a git commit using git commands for determinism
320  /// Uses git commit-tree for full control over parents
321  fn create_git_commit(&self, params: &CommitParams) -> RailResult<String> {
322    use std::process::Command;
323
324    // Stage all files
325    let status = Command::new("git")
326      .current_dir(params.repo_path)
327      .args(["add", "-A"])
328      .status()
329      .context("Failed to run git add")?;
330
331    if !status.success() {
332      return Err(RailError::Git(GitError::CommandFailed {
333        command: "git add".to_string(),
334        stderr: "git add failed".to_string(),
335      }));
336    }
337
338    // Write the tree
339    let output = Command::new("git")
340      .current_dir(params.repo_path)
341      .args(["write-tree"])
342      .output()
343      .context("Failed to write tree")?;
344
345    if !output.status.success() {
346      return Err(RailError::Git(GitError::CommandFailed {
347        command: "git write-tree".to_string(),
348        stderr: "git write-tree failed".to_string(),
349      }));
350    }
351
352    let tree_sha = String::from_utf8(output.stdout)?.trim().to_string();
353
354    // Prepare environment for deterministic commit
355    let author_date = format!("{} +0000", params.timestamp);
356    let commit_date = format!("{} +0000", params.timestamp);
357
358    // Build commit-tree command
359    let mut cmd = Command::new("git");
360    cmd
361      .current_dir(params.repo_path)
362      .env("GIT_AUTHOR_NAME", params.author_name)
363      .env("GIT_AUTHOR_EMAIL", params.author_email)
364      .env("GIT_AUTHOR_DATE", &author_date)
365      .env("GIT_COMMITTER_NAME", params.committer_name)
366      .env("GIT_COMMITTER_EMAIL", params.committer_email)
367      .env("GIT_COMMITTER_DATE", &commit_date)
368      .arg("commit-tree")
369      .arg(&tree_sha)
370      .arg("-m")
371      .arg(params.message);
372
373    // Add parent arguments
374    for parent in params.parent_shas {
375      cmd.arg("-p").arg(parent);
376    }
377
378    // Execute commit-tree
379    let output = cmd.output().context("Failed to run git commit-tree")?;
380
381    if !output.status.success() {
382      let stderr = String::from_utf8_lossy(&output.stderr);
383      return Err(RailError::Git(GitError::CommandFailed {
384        command: "git commit-tree".to_string(),
385        stderr: stderr.to_string(),
386      }));
387    }
388
389    let commit_sha = String::from_utf8(output.stdout)?.trim().to_string();
390
391    // Update the branch reference
392    Command::new("git")
393      .current_dir(params.repo_path)
394      .args(["update-ref", "HEAD", &commit_sha])
395      .status()
396      .context("Failed to update HEAD")?;
397
398    Ok(commit_sha)
399  }
400
401  /// Check if remote repository exists and has content
402  fn check_remote_exists(&self, remote_url: &str) -> RailResult<bool> {
403    use std::process::Command;
404
405    let output = Command::new("git")
406      .args(["ls-remote", "--heads", remote_url])
407      .output()
408      .context("Failed to check remote")?;
409
410    // If command succeeds and has output, remote exists with content
411    Ok(output.status.success() && !output.stdout.is_empty())
412  }
413
414  /// Execute a split operation (idempotent - re-runs sync new commits only)
415  pub fn split(&self, config: &SplitConfig) -> RailResult<()> {
416    progress!("šŸš‚ Splitting crate: {}", config.crate_name);
417    progress!("   Mode: {:?}", config.mode);
418    progress!("   Target: {}", config.target_repo_path.display());
419
420    // Compile exclude patterns (include uses glob directly)
421    let exclude_patterns = Self::compile_patterns(&config.exclude);
422
423    if !config.include.is_empty() {
424      progress!("   Include patterns: {} configured", config.include.len());
425    }
426    if !config.exclude.is_empty() {
427      progress!("   Exclude patterns: {} configured", config.exclude.len());
428    }
429
430    // Check if target repo already exists (for idempotency)
431    let target_exists = config.target_repo_path.join(".git").exists();
432
433    // Check if remote already exists - warn but allow re-run for idempotency
434    if let Some(ref remote_url) = config.remote_url {
435      let remote_exists = self.check_remote_exists(remote_url)?;
436      if remote_exists && !target_exists {
437        // Remote exists but no local target - user probably wants to use sync instead
438        return Err(RailError::with_help(
439          format!("Split already exists at {}", remote_url),
440          format!(
441            "Split is a one-time operation. To update the split repo, use:\n  \
442             cargo rail sync {}\n\n\
443             This will sync new commits from the monorepo to the split repo.",
444            config.crate_name
445          ),
446        ));
447      }
448      // If both remote and target exist, we'll check mappings below for idempotency
449    }
450
451    // Create or reuse target repo
452    self.ensure_target_repo(&config.target_repo_path)?;
453
454    // Discover workspace-level auxiliary files from workspace
455    let aux_files = AuxiliaryFiles::discover(self.ctx.workspace_root())?;
456    progress!("   Found {} workspace config files", aux_files.count());
457
458    // Discover project files (README, LICENSE) with crate-first fallback
459    let crate_path = &config.crate_paths[0]; // Use first crate path for project files
460    let project_files = ProjectFiles::discover(self.ctx.workspace_root(), crate_path)?;
461    progress!("   Found {} project files (README, LICENSE)", project_files.count());
462
463    // Find additional files to include based on include patterns
464    let additional_files = Self::find_included_files(self.ctx.workspace_root(), &config.include)?;
465    if !additional_files.is_empty() {
466      progress!(
467        "   Found {} additional files from include patterns",
468        additional_files.len()
469      );
470    }
471
472    // Create mapping store and load existing mappings (from both workspace and target)
473    let mut mapping_store = MappingStore::new(config.crate_name.clone());
474    mapping_store.load(self.ctx.workspace_root())?;
475    if target_exists {
476      mapping_store.load(&config.target_repo_path)?;
477    }
478
479    // Walk filtered history to find commits touching the crate
480    let filtered_commits = self.walk_filtered_history(&config.crate_paths)?;
481
482    // Count how many commits are already mapped (for idempotency)
483    let already_mapped_count = filtered_commits
484      .iter()
485      .filter(|c| mapping_store.has_mapping(&c.sha))
486      .count();
487
488    if already_mapped_count > 0 {
489      progress!("   Found {} commits already split (will skip)", already_mapped_count);
490    }
491
492    // Check if all commits are already mapped - nothing to do
493    if already_mapped_count == filtered_commits.len() && !filtered_commits.is_empty() {
494      progress!("\nāœ… Split already up-to-date!");
495      progress!("   All {} commits have been split previously.", filtered_commits.len());
496      progress!("   Target repo: {}", config.target_repo_path.display());
497      return Ok(());
498    }
499
500    if filtered_commits.is_empty() {
501      progress!("   No commits found that touch the crate paths");
502      progress!("   Falling back to current state copy...");
503
504      // Fallback to snapshot copy if no history found
505      match config.mode {
506        SplitMode::Single => {
507          let crate_path = &config.crate_paths[0];
508          self.split_single_crate(crate_path, &config.target_repo_path, &aux_files, &config.crate_name)?;
509        }
510        SplitMode::Combined => {
511          self.split_combined_crates(
512            &config.crate_paths,
513            &config.target_repo_path,
514            &aux_files,
515            &config.crate_name,
516            &config.workspace_mode,
517          )?;
518        }
519      }
520    } else {
521      // Recreate history in target repo
522      progress!("   Processing {} commits...", filtered_commits.len());
523
524      let mut last_recreated_sha: Option<String> = None;
525      let mut skipped_commits = 0usize;
526      let skipped_already_mapped = already_mapped_count;
527
528      // For incremental splits, find the last mapped commit's SHA in target repo
529      // to use as parent for new commits
530      if target_exists && already_mapped_count > 0 {
531        // Find the most recent mapped commit and use its target SHA as last_recreated_sha
532        for commit in filtered_commits.iter().rev() {
533          if let Ok(Some(target_sha)) = mapping_store.get_mapping(&commit.sha) {
534            last_recreated_sha = Some(target_sha);
535            break;
536          }
537        }
538      }
539
540      // Filter out already-mapped commits upfront for accurate counting and windowing
541      let commits_to_process: Vec<&CommitInfo> = filtered_commits
542        .iter()
543        .filter(|c| !mapping_store.has_mapping(&c.sha))
544        .collect();
545
546      let total_new = commits_to_process.len();
547
548      // Process commits in windows to bound memory usage
549      // Each window prefetches files for up to PREFETCH_WINDOW_SIZE commits,
550      // processes them, then drops the prefetch cache before the next window.
551      // This limits memory to O(window_size Ɨ avg_commit_size) instead of O(total Ɨ avg_commit_size)
552      let use_parallel = total_new > 5;
553
554      for (window_idx, window) in commits_to_process.chunks(PREFETCH_WINDOW_SIZE).enumerate() {
555        // Prefetch this window's files in parallel
556        let prefetched_files: HashMap<String, PrefetchedFiles> = if use_parallel {
557          if window_idx == 0 {
558            if total_new > PREFETCH_WINDOW_SIZE {
559              progress!(
560                "   Prefetching in windows of {} commits to bound memory...",
561                PREFETCH_WINDOW_SIZE
562              );
563            } else {
564              progress!("   Prefetching file contents in parallel...");
565            }
566          }
567          // Convert &[&CommitInfo] to Vec<CommitInfo> for prefetch
568          let window_commits: Vec<CommitInfo> = window.iter().map(|c| (*c).clone()).collect();
569          self.prefetch_commit_files(&window_commits, &config.crate_paths)
570        } else {
571          HashMap::new()
572        };
573
574        // Process this window's commits
575        for (idx_in_window, commit) in window.iter().enumerate() {
576          let overall_idx = window_idx * PREFETCH_WINDOW_SIZE + idx_in_window + 1;
577
578          if overall_idx.is_multiple_of(10) || overall_idx == total_new {
579            progress!("   Progress: {}/{} new commits", overall_idx, total_new);
580          }
581
582          // Use prefetched files if available
583          let prefetched = prefetched_files.get(&commit.sha);
584
585          let maybe_sha = self.recreate_commit_in_target(&RecreateCommitParams {
586            commit,
587            crate_paths: &config.crate_paths,
588            target_repo_path: &config.target_repo_path,
589            crate_name: &config.crate_name,
590            mode: &config.mode,
591            workspace_mode: &config.workspace_mode,
592            mapping_store: &mapping_store,
593            last_recreated_sha: last_recreated_sha.as_deref(),
594            prefetched_files: prefetched,
595          })?;
596
597          // Handle skipped commits (dirty history - path didn't exist at this commit)
598          let Some(new_sha) = maybe_sha else {
599            skipped_commits += 1;
600            continue;
601          };
602
603          // Record mapping
604          mapping_store.record_mapping(&commit.sha, &new_sha)?;
605
606          // Track last recreated commit
607          last_recreated_sha = Some(new_sha);
608        }
609
610        // prefetched_files is dropped here at end of window iteration,
611        // freeing memory before the next window is prefetched
612      }
613
614      if skipped_commits > 0 || skipped_already_mapped > 0 {
615        if skipped_commits > 0 {
616          progress!(
617            "   Skipped {} commits where path didn't exist (dirty history)",
618            skipped_commits
619          );
620        }
621        if skipped_already_mapped > 0 {
622          progress!(
623            "   Skipped {} commits already split (idempotent)",
624            skipped_already_mapped
625          );
626        }
627      }
628
629      // Create workspace Cargo.toml if in workspace mode
630      if config.mode == SplitMode::Combined && config.workspace_mode == WorkspaceMode::Workspace {
631        progress!("   Creating workspace Cargo.toml...");
632        self.create_workspace_cargo_toml(&config.crate_paths, &config.target_repo_path)?;
633      }
634
635      // Copy workspace config files and project files to the final state
636      let has_files = !aux_files.is_empty() || project_files.count() > 0 || !additional_files.is_empty();
637      if has_files {
638        progress!("   Copying workspace configs and project files...");
639        aux_files.copy_to_split(self.ctx.workspace_root(), &config.target_repo_path)?;
640        project_files.copy_to_split(self.ctx.workspace_root(), &config.target_repo_path)?;
641
642        // Copy additional files from include patterns
643        if !additional_files.is_empty() {
644          progress!(
645            "   Copying {} additional files from include patterns...",
646            additional_files.len()
647          );
648          for rel_path in &additional_files {
649            let source = self.ctx.workspace_root().join(rel_path);
650            let target = config.target_repo_path.join(rel_path);
651
652            // Skip files that match exclude patterns
653            let path_str = rel_path.to_string_lossy();
654            if Self::should_exclude(&path_str, &exclude_patterns) {
655              continue;
656            }
657
658            // Create parent directories and copy
659            if let Some(parent) = target.parent() {
660              std::fs::create_dir_all(parent)?;
661            }
662            if source.exists() && source.is_file() {
663              std::fs::copy(&source, &target)?;
664            }
665          }
666        }
667
668        // Create a final commit if any files were added
669        // git add -A is safe to run unconditionally (no-op if no changes)
670        std::process::Command::new("git")
671          .current_dir(&config.target_repo_path)
672          .args(["add", "-A"])
673          .status()?;
674
675        // Check if there are staged changes before committing
676        let diff_cached = std::process::Command::new("git")
677          .current_dir(&config.target_repo_path)
678          .args(["diff", "--cached", "--quiet"])
679          .status()?;
680
681        if !diff_cached.success() {
682          // Exit code 1 means there are differences (i.e., staged changes)
683          progress!("   Creating commit for auxiliary files");
684          std::process::Command::new("git")
685            .current_dir(&config.target_repo_path)
686            .args(["commit", "-m", "Add workspace configs and project files"])
687            .status()?;
688        }
689      }
690    }
691
692    // Save mappings to both workspace and target repo
693    mapping_store.save(self.ctx.workspace_root())?;
694    mapping_store.save(&config.target_repo_path)?;
695
696    // Push to remote if URL is configured and is not a local file path
697    if let Some(ref remote_url) = config.remote_url {
698      if !remote_url.is_empty() && !utils::is_local_path(remote_url) {
699        progress!("\nšŸš€ Pushing to remote...");
700
701        // Open the target repo
702        let target_git = SystemGit::open(&config.target_repo_path)?;
703
704        // Add or update remote
705        if !target_git.has_remote("origin")? {
706          progress!("   Adding remote 'origin': {}", remote_url);
707          target_git.add_remote("origin", remote_url)?;
708        } else {
709          progress!("   Remote 'origin' already exists");
710        }
711
712        // Push to remote
713        target_git.push_to_remote("origin", &config.branch)?;
714
715        // Push git-notes
716        mapping_store.push_notes(&config.target_repo_path, "origin")?;
717
718        progress!("   āœ… Pushed to {}", remote_url);
719      } else {
720        progress!("\nšŸ’¾ Split repository created locally");
721        if utils::is_local_path(remote_url) {
722          progress!("   Note: Remote is a local path, skipping push");
723          progress!(
724            "   Local testing mode - split repo at: {}",
725            config.target_repo_path.display()
726          );
727        } else {
728          progress!("   No remote URL configured");
729        }
730        progress!("\n   To push to a real remote later:");
731        progress!("   cd {}", config.target_repo_path.display());
732        progress!("   git remote add origin <url>");
733        progress!("   git push -u origin {}", config.branch);
734      }
735    } else {
736      progress!("\nāš ļø  No remote URL configured - repository created locally only");
737      progress!("   To push manually:");
738      progress!("   cd {}", config.target_repo_path.display());
739      progress!("   git remote add origin <url>");
740      progress!("   git push -u origin {}", config.branch);
741    }
742
743    progress!("\nāœ… Split complete!");
744    progress!("   Target repo: {}", config.target_repo_path.display());
745
746    Ok(())
747  }
748
749  /// Ensure target repository exists and is initialized
750  fn ensure_target_repo(&self, target_path: &Path) -> RailResult<()> {
751    if !target_path.exists() {
752      std::fs::create_dir_all(target_path)
753        .with_context(|| format!("Failed to create target directory: {}", target_path.display()))?;
754    }
755
756    // Check if it's already a git repo
757    let git_dir = target_path.join(".git");
758    if !git_dir.exists() {
759      progress!("   Initializing git repository at {}", target_path.display());
760
761      // Initialize using system git with main as default branch
762      std::process::Command::new("git")
763        .arg("init")
764        .arg("--initial-branch=main")
765        .arg(target_path)
766        .output()
767        .with_context(|| format!("Failed to initialize git repository at {}", target_path.display()))?;
768
769      // Configure git identity from source repository
770      self.configure_git_identity(target_path)?;
771    }
772
773    Ok(())
774  }
775
776  /// Configure git identity in the target repository by copying from source
777  fn configure_git_identity(&self, target_path: &Path) -> RailResult<()> {
778    use std::process::Command;
779
780    // Get identity from source repository
781    let user_name = Command::new("git")
782      .current_dir(self.ctx.workspace_root())
783      .args(["config", "user.name"])
784      .output()
785      .ok()
786      .and_then(|o| {
787        if o.status.success() {
788          Some(String::from_utf8_lossy(&o.stdout).trim().to_string())
789        } else {
790          None
791        }
792      });
793
794    let user_email = Command::new("git")
795      .current_dir(self.ctx.workspace_root())
796      .args(["config", "user.email"])
797      .output()
798      .ok()
799      .and_then(|o| {
800        if o.status.success() {
801          Some(String::from_utf8_lossy(&o.stdout).trim().to_string())
802        } else {
803          None
804        }
805      });
806
807    // Set identity in target repository
808    // Use a fallback if source doesn't have identity configured
809    let name = user_name.as_deref().unwrap_or("Cargo Rail");
810    let email = user_email.as_deref().unwrap_or("cargo-rail@localhost");
811
812    let output = Command::new("git")
813      .current_dir(target_path)
814      .args(["config", "user.name", name])
815      .output()
816      .context("Failed to configure git user.name")?;
817
818    if !output.status.success() {
819      let stderr = String::from_utf8_lossy(&output.stderr);
820      return Err(RailError::Git(GitError::CommandFailed {
821        command: "git config user.name".to_string(),
822        stderr: stderr.to_string(),
823      }));
824    }
825
826    let output = Command::new("git")
827      .current_dir(target_path)
828      .args(["config", "user.email", email])
829      .output()
830      .context("Failed to configure git user.email")?;
831
832    if !output.status.success() {
833      let stderr = String::from_utf8_lossy(&output.stderr);
834      return Err(RailError::Git(GitError::CommandFailed {
835        command: "git config user.email".to_string(),
836        stderr: stderr.to_string(),
837      }));
838    }
839
840    Ok(())
841  }
842
843  /// Split a single crate (move to root of target repo)
844  fn split_single_crate(
845    &self,
846    crate_path: &Path,
847    target_repo_path: &Path,
848    aux_files: &AuxiliaryFiles,
849    crate_name: &str,
850  ) -> RailResult<()> {
851    let source_path = self.ctx.workspace_root().join(crate_path);
852
853    // Copy source files
854    progress!("   Copying source files from {}", crate_path.display());
855    self.copy_directory_recursive(&source_path, target_repo_path)?;
856
857    // Transform Cargo.toml manifest
858    // Single mode is always standalone (no workspace)
859    progress!("   Transforming Cargo.toml");
860    let manifest_path = target_repo_path.join("Cargo.toml");
861    self.apply_manifest_transform(&manifest_path, crate_name, false)?;
862
863    // Copy auxiliary files
864    if !aux_files.is_empty() {
865      progress!("   Copying auxiliary files");
866      aux_files.copy_to_split(self.ctx.workspace_root(), target_repo_path)?;
867    }
868
869    Ok(())
870  }
871
872  /// Split multiple crates (preserve structure in target repo)
873  fn split_combined_crates(
874    &self,
875    crate_paths: &[PathBuf],
876    target_repo_path: &Path,
877    aux_files: &AuxiliaryFiles,
878    crate_name: &str,
879    workspace_mode: &WorkspaceMode,
880  ) -> RailResult<()> {
881    // Determine if target will have a workspace structure
882    let target_has_workspace = *workspace_mode == WorkspaceMode::Workspace;
883
884    for crate_path in crate_paths {
885      let source_path = self.ctx.workspace_root().join(crate_path);
886      let target_path = target_repo_path.join(crate_path);
887
888      progress!("   Copying {} to {}", crate_path.display(), crate_path.display());
889
890      // Create parent directories
891      if let Some(parent) = target_path.parent() {
892        std::fs::create_dir_all(parent)?;
893      }
894
895      self.copy_directory_recursive(&source_path, &target_path)?;
896
897      // Transform Cargo.toml manifest
898      let manifest_path = target_path.join("Cargo.toml");
899      self.apply_manifest_transform(&manifest_path, crate_name, target_has_workspace)?;
900    }
901
902    // Copy auxiliary files
903    if !aux_files.is_empty() {
904      progress!("   Copying auxiliary files");
905      aux_files.copy_to_split(self.ctx.workspace_root(), target_repo_path)?;
906    }
907
908    Ok(())
909  }
910
911  /// Create a workspace Cargo.toml for combined mode with workspace_mode = Workspace
912  fn create_workspace_cargo_toml(&self, crate_paths: &[PathBuf], target_repo_path: &Path) -> RailResult<()> {
913    // Extract workspace members from crate paths
914    let members: Vec<String> = crate_paths.iter().map(|p| p.to_string_lossy().to_string()).collect();
915
916    // Read workspace Cargo.toml from source monorepo
917    let source_workspace_toml = self.ctx.workspace_root().join("Cargo.toml");
918    let source_content = std::fs::read_to_string(&source_workspace_toml).with_context(|| {
919      format!(
920        "Failed to read workspace Cargo.toml from {}",
921        source_workspace_toml.display()
922      )
923    })?;
924
925    // Parse the source Cargo.toml
926    let mut doc: toml_edit::DocumentMut = source_content
927      .parse()
928      .map_err(|e| RailError::message(format!("Failed to parse workspace Cargo.toml: {}", e)))?;
929
930    // Update workspace members
931    if let Some(workspace) = doc.get_mut("workspace")
932      && let Some(table) = workspace.as_table_mut()
933    {
934      // Set members to only the split crates
935      let mut members_array = toml_edit::Array::new();
936      for member in &members {
937        members_array.push(member.as_str());
938      }
939      table.insert("members", toml_edit::value(members_array));
940
941      // Remove exclude if present (not needed for split repo)
942      table.remove("exclude");
943
944      // Filter default-members to only include split crates
945      let members_set: std::collections::HashSet<&str> = members.iter().map(|s| s.as_str()).collect();
946      if let Some(default_members) = table.get_mut("default-members")
947        && let Some(arr) = default_members.as_array_mut()
948      {
949        arr.retain(|item| item.as_str().map(|s| members_set.contains(s)).unwrap_or(false));
950      }
951      // Remove default-members if empty
952      if table
953        .get("default-members")
954        .and_then(|d| d.as_array())
955        .map(|a| a.is_empty())
956        .unwrap_or(false)
957      {
958        table.remove("default-members");
959      }
960
961      // Remove workspace.dependencies - split crates have inlined deps
962      table.remove("dependencies");
963    }
964
965    // Filter profile package specs to only include split crates
966    let members_set: std::collections::HashSet<&str> = members.iter().map(|s| s.as_str()).collect();
967    if let Some(profile) = doc.get_mut("profile").and_then(|p| p.as_table_mut()) {
968      for (_, profile_section) in profile.iter_mut() {
969        if let Some(profile_table) = profile_section.as_table_mut() {
970          if let Some(pkg) = profile_table.get_mut("package").and_then(|p| p.as_table_mut()) {
971            let pkg_names: Vec<String> = pkg.iter().map(|(k, _)| k.to_string()).collect();
972            for pkg_name in pkg_names {
973              if !members_set.contains(pkg_name.as_str()) {
974                pkg.remove(&pkg_name);
975              }
976            }
977          }
978          // Remove empty package table
979          if profile_table
980            .get("package")
981            .and_then(|p| p.as_table())
982            .map(|t| t.is_empty())
983            .unwrap_or(false)
984          {
985            profile_table.remove("package");
986          }
987        }
988      }
989    }
990
991    // Remove package section if present (virtual workspace)
992    doc.remove("package");
993    doc.remove("dependencies");
994    doc.remove("dev-dependencies");
995    doc.remove("build-dependencies");
996
997    // Write to target repo
998    let target_toml = target_repo_path.join("Cargo.toml");
999    std::fs::write(&target_toml, doc.to_string())?;
1000
1001    progress!("   Created workspace Cargo.toml with {} members", members.len());
1002
1003    Ok(())
1004  }
1005
  /// Recursively copy a directory, excluding .git
  ///
  /// Method-style entry point that forwards `source` and `target` unchanged to
  /// the free function `copy_directory_recursive_impl`; no state from `self`
  /// is read. Returns whatever that function returns.
  fn copy_directory_recursive(&self, source: &Path, target: &Path) -> RailResult<()> {
    copy_directory_recursive_impl(source, target)
  }
1010}
1011
1012/// Helper function to recursively copy a directory, excluding .git
1013fn copy_directory_recursive_impl(source: &Path, target: &Path) -> RailResult<()> {
1014  if !source.exists() {
1015    return Err(RailError::message(format!(
1016      "Source path does not exist: {}",
1017      source.display()
1018    )));
1019  }
1020
1021  if source.is_file() {
1022    if let Some(parent) = target.parent() {
1023      std::fs::create_dir_all(parent)?;
1024    }
1025    std::fs::copy(source, target)?;
1026    return Ok(());
1027  }
1028
1029  std::fs::create_dir_all(target)?;
1030
1031  for entry in std::fs::read_dir(source)? {
1032    let entry = entry?;
1033    let file_type = entry.file_type()?;
1034    let file_name = entry.file_name();
1035
1036    // Skip .git directory
1037    if file_name == ".git" {
1038      continue;
1039    }
1040
1041    let source_path = entry.path();
1042    let target_path = target.join(&file_name);
1043
1044    if file_type.is_dir() {
1045      copy_directory_recursive_impl(&source_path, &target_path)?;
1046    } else {
1047      std::fs::copy(&source_path, &target_path)?;
1048    }
1049  }
1050
1051  Ok(())
1052}
1053
#[cfg(test)]
mod tests {
  use super::*;
  use std::fs;
  use tempfile::TempDir;

  /// Resolve the root of the git repository that contains the current
  /// directory.
  ///
  /// Tests execute from the crate directory while the repository itself may
  /// sit at the workspace root. When no repository can be opened, the current
  /// directory is returned as-is.
  fn find_git_root() -> PathBuf {
    let cwd = std::env::current_dir().unwrap();
    SystemGit::open(&cwd)
      .map(|git| git.worktree_root.clone())
      .unwrap_or(cwd)
  }

  /// Copying a directory tree carries over regular files and nested
  /// directories but leaves `.git` behind.
  #[test]
  fn test_copy_directory_recursive() {
    let scratch = TempDir::new().unwrap();
    let source = scratch.path().join("source");
    let target = scratch.path().join("target");

    // Fixture: a manifest, one nested source file, and a `.git` directory
    // that must not be copied.
    fs::create_dir_all(source.join("src")).unwrap();
    for (relative, contents) in [("Cargo.toml", "test"), ("src/lib.rs", "pub fn test() {}")] {
      fs::write(source.join(relative), contents).unwrap();
    }
    fs::create_dir(source.join(".git")).unwrap();

    let workspace_root = find_git_root();
    let ctx = WorkspaceContext::build(&workspace_root).unwrap();
    let engine = SplitEngine::new(&ctx).unwrap();

    engine.copy_directory_recursive(&source, &target).unwrap();

    // Regular content made it across
    for copied in ["Cargo.toml", "src/lib.rs"] {
      assert!(target.join(copied).exists());
    }

    // Repository metadata did not
    assert!(!target.join(".git").exists());
  }
}