Skip to main content

semver_analyzer_core/
git.rs

1//! Language-agnostic git utilities.
2//!
3//! Provides shared git operations used across all language crates:
4//! - File reading from git refs (`read_git_file`)
5//! - File diffing between refs (`git_diff_file`)
6//! - Ref name sanitization (`sanitize_ref_name`)
7//! - Worktree path computation (`worktree_path_for`)
8//! - RAII worktree management (`WorktreeGuard`)
9//!
10//! These utilities were consolidated from duplicate implementations in
11//! `crates/ts/` and `crates/java/`. Language crates should use these
12//! directly (or compose with `WorktreeGuard`) rather than reimplementing
13//! git plumbing.
14
15use anyhow::{Context, Result};
16use std::collections::hash_map::DefaultHasher;
17use std::hash::{Hash, Hasher};
18use std::path::{Path, PathBuf};
19use std::process::Command;
20
21/// Top-level directory name for worktrees in the system temp dir.
22const WORKTREE_DIR_NAME: &str = "semver-worktrees";
23
24// ── Git file operations ──────────────────────────────────────────────
25
26/// Read a file from a git ref via `git show <ref>:<path>`.
27///
28/// Returns `None` if the file doesn't exist at the given ref,
29/// the git command fails, or the output is not valid UTF-8.
30/// All failures are logged at `trace` level for debugging with
31/// `--log-level trace --log-file debug.log`.
32pub fn read_git_file(repo: &Path, git_ref: &str, file_path: &str) -> Option<String> {
33    let output = Command::new("git")
34        .args(["show", &format!("{git_ref}:{file_path}")])
35        .current_dir(repo)
36        .output()
37        .map_err(|e| {
38            tracing::trace!(
39                %e,
40                repo = %repo.display(),
41                %git_ref,
42                %file_path,
43                "git show failed to execute"
44            );
45            e
46        })
47        .ok()?;
48
49    if !output.status.success() {
50        tracing::trace!(
51            repo = %repo.display(),
52            %git_ref,
53            %file_path,
54            stderr = %String::from_utf8_lossy(&output.stderr).trim(),
55            "git show returned non-zero"
56        );
57        return None;
58    }
59
60    String::from_utf8(output.stdout)
61        .map_err(|e| {
62            tracing::trace!(
63                %e,
64                %file_path,
65                "git show output was not valid UTF-8"
66            );
67            e
68        })
69        .ok()
70}
71
72/// Get the diff of a single file between two refs via `git diff <from>..<to> -- <path>`.
73///
74/// Returns `None` if the file has no changes between the refs,
75/// the git command fails, or the output is empty.
76/// All failures are logged at `trace` level.
77pub fn git_diff_file(repo: &Path, from_ref: &str, to_ref: &str, file_path: &str) -> Option<String> {
78    let output = Command::new("git")
79        .args([
80            "-C",
81            &repo.to_string_lossy(),
82            "diff",
83            &format!("{from_ref}..{to_ref}"),
84            "--",
85            file_path,
86        ])
87        .output()
88        .map_err(|e| {
89            tracing::trace!(
90                %e,
91                repo = %repo.display(),
92                %from_ref,
93                %to_ref,
94                %file_path,
95                "git diff failed to execute"
96            );
97            e
98        })
99        .ok()?;
100
101    if !output.status.success() {
102        tracing::trace!(
103            repo = %repo.display(),
104            %from_ref,
105            %to_ref,
106            %file_path,
107            stderr = %String::from_utf8_lossy(&output.stderr).trim(),
108            "git diff returned non-zero"
109        );
110        return None;
111    }
112
113    let content = String::from_utf8_lossy(&output.stdout).to_string();
114    if content.is_empty() {
115        None
116    } else {
117        Some(content)
118    }
119}
120
121// ── Deprecation commit co-change analysis ────────────────────────────
122
/// A commit that added files to a deprecated component directory.
///
/// Two values compare equal when both the SHA and the component name
/// match, which allows deduplication and direct `assert_eq!` in tests.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct DeprecationCommit {
    /// Short commit SHA.
    pub sha: String,
    /// The deprecated component name extracted from the path
    /// (e.g., "Tile" from `deprecated/components/Tile/Tile.tsx`).
    pub component: String,
}
132
133/// Find commits between `from_ref` and `to_ref` that added files to
134/// `deprecated/components/` directories (i.e., commits that deprecated
135/// a component).
136///
137/// Runs `git log --diff-filter=A` to find commits that added `.tsx`/`.ts`
138/// source files to deprecated component directories. Returns a list of
139/// `(sha, component_name)` pairs.
140///
141/// Returns an empty vec on any git failure (shallow clone, invalid refs, etc.).
142pub fn find_deprecation_commits(
143    repo: &Path,
144    from_ref: &str,
145    to_ref: &str,
146) -> Vec<DeprecationCommit> {
147    // Use git log with --diff-filter=A to find commits that ADDED files
148    // to deprecated component directories. The --name-only flag gives us
149    // the file paths so we can extract the component name.
150    let output = Command::new("git")
151        .args([
152            "log",
153            "--diff-filter=A",
154            "--name-only",
155            "--pretty=format:%h",
156            &format!("{}..{}", from_ref, to_ref),
157            "--",
158            "*/deprecated/components/*/[A-Z]*.tsx",
159            "*/deprecated/components/*/[A-Z]*.ts",
160        ])
161        .current_dir(repo)
162        .output();
163
164    let output = match output {
165        Ok(o) if o.status.success() => o,
166        Ok(o) => {
167            tracing::debug!(
168                stderr = %String::from_utf8_lossy(&o.stderr).trim(),
169                "git log for deprecation commits returned non-zero"
170            );
171            return vec![];
172        }
173        Err(e) => {
174            tracing::debug!(%e, "Failed to run git log for deprecation commits");
175            return vec![];
176        }
177    };
178
179    let stdout = String::from_utf8_lossy(&output.stdout);
180    let mut result = Vec::new();
181    let mut current_sha = String::new();
182
183    for line in stdout.lines() {
184        let line = line.trim();
185        if line.is_empty() {
186            continue;
187        }
188
189        // Lines that don't contain '/' are commit SHAs from --pretty=format:%h
190        if !line.contains('/') {
191            current_sha = line.to_string();
192            continue;
193        }
194
195        // File path line — extract the component name from
196        // "*/deprecated/components/<ComponentName>/<file>"
197        if current_sha.is_empty() {
198            continue;
199        }
200
201        if let Some(component) = extract_component_from_deprecated_path(line) {
202            // Avoid duplicates: same commit may add multiple files for one component
203            if !result
204                .iter()
205                .any(|dc: &DeprecationCommit| dc.sha == current_sha && dc.component == component)
206            {
207                result.push(DeprecationCommit {
208                    sha: current_sha.clone(),
209                    component,
210                });
211            }
212        }
213    }
214
215    result
216}
217
/// Pull the component name out of a deprecated component file path.
///
/// Scans the `/`-separated segments for the first `deprecated` segment
/// immediately followed by `components` and returns the segment right
/// after that pair. Returns `None` when no such triple of consecutive
/// segments exists.
fn extract_component_from_deprecated_path(path: &str) -> Option<String> {
    let segments: Vec<&str> = path.split('/').collect();
    segments
        .windows(3)
        .find(|window| window[0] == "deprecated" && window[1] == "components")
        .map(|window| window[2].to_string())
}
231
232/// Find component families whose source files were modified in the given commit.
233///
234/// Runs `git show --name-status --diff-filter=AM` to find Added or Modified files.
235/// Filters to source files (`.tsx`/`.ts`) in non-deprecated `components/` directories,
236/// excluding index files, tests, examples, docs, snapshots, and CSS.
237///
238/// Returns a deduplicated list of component family names (e.g., `["Card"]`).
239/// The `deprecated_family` parameter is excluded from results (to avoid
240/// self-matches), as are same-name families (already handled by Phase A.5).
241pub fn commit_co_changed_families(
242    repo: &Path,
243    commit_sha: &str,
244    deprecated_family: &str,
245) -> Vec<String> {
246    let output = Command::new("git")
247        .args([
248            "show",
249            "--name-only",
250            "--diff-filter=AM",
251            "--pretty=format:",
252            commit_sha,
253        ])
254        .current_dir(repo)
255        .output();
256
257    let output = match output {
258        Ok(o) if o.status.success() => o,
259        Ok(o) => {
260            tracing::debug!(
261                sha = commit_sha,
262                stderr = %String::from_utf8_lossy(&o.stderr).trim(),
263                "git show for commit co-change returned non-zero"
264            );
265            return vec![];
266        }
267        Err(e) => {
268            tracing::debug!(%e, sha = commit_sha, "Failed to run git show for co-change");
269            return vec![];
270        }
271    };
272
273    let stdout = String::from_utf8_lossy(&output.stdout);
274    let mut families = std::collections::HashSet::new();
275
276    for line in stdout.lines() {
277        let line = line.trim();
278        if line.is_empty() {
279            continue;
280        }
281
282        // Must be in a non-deprecated components/ directory
283        if !line.contains("/components/") || line.contains("/deprecated/") {
284            continue;
285        }
286
287        // Must be a source file (.tsx or .ts)
288        if !line.ends_with(".tsx") && !line.ends_with(".ts") {
289            continue;
290        }
291
292        // Exclude non-source files
293        if line.contains("/examples/")
294            || line.contains("/__tests__/")
295            || line.contains("__snapshots__")
296            || line.ends_with(".test.tsx")
297            || line.ends_with(".test.ts")
298            || line.ends_with(".spec.tsx")
299            || line.ends_with(".spec.ts")
300            || line.ends_with(".css")
301            || line.ends_with(".md")
302            || line.ends_with(".snap")
303        {
304            continue;
305        }
306
307        // Exclude index/barrel files
308        let filename = line.rsplit('/').next().unwrap_or("");
309        if filename == "index.ts" || filename == "index.tsx" {
310            continue;
311        }
312
313        // Extract the component family name from the path:
314        // "packages/.../components/<FamilyName>/FileName.tsx" → "FamilyName"
315        if let Some(family) = extract_family_from_components_path(line) {
316            // Exclude the deprecated family itself and same-name families
317            if family != deprecated_family {
318                families.insert(family);
319            }
320        }
321    }
322
323    families.into_iter().collect()
324}
325
/// Pull the component family name out of a non-deprecated components path.
///
/// Walks the `/`-separated segments and returns the segment that follows
/// the first `components` segment which is not directly preceded by
/// `deprecated`. Returns `None` when there is no such segment or when
/// `components` is the final segment.
fn extract_family_from_components_path(path: &str) -> Option<String> {
    let mut segments = path.split('/').peekable();
    let mut previous: Option<&str> = None;

    while let Some(segment) = segments.next() {
        // `deprecated/components/...` belongs to the deprecated tree, so
        // keep scanning past it.
        if segment == "components" && previous != Some("deprecated") {
            if let Some(family) = segments.peek() {
                return Some((*family).to_string());
            }
        }
        previous = Some(segment);
    }

    None
}
342
343// ── Ref name utilities ───────────────────────────────────────────────
344
/// Sanitize a git ref name for use as a directory name.
///
/// Replaces characters that are invalid in file paths (`/`, `\`, `:`,
/// `*`, `?`, `"`, `<`, `>`, `|`) and ASCII control characters with `_`.
///
/// The result is limited to at most 100 bytes to avoid path length
/// issues. When the limit falls inside a multi-byte UTF-8 character, the
/// cut is moved back to the preceding character boundary, so the result
/// is always valid UTF-8 and may be slightly shorter than 100 bytes.
pub fn sanitize_ref_name(git_ref: &str) -> String {
    const MAX_LEN: usize = 100;

    let mut sanitized: String = git_ref
        .chars()
        .map(|c| {
            let unsafe_in_path = matches!(c, '/' | '\\' | ':' | '*' | '?' | '"' | '<' | '>' | '|');
            if unsafe_in_path || c.is_ascii_control() {
                '_'
            } else {
                c
            }
        })
        .collect();

    if sanitized.len() > MAX_LEN {
        // Slicing at an arbitrary byte offset panics on non-ASCII refs, so
        // back up to the nearest boundary (offset 0 always is one).
        let mut end = MAX_LEN;
        while !sanitized.is_char_boundary(end) {
            end -= 1;
        }
        sanitized.truncate(end);
    }

    sanitized
}
367
/// Derive a fixed-width hexadecimal digest from a repo path.
///
/// The digest names the per-repo worktree directory in the system temp
/// dir. Hashing the same path again yields the same 16-character
/// lowercase hex string.
fn repo_hash(repo: &Path) -> String {
    let mut state = DefaultHasher::default();
    Hash::hash(repo, &mut state);
    let digest = state.finish();
    // Zero-padded so the directory name always has 16 hex digits.
    format!("{digest:016x}")
}
377
378/// Generate a deterministic worktree path for a given ref.
379///
380/// Path format: `<tmp>/semver-worktrees/<repo-hash>/<sanitized-ref>`
381///
382/// Worktrees are placed in the system temp dir rather than inside the
383/// repo to avoid polluting the working tree. On crash, orphaned
384/// worktrees sit in `/tmp/` where the OS cleans them up on reboot.
385/// The repo hash ensures different repos don't collide.
386pub fn worktree_path_for(repo: &Path, git_ref: &str) -> PathBuf {
387    let sanitized = sanitize_ref_name(git_ref);
388    std::env::temp_dir()
389        .join(WORKTREE_DIR_NAME)
390        .join(repo_hash(repo))
391        .join(sanitized)
392}
393
394/// Return the parent directory for all worktrees of a given repo.
395///
396/// Path format: `<tmp>/semver-worktrees/<repo-hash>/`
397pub fn worktree_dir_for(repo: &Path) -> PathBuf {
398    std::env::temp_dir()
399        .join(WORKTREE_DIR_NAME)
400        .join(repo_hash(repo))
401}
402
403// ── WorktreeGuard ────────────────────────────────────────────────────
404
/// RAII guard for a temporary git worktree.
///
/// Creates a detached worktree on construction, removes it on drop.
/// This provides the language-agnostic foundation — just git checkout,
/// no build steps. Language crates that need build steps (npm install,
/// tsc, mvn compile) should compose with this guard:
///
/// ```ignore
/// // In a language crate:
/// pub struct TsWorktreeGuard {
///     inner: semver_analyzer_core::git::WorktreeGuard,
///     warnings: Vec<ExtractionWarning>,
/// }
/// ```
#[derive(Debug)]
pub struct WorktreeGuard {
    /// Repository the worktree belongs to; git commands run from here.
    repo_root: PathBuf,
    /// Location of the checked-out worktree on disk.
    worktree_path: PathBuf,
    /// The ref the worktree was created for, as supplied by the caller.
    git_ref: String,
    /// Whether the worktree was actually created, i.e. whether drop has
    /// anything to remove.
    created: bool,
}
425
426impl WorktreeGuard {
427    /// Create a new worktree for the given git ref.
428    ///
429    /// Validates the repository and ref, then creates a detached worktree
430    /// at `<repo>/.semver-worktrees/<sanitized-ref>`. If a stale worktree
431    /// exists at the same path, it is removed first.
432    ///
433    /// On drop, the worktree is automatically removed.
434    pub fn new(repo: &Path, git_ref: &str) -> Result<Self> {
435        let repo = repo
436            .canonicalize()
437            .with_context(|| format!("Failed to canonicalize repo path: {}", repo.display()))?;
438        let repo = repo.as_path();
439
440        validate_git_repo(repo)?;
441        validate_git_ref(repo, git_ref)?;
442
443        let worktree_path = worktree_path_for(repo, git_ref);
444
445        let mut guard = Self {
446            repo_root: repo.to_path_buf(),
447            worktree_path,
448            git_ref: git_ref.to_string(),
449            created: false,
450        };
451
452        // Ensure parent directory exists
453        if let Some(parent) = guard.worktree_path.parent() {
454            std::fs::create_dir_all(parent)
455                .context("Failed to create worktree parent directory")?;
456        }
457
458        // Remove stale worktree at same path if it exists
459        if guard.worktree_path.exists() {
460            let _ = remove_worktree(repo, &guard.worktree_path);
461            let _ = std::fs::remove_dir_all(&guard.worktree_path);
462        }
463
464        // Create worktree
465        create_worktree(repo, git_ref, &guard.worktree_path)?;
466        guard.created = true;
467
468        Ok(guard)
469    }
470
471    /// Path to the worktree directory.
472    pub fn path(&self) -> &Path {
473        &self.worktree_path
474    }
475
476    /// The git ref this worktree was created for.
477    pub fn git_ref(&self) -> &str {
478        &self.git_ref
479    }
480
481    /// Scan for and remove stale worktrees from previous crashed runs.
482    ///
483    /// Looks in `<tmp>/semver-worktrees/<repo-hash>/` for any existing
484    /// directories and attempts to clean them up via `git worktree remove`.
485    pub fn cleanup_stale(repo: &Path) -> Result<usize> {
486        let repo = repo
487            .canonicalize()
488            .with_context(|| format!("Failed to canonicalize repo path: {}", repo.display()))?;
489        let repo = repo.as_path();
490        let worktree_dir = worktree_dir_for(repo);
491        if !worktree_dir.exists() {
492            return Ok(0);
493        }
494
495        let mut cleaned = 0;
496        let entries =
497            std::fs::read_dir(&worktree_dir).context("Failed to read worktree directory")?;
498
499        for entry in entries.flatten() {
500            if entry.file_type().map(|t| t.is_dir()).unwrap_or(false) {
501                let path = entry.path();
502                tracing::info!(path = %path.display(), "Cleaning up stale worktree");
503                if remove_worktree(repo, &path).is_ok() {
504                    cleaned += 1;
505                } else {
506                    // If git worktree remove fails, try force-removing the directory
507                    let _ = std::fs::remove_dir_all(&path);
508                    cleaned += 1;
509                }
510            }
511        }
512
513        // Remove the parent directory if it's now empty
514        if std::fs::read_dir(&worktree_dir)
515            .map(|mut d| d.next().is_none())
516            .unwrap_or(true)
517        {
518            let _ = std::fs::remove_dir(&worktree_dir);
519        }
520
521        Ok(cleaned)
522    }
523}
524
525impl Drop for WorktreeGuard {
526    fn drop(&mut self) {
527        if self.created {
528            if let Err(e) = remove_worktree(&self.repo_root, &self.worktree_path) {
529                tracing::warn!(
530                    path = %self.worktree_path.display(),
531                    error = %e,
532                    "Failed to remove worktree"
533                );
534                // Last resort: force remove the directory
535                let _ = std::fs::remove_dir_all(&self.worktree_path);
536            }
537        }
538    }
539}
540
541// ── Internal helpers ─────────────────────────────────────────────────
542
543/// Validate that the given path is a git repository.
544fn validate_git_repo(repo: &Path) -> Result<()> {
545    let output = Command::new("git")
546        .args(["rev-parse", "--git-dir"])
547        .current_dir(repo)
548        .output()
549        .context("Failed to run git")?;
550
551    if output.status.success() {
552        Ok(())
553    } else {
554        anyhow::bail!("Not a git repository: {}", repo.display())
555    }
556}
557
558/// Validate that a git ref exists in the repository.
559fn validate_git_ref(repo: &Path, git_ref: &str) -> Result<()> {
560    let output = Command::new("git")
561        .args(["rev-parse", "--verify", git_ref])
562        .current_dir(repo)
563        .output()
564        .context("Failed to validate git ref")?;
565
566    if output.status.success() {
567        Ok(())
568    } else {
569        anyhow::bail!("Git ref '{}' not found", git_ref)
570    }
571}
572
573/// Create a git worktree at the given path for the given ref.
574fn create_worktree(repo: &Path, git_ref: &str, worktree_path: &Path) -> Result<()> {
575    let output = Command::new("git")
576        .args([
577            "worktree",
578            "add",
579            "--detach",
580            &worktree_path.to_string_lossy(),
581            git_ref,
582        ])
583        .current_dir(repo)
584        .output()
585        .context("Failed to run git worktree add")?;
586
587    if output.status.success() {
588        Ok(())
589    } else {
590        let stderr = String::from_utf8_lossy(&output.stderr);
591        anyhow::bail!(
592            "git worktree add failed at {}: {}",
593            worktree_path.display(),
594            stderr.trim()
595        )
596    }
597}
598
599/// Remove a git worktree.
600fn remove_worktree(repo: &Path, worktree_path: &Path) -> Result<()> {
601    let output = Command::new("git")
602        .args([
603            "worktree",
604            "remove",
605            "--force",
606            &worktree_path.to_string_lossy(),
607        ])
608        .current_dir(repo)
609        .output()
610        .context("Failed to run git worktree remove")?;
611
612    if output.status.success() {
613        Ok(())
614    } else {
615        let stderr = String::from_utf8_lossy(&output.stderr);
616        anyhow::bail!(
617            "git worktree remove failed at {}: {}",
618            worktree_path.display(),
619            stderr.trim()
620        )
621    }
622}
623
#[cfg(test)]
mod tests {
    use super::*;

    /// Repo path shared by the worktree path tests; it never touches disk.
    const SAMPLE_REPO: &str = "/repos/my-project";

    // ── Ref name sanitization ───────────────────────────────────────

    #[test]
    fn sanitize_simple_ref() {
        let cleaned = sanitize_ref_name("v1.0.0");
        assert_eq!(cleaned, "v1.0.0");
    }

    #[test]
    fn sanitize_ref_with_slashes() {
        let cleaned = sanitize_ref_name("feature/my-branch");
        assert_eq!(cleaned, "feature_my-branch");
    }

    #[test]
    fn sanitize_ref_with_special_chars() {
        let cleaned = sanitize_ref_name("ref:with*special?chars");
        assert_eq!(cleaned, "ref_with_special_chars");
    }

    #[test]
    fn sanitize_long_ref_truncated() {
        let oversized = "a".repeat(150);
        assert_eq!(sanitize_ref_name(&oversized).len(), 100);
    }

    // ── Worktree path computation ───────────────────────────────────

    #[test]
    fn worktree_path_in_tmp_dir() {
        let repo = Path::new(SAMPLE_REPO);
        let mut expected = std::env::temp_dir();
        expected.push("semver-worktrees");
        expected.push(repo_hash(repo));
        expected.push("v1.0.0");
        assert_eq!(worktree_path_for(repo, "v1.0.0"), expected);
    }

    #[test]
    fn worktree_path_sanitizes_ref() {
        let repo = Path::new(SAMPLE_REPO);
        let location = worktree_path_for(repo, "feature/branch");
        assert!(location.ends_with("feature_branch"));
        // The worktree must live under the tmp dir, never inside the repo.
        assert!(!location.starts_with(repo));
    }

    #[test]
    fn worktree_path_deterministic_per_repo() {
        let repo = Path::new(SAMPLE_REPO);
        let first = worktree_path_for(repo, "v1.0.0");
        let second = worktree_path_for(repo, "v1.0.0");
        assert_eq!(first, second);
    }

    #[test]
    fn worktree_path_different_repos_differ() {
        let for_a = worktree_path_for(Path::new("/repos/project-a"), "v1.0.0");
        let for_b = worktree_path_for(Path::new("/repos/project-b"), "v1.0.0");
        assert_ne!(for_a, for_b);
    }

    // ── Deprecation commit co-change analysis tests ─────────────────

    #[test]
    fn extract_component_from_deprecated_path_standard() {
        let path = "packages/react-core/src/deprecated/components/Tile/Tile.tsx";
        assert_eq!(
            extract_component_from_deprecated_path(path).as_deref(),
            Some("Tile")
        );
    }

    #[test]
    fn extract_component_from_deprecated_path_nested() {
        let path = "packages/react-core/src/deprecated/components/Modal/ModalBox.tsx";
        assert_eq!(
            extract_component_from_deprecated_path(path).as_deref(),
            Some("Modal")
        );
    }

    #[test]
    fn extract_component_from_deprecated_path_non_deprecated() {
        let path = "packages/react-core/src/components/Card/Card.tsx";
        assert!(extract_component_from_deprecated_path(path).is_none());
    }

    #[test]
    fn extract_family_from_components_path_standard() {
        let path = "packages/react-core/src/components/Card/CardHeader.tsx";
        assert_eq!(
            extract_family_from_components_path(path).as_deref(),
            Some("Card")
        );
    }

    #[test]
    fn extract_family_from_components_path_excludes_deprecated() {
        // Paths under deprecated/components must not yield a family.
        let path = "packages/react-core/src/deprecated/components/Tile/Tile.tsx";
        assert!(extract_family_from_components_path(path).is_none());
    }

    #[test]
    fn extract_family_from_components_path_no_match() {
        let path = "packages/react-core/src/helpers/util.ts";
        assert!(extract_family_from_components_path(path).is_none());
    }
}