Skip to main content

gitcortex_store/
branch.rs

1use std::{
2    fs,
3    hash::{DefaultHasher, Hash, Hasher},
4    path::{Path, PathBuf},
5};
6
7use gitcortex_core::error::{GitCortexError, Result};
8
9// ── Branch name sanitization ──────────────────────────────────────────────────
10
/// Turn a branch name into a string usable as a KuzuDB table name prefix.
///
/// Every character of `branch` is translated independently:
/// - `/` becomes `__`, which keeps the branch hierarchy visible
/// - alphanumeric characters (as defined by [`char::is_alphanumeric`], so
///   non-ASCII letters and digits are kept too) and `_` pass through unchanged
/// - any other character becomes a single `_`
///
/// If the translated name begins with an ASCII digit it is prefixed with
/// `b_`, because table names can't start with a digit.
///
/// Examples:
/// - `main`           → `main`
/// - `feat/auth`      → `feat__auth`
/// - `feat/auth-v2`   → `feat__auth_v2`
/// - `release/v1.0`   → `release__v1_0`
/// - `1-hotfix`       → `b_1_hotfix`
pub fn sanitize(branch: &str) -> String {
    // Room for the input plus a possible `b_` prefix; `/` expansion may still grow it.
    let mut out = String::with_capacity(branch.len() + 2);

    for ch in branch.chars() {
        match ch {
            '/' => out.push_str("__"),
            c if c.is_alphanumeric() || c == '_' => out.push(c),
            _ => out.push('_'),
        }
    }

    match out.chars().next() {
        Some(first) if first.is_ascii_digit() => format!("b_{out}"),
        _ => out,
    }
}
41
42// ── Repository identity ───────────────────────────────────────────────────────
43
/// Derive a 16-hex-char ID from the repo root path.
///
/// The path is converted with [`Path::to_string_lossy`], hashed with
/// [`DefaultHasher`], and the 64-bit result is rendered as zero-padded
/// lowercase hex. The ID namespaces per-repo data directories without any
/// path-encoding concerns.
///
/// The same path always yields the same ID within one build of the program.
///
/// NOTE(review): the standard library does not specify `DefaultHasher`'s
/// algorithm and may change it between Rust releases. IDs persisted on disk
/// could therefore change after a toolchain upgrade — confirm this is
/// acceptable, or pin an explicit hash function.
pub fn repo_id(repo_root: &Path) -> String {
    let path_text = repo_root.to_string_lossy();

    let mut state = DefaultHasher::new();
    path_text.hash(&mut state);
    let digest: u64 = state.finish();

    format!("{digest:016x}")
}
51
52// ── XDG data paths ────────────────────────────────────────────────────────────
53
/// Root data directory for a repo: `$XDG_DATA_HOME/gitcortex/{repo_id}/`
///
/// Falls back to `$HOME/.local/share/gitcortex/{repo_id}/` when
/// `XDG_DATA_HOME` is unset, empty, or not an absolute path.
pub fn data_dir(repo_id: &str) -> PathBuf {
    xdg_data_base().join("gitcortex").join(repo_id)
}

/// The user's home directory taken from `$HOME`.
///
/// Falls back to `.` (the current directory) when `HOME` is unset or empty,
/// so callers always get a usable path.
fn home_dir() -> PathBuf {
    std::env::var_os("HOME")
        .filter(|home| !home.is_empty())
        .map(PathBuf::from)
        .unwrap_or_else(|| PathBuf::from("."))
}

/// Base directory for user data, resolved per the XDG Base Directory spec.
///
/// `XDG_DATA_HOME` is honoured only when it holds an absolute path. The spec
/// says an unset or empty value means "use the default", and a relative value
/// must be ignored. `var_os` is used so a valid non-UTF-8 path is not
/// silently discarded.
fn xdg_data_base() -> PathBuf {
    std::env::var_os("XDG_DATA_HOME")
        .map(PathBuf::from)
        // An empty path is not absolute, so this also rejects `XDG_DATA_HOME=""`.
        .filter(|dir| dir.is_absolute())
        .unwrap_or_else(|| home_dir().join(".local/share"))
}

/// Shared model cache directory: `$XDG_DATA_HOME/gitcortex/models`
///
/// Shared across all repos — the embedding model is identical everywhere.
/// fastembed-rs writes the downloaded model here instead of `.fastembed_cache`
/// in the repo root.
///
/// Uses the same base-directory fallback rules as [`data_dir`].
pub fn models_dir() -> PathBuf {
    xdg_data_base().join("gitcortex").join("models")
}
79
80/// Path to the single KuzuDB file for a repo (all branches, namespaced by table prefix).
81pub fn db_path(repo_id: &str) -> PathBuf {
82    data_dir(repo_id).join("graph.kuzu")
83}
84
85/// Path to the last-indexed SHA file for a specific branch.
86pub fn last_sha_path(repo_id: &str, branch: &str) -> PathBuf {
87    data_dir(repo_id).join(format!("{}.sha", sanitize(branch)))
88}
89
90/// Path to the persisted schema version marker for a repo.
91pub fn schema_version_path(repo_id: &str) -> PathBuf {
92    data_dir(repo_id).join("schema_version")
93}
94
95/// Read the persisted schema version, returning 0 if not present.
96pub fn read_schema_version(repo_id: &str) -> u32 {
97    let path = schema_version_path(repo_id);
98    std::fs::read_to_string(&path)
99        .ok()
100        .and_then(|s| s.trim().parse().ok())
101        .unwrap_or(0)
102}
103
104/// Write the schema version marker.
105pub fn write_schema_version(repo_id: &str, version: u32) -> Result<()> {
106    let path = schema_version_path(repo_id);
107    if let Some(parent) = path.parent() {
108        std::fs::create_dir_all(parent)?;
109    }
110    std::fs::write(&path, version.to_string()).map_err(GitCortexError::Io)
111}
112
113/// Wipe all per-repo data (DB + SHA files) so a fresh full index can run.
114pub fn wipe_repo_data(repo_id: &str) {
115    let dir = data_dir(repo_id);
116    let _ = std::fs::remove_dir_all(&dir);
117}
118
119// ── last_sha persistence ──────────────────────────────────────────────────────
120
121pub fn read_last_sha(repo_id: &str, branch: &str) -> Result<Option<String>> {
122    let path = last_sha_path(repo_id, branch);
123    match fs::read_to_string(&path) {
124        Ok(s) => Ok(Some(s.trim().to_owned())),
125        Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(None),
126        Err(e) => Err(GitCortexError::Io(e)),
127    }
128}
129
130pub fn write_last_sha(repo_id: &str, branch: &str, sha: &str) -> Result<()> {
131    let path = last_sha_path(repo_id, branch);
132    if let Some(parent) = path.parent() {
133        fs::create_dir_all(parent)?;
134    }
135    fs::write(&path, sha).map_err(GitCortexError::Io)
136}
137
138// ── Tests ─────────────────────────────────────────────────────────────────────
139
#[cfg(test)]
mod tests {
    use super::*;

    /// A purely alphabetic name passes through untouched.
    #[test]
    fn sanitize_plain() {
        let got = sanitize("main");
        assert_eq!(got, "main");
    }

    /// The hierarchy separator `/` expands to a double underscore.
    #[test]
    fn sanitize_slash_becomes_double_underscore() {
        let got = sanitize("feat/auth");
        assert_eq!(got, "feat__auth");
    }

    /// `-` and `.` each collapse to a single underscore.
    #[test]
    fn sanitize_dash_and_dot() {
        let got = sanitize("release/v1.0-rc");
        assert_eq!(got, "release__v1_0_rc");
    }

    /// A name starting with a digit gains the `b_` prefix.
    #[test]
    fn sanitize_leading_digit() {
        let got = sanitize("1-hotfix");
        assert_eq!(got, "b_1_hotfix");
    }

    /// Hashing the same path twice yields the same ID.
    #[test]
    fn repo_id_is_stable() {
        let root = Path::new("/home/user/myproject");
        let first = repo_id(root);
        let second = repo_id(root);
        assert_eq!(first, second);
    }

    /// Two different paths yield different IDs.
    #[test]
    fn repo_id_differs_across_paths() {
        let id_a = repo_id(Path::new("/home/user/proj-a"));
        let id_b = repo_id(Path::new("/home/user/proj-b"));
        assert_ne!(id_a, id_b);
    }
}