Skip to main content

mimir_core/
workspace.rs

1//! `WorkspaceId` — stable identifier for a workspace. Implements
2//! `docs/concepts/workspace-model.md` § 3.
3
4use std::fmt;
5use std::path::{Path, PathBuf};
6
7use sha2::{Digest, Sha256};
8use thiserror::Error;
9use ulid::Ulid;
10
11/// Errors returned when constructing a [`WorkspaceId`].
12#[derive(Debug, Error, PartialEq)]
13pub enum WorkspaceIdError {
14    /// The git-remote URL provided to [`WorkspaceId::from_git_remote`] was
15    /// empty after normalisation.
16    #[error("empty git remote URL")]
17    EmptyRemote,
18}
19
20/// Errors returned by workspace detection.
21#[derive(Debug, Error)]
22pub enum WorkspaceError {
23    /// A filesystem operation failed while walking ancestors or reading
24    /// the git config.
25    #[error("workspace I/O error: {0}")]
26    Io(#[source] std::io::Error),
27
28    /// A `.git` directory was found but its `config` file lacked an
29    /// `[remote "origin"]` section or a `url` key within it.
30    #[error("{path}: .git/config has no origin remote URL")]
31    NoOriginRemote {
32        /// Path to the `.git` directory where the config was inspected.
33        path: PathBuf,
34    },
35
36    /// The origin URL from `.git/config` was malformed (empty after
37    /// normalisation).
38    #[error("{path}: origin URL normalises to empty string")]
39    InvalidRemote {
40        /// Path to the `.git` directory where the malformed URL was read.
41        path: PathBuf,
42    },
43
44    /// No `.git` directory found at or above the starting path, and no
45    /// explicit non-git workspace marker provided.
46    #[error("no active workspace: walked to filesystem root from {start} without finding .git")]
47    NoActiveWorkspace {
48        /// The walk origin.
49        start: PathBuf,
50    },
51}
52
/// A workspace identifier.
///
/// Stable across sessions and across machines. Two mechanisms exist
/// (per `docs/concepts/workspace-model.md` § 3):
///
/// - **Git-backed** — `WorkspaceId::from_git_remote(origin_url)` produces
///   a deterministic hash of the normalised remote URL. Branch is *not*
///   part of the ID by default; all branches of a repo share one
///   workspace.
/// - **Non-git** — `WorkspaceId::from_ulid(Ulid)` creates an explicit
///   identifier for workspaces not backed by a git repo.
///
/// # Examples
///
/// ```
/// # #![allow(clippy::unwrap_used)]
/// use mimir_core::WorkspaceId;
///
/// let ssh = WorkspaceId::from_git_remote("git@github.com:buildepicshit/Mimir.git").unwrap();
/// let https = WorkspaceId::from_git_remote("https://github.com/buildepicshit/Mimir").unwrap();
/// let https_git =
///     WorkspaceId::from_git_remote("https://GitHub.com/buildepicshit/Mimir.git").unwrap();
///
/// // Normalisation folds ASCII case and drops a trailing `.git`, so two
/// // spellings of the same HTTPS remote share one workspace ID.
/// // (Exact normalisation rules live in § 3.1 of workspace-model.md.)
/// assert_eq!(https, https_git);
///
/// // Normalisation does *not* rewrite the transport: the SSH and HTTPS
/// // spellings of the same repository are different strings after
/// // normalisation and therefore hash to different IDs.
/// assert_ne!(ssh, https);
/// ```
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub struct WorkspaceId([u8; 32]);
80
81impl WorkspaceId {
82    /// Compute a workspace ID from a git `origin` remote URL.
83    ///
84    /// Normalisation performed before hashing:
85    ///
86    /// 1. Trim surrounding whitespace.
87    /// 2. Lowercase the whole URL.
88    /// 3. Strip any trailing `.git` suffix.
89    /// 4. Strip any trailing slash.
90    ///
91    /// Branch is intentionally not included (see `workspace-model.md` § 3.1).
92    ///
93    /// # Errors
94    ///
95    /// Returns [`WorkspaceIdError::EmptyRemote`] if the URL is empty after
96    /// normalisation.
97    pub fn from_git_remote(origin_url: &str) -> Result<Self, WorkspaceIdError> {
98        let normalised = normalise_git_remote(origin_url);
99        if normalised.is_empty() {
100            return Err(WorkspaceIdError::EmptyRemote);
101        }
102        let mut hasher = Sha256::new();
103        hasher.update(normalised.as_bytes());
104        let digest = hasher.finalize();
105        let mut bytes = [0_u8; 32];
106        bytes.copy_from_slice(&digest);
107        Ok(Self(bytes))
108    }
109
110    /// Construct from an explicit ULID — for non-git workspaces created
111    /// via `mimir init --workspace <name>` (see `workspace-model.md` § 3.2).
112    #[must_use]
113    pub fn from_ulid(ulid: Ulid) -> Self {
114        let mut bytes = [0_u8; 32];
115        let raw = ulid.to_bytes();
116        // Place the 16-byte ULID in the high half; low half left zero.
117        bytes[..16].copy_from_slice(&raw);
118        Self(bytes)
119    }
120
121    /// The raw 32-byte hash.
122    #[must_use]
123    pub const fn as_bytes(&self) -> &[u8; 32] {
124        &self.0
125    }
126
127    /// Walk up from `start` looking for a `.git/` directory and, on
128    /// finding one, read `origin` remote URL from `.git/config` and
129    /// hash it per [`WorkspaceId::from_git_remote`].
130    ///
131    /// Implements `workspace-model.md` § 3.3 step 1 (git-backed
132    /// workspaces). Returns [`WorkspaceError::NoActiveWorkspace`] if
133    /// the walk reaches the filesystem root without finding `.git/`.
134    ///
135    /// # Errors
136    ///
137    /// - [`WorkspaceError::Io`] on filesystem read failure.
138    /// - [`WorkspaceError::NoActiveWorkspace`] if no `.git/` is found.
139    /// - [`WorkspaceError::NoOriginRemote`] if the config has no
140    ///   `[remote "origin"] url = ...` entry.
141    /// - [`WorkspaceError::InvalidRemote`] if the origin URL
142    ///   normalises to an empty string.
143    pub fn detect_from_path(start: &Path) -> Result<Self, WorkspaceError> {
144        // `canonicalize` may fail if `start` doesn't exist yet — that's
145        // a legitimate case (detection can run against a path being
146        // set up). Fall back to the literal path so the ancestor walk
147        // still operates.
148        let start_abs = start.canonicalize().unwrap_or_else(|_| start.to_path_buf());
149        let mut cursor: &Path = &start_abs;
150        loop {
151            let git_dir = cursor.join(".git");
152            if git_dir.is_dir() {
153                let config_path = git_dir.join("config");
154                let contents = std::fs::read_to_string(&config_path).map_err(WorkspaceError::Io)?;
155                let origin_url = parse_git_config_origin_url(&contents).ok_or_else(|| {
156                    WorkspaceError::NoOriginRemote {
157                        path: git_dir.clone(),
158                    }
159                })?;
160                return Self::from_git_remote(&origin_url).map_err(|_| {
161                    WorkspaceError::InvalidRemote {
162                        path: git_dir.clone(),
163                    }
164                });
165            }
166            match cursor.parent() {
167                Some(parent) if parent != cursor => cursor = parent,
168                _ => {
169                    return Err(WorkspaceError::NoActiveWorkspace { start: start_abs });
170                }
171            }
172        }
173    }
174}
175
/// Parse a `.git/config`-shaped string and return the `origin` remote
/// URL if present.
///
/// The parser is line-based and deliberately small:
///
/// - Blank lines and lines starting with `#` or `;` are ignored.
/// - A line starting with `[` is a section header. The `origin` remote is
///   recognised as the section name `remote` (ASCII case-insensitive)
///   followed by the subsection `"origin"`, `'origin'`, or bare `origin`
///   (case-sensitive), separated by any amount of spaces or tabs.
/// - Inside that section, the first `url = <value>` line wins. The key is
///   matched ASCII case-insensitively and the value is returned with
///   surrounding whitespace trimmed (it may be empty).
///
/// Returns `None` when no such line exists.
///
/// Exposed `pub` for tests and for future `workspace init` tooling
/// that may want to validate its own config before writing.
#[must_use]
pub fn parse_git_config_origin_url(config: &str) -> Option<String> {
    let mut in_origin_section = false;
    for line in config.lines().map(str::trim) {
        if line.is_empty() || line.starts_with('#') || line.starts_with(';') {
            continue;
        }
        if line.starts_with('[') {
            // Every header resets the flag, so a later section ends the
            // `origin` block.
            in_origin_section = is_origin_remote_header(line);
            continue;
        }
        if !in_origin_section {
            continue;
        }
        if let Some(value) = url_assignment_value(line) {
            return Some(value.to_string());
        }
    }
    None
}

/// Whether a trimmed `[...]` header line names the `origin` remote.
fn is_origin_remote_header(line: &str) -> bool {
    let head = line.trim_matches(|c: char| c == '[' || c == ']');
    // Tokenising on whitespace accepts any run of spaces/tabs between the
    // section name and the subsection, not just a single space.
    let mut tokens = head.split_whitespace();
    match (tokens.next(), tokens.next(), tokens.next()) {
        (Some(section), Some(subsection), None) => {
            section.eq_ignore_ascii_case("remote")
                && matches!(subsection, "\"origin\"" | "'origin'" | "origin")
        }
        _ => false,
    }
}

/// For a trimmed `url = value` line, the trimmed value; `None` for any
/// other line.
fn url_assignment_value(line: &str) -> Option<&str> {
    let (key, value) = line.split_once('=')?;
    if key.trim_end().eq_ignore_ascii_case("url") {
        Some(value.trim())
    } else {
        None
    }
}
210
211impl fmt::Display for WorkspaceId {
212    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
213        for byte in &self.0[..8] {
214            write!(f, "{byte:02x}")?;
215        }
216        Ok(())
217    }
218}
219
/// Canonicalise a git remote URL before it is hashed into a workspace ID.
///
/// Steps, in order:
///
/// 1. Trim surrounding whitespace and ASCII-lowercase the whole string.
/// 2. Drop one trailing `/`.
/// 3. If the result ends in `.git`, drop that suffix and then one more
///    trailing `/`.
///
/// Step 2 runs *before* the `.git` check so that `…/repo.git/`,
/// `…/repo.git`, `…/repo/` and `…/repo` all normalise to `…/repo`; a
/// trailing slash after `.git` would otherwise hide the suffix and give
/// the same remote a different ID. Scheme, host and path are not
/// otherwise rewritten. The result may be empty.
fn normalise_git_remote(url: &str) -> String {
    let lowered = url.trim().to_ascii_lowercase();
    let without_slash = lowered.strip_suffix('/').unwrap_or(&lowered);
    let canonical = match without_slash.strip_suffix(".git") {
        Some(base) => base.strip_suffix('/').unwrap_or(base),
        None => without_slash,
    };
    canonical.to_string()
}
226
#[cfg(test)]
mod tests {
    use super::*;

    // ----- WorkspaceId construction -----

    #[test]
    fn empty_remote_rejected() {
        assert_eq!(
            WorkspaceId::from_git_remote("   "),
            Err(WorkspaceIdError::EmptyRemote),
        );
    }

    #[test]
    fn trailing_git_collapses() {
        let a = WorkspaceId::from_git_remote("https://github.com/foo/bar.git").unwrap();
        let b = WorkspaceId::from_git_remote("https://github.com/foo/bar").unwrap();
        assert_eq!(a, b);
    }

    #[test]
    fn trailing_slash_collapses() {
        let a = WorkspaceId::from_git_remote("https://github.com/foo/bar/").unwrap();
        let b = WorkspaceId::from_git_remote("https://github.com/foo/bar").unwrap();
        assert_eq!(a, b);
    }

    #[test]
    fn case_insensitive() {
        let a = WorkspaceId::from_git_remote("https://GitHub.com/Foo/Bar.git").unwrap();
        let b = WorkspaceId::from_git_remote("https://github.com/foo/bar").unwrap();
        assert_eq!(a, b);
    }

    #[test]
    fn distinct_remotes_distinct_ids() {
        let a = WorkspaceId::from_git_remote("https://github.com/foo/mimir").unwrap();
        let b = WorkspaceId::from_git_remote("https://github.com/foo/other").unwrap();
        assert_ne!(a, b);
    }

    #[test]
    fn ulid_workspace_is_stable() {
        let ulid = Ulid::from_parts(42, 99);
        let a = WorkspaceId::from_ulid(ulid);
        let b = WorkspaceId::from_ulid(ulid);
        assert_eq!(a, b);
    }

    #[test]
    fn ulid_workspace_stores_ulid_then_zero_padding() {
        let ulid = Ulid::from_parts(42, 99);
        let id = WorkspaceId::from_ulid(ulid);
        assert_eq!(&id.as_bytes()[..16], &ulid.to_bytes()[..]);
        assert!(id.as_bytes()[16..].iter().all(|&byte| byte == 0));
    }

    #[test]
    fn display_is_eight_hex_bytes() {
        let id = WorkspaceId::from_git_remote("https://github.com/example/mimir").unwrap();
        let formatted = format!("{id}");
        assert_eq!(formatted.len(), 16);
        assert!(formatted.chars().all(|c| c.is_ascii_hexdigit()));
    }

    // ----- git config parser -----

    #[test]
    fn parse_origin_url_from_standard_config() {
        let config = r#"
            [core]
                    repositoryformatversion = 0
                    filemode = true
            [remote "origin"]
                    url = git@github.com:foo/bar.git
                    fetch = +refs/heads/*:refs/remotes/origin/*
        "#;
        assert_eq!(
            parse_git_config_origin_url(config),
            Some("git@github.com:foo/bar.git".to_string())
        );
    }

    #[test]
    fn parse_origin_url_stops_at_next_section() {
        let config = r#"
            [remote "origin"]
                    url = https://github.com/foo/mimir
            [remote "upstream"]
                    url = https://github.com/bar/mimir
        "#;
        assert_eq!(
            parse_git_config_origin_url(config),
            Some("https://github.com/foo/mimir".to_string())
        );
    }

    #[test]
    fn parse_origin_url_returns_none_when_no_origin() {
        let config = r#"
            [core]
                    bare = false
            [remote "upstream"]
                    url = https://github.com/other/repo.git
        "#;
        assert_eq!(parse_git_config_origin_url(config), None);
    }

    #[test]
    fn parse_origin_url_skips_comments() {
        let config = r#"
            # remote origin is the canonical upstream
            ; and here's a semicolon comment
            [remote "origin"]
                    # url = https://commented.out/repo
                    url = https://real.example/repo.git
        "#;
        assert_eq!(
            parse_git_config_origin_url(config),
            Some("https://real.example/repo.git".to_string())
        );
    }

    #[test]
    fn parse_origin_url_accepts_compact_assignment() {
        // No whitespace around `=` and no indentation.
        let config = "[remote \"origin\"]\nurl=https://github.com/foo/mimir\n";
        assert_eq!(
            parse_git_config_origin_url(config),
            Some("https://github.com/foo/mimir".to_string())
        );
    }

    // ----- detect_from_path -----

    /// Create `<root>/.git/config` containing a `[remote "origin"]`
    /// section whose `url` is `origin_url`.
    fn write_fake_git_repo(root: &std::path::Path, origin_url: &str) {
        let git_dir = root.join(".git");
        std::fs::create_dir_all(&git_dir).unwrap();
        std::fs::write(
            git_dir.join("config"),
            format!(
                "[core]\n\trepositoryformatversion = 0\n[remote \"origin\"]\n\turl = {origin_url}\n"
            ),
        )
        .unwrap();
    }

    #[test]
    fn detect_finds_git_at_start_path() {
        let dir = tempfile::TempDir::new().unwrap();
        write_fake_git_repo(dir.path(), "https://github.com/foo/mimir.git");
        let id = WorkspaceId::detect_from_path(dir.path()).unwrap();
        let expected = WorkspaceId::from_git_remote("https://github.com/foo/mimir.git").unwrap();
        assert_eq!(id, expected);
    }

    #[test]
    fn detect_walks_up_to_find_git() {
        let dir = tempfile::TempDir::new().unwrap();
        write_fake_git_repo(dir.path(), "https://github.com/foo/mimir.git");
        let subdir = dir.path().join("crates").join("mimir_core").join("src");
        std::fs::create_dir_all(&subdir).unwrap();
        let id = WorkspaceId::detect_from_path(&subdir).unwrap();
        let expected = WorkspaceId::from_git_remote("https://github.com/foo/mimir.git").unwrap();
        assert_eq!(id, expected);
    }

    #[test]
    fn detect_walks_up_from_start_that_does_not_exist_yet() {
        // `canonicalize` fails for a missing path; detection must still
        // walk its ancestors.
        let dir = tempfile::TempDir::new().unwrap();
        write_fake_git_repo(dir.path(), "https://github.com/foo/mimir.git");
        let missing = dir.path().join("not").join("created").join("yet");
        let id = WorkspaceId::detect_from_path(&missing).unwrap();
        let expected = WorkspaceId::from_git_remote("https://github.com/foo/mimir.git").unwrap();
        assert_eq!(id, expected);
    }

    #[test]
    fn detect_returns_no_active_workspace_on_empty_dir() {
        let dir = tempfile::TempDir::new().unwrap();
        let err = WorkspaceId::detect_from_path(dir.path()).unwrap_err();
        assert!(matches!(err, WorkspaceError::NoActiveWorkspace { .. }));
    }

    #[test]
    fn detect_returns_no_origin_if_config_missing_origin() {
        let dir = tempfile::TempDir::new().unwrap();
        let git_dir = dir.path().join(".git");
        std::fs::create_dir_all(&git_dir).unwrap();
        std::fs::write(git_dir.join("config"), "[core]\n\tbare = false\n").unwrap();
        let err = WorkspaceId::detect_from_path(dir.path()).unwrap_err();
        assert!(matches!(err, WorkspaceError::NoOriginRemote { .. }));
    }

    #[test]
    fn detect_returns_invalid_remote_if_origin_url_empty() {
        let dir = tempfile::TempDir::new().unwrap();
        write_fake_git_repo(dir.path(), "");
        let err = WorkspaceId::detect_from_path(dir.path()).unwrap_err();
        assert!(matches!(err, WorkspaceError::InvalidRemote { .. }));
    }

    // ----- workspace identity across forks and mirrors -----

    #[test]
    fn distinct_workspaces_produce_distinct_ids_across_forks() {
        // Spec § 3.1: a fork is a new workspace.
        let original = WorkspaceId::from_git_remote("https://github.com/upstream/mimir").unwrap();
        let fork = WorkspaceId::from_git_remote("https://github.com/fork/mimir").unwrap();
        assert_ne!(original, fork);
    }

    #[test]
    fn mirror_clones_converge_to_same_workspace() {
        // Spec § 3.1: mirror clones (same remote) are the same
        // workspace regardless of local path.
        let a = WorkspaceId::from_git_remote("https://github.com/foo/mimir.git").unwrap();
        let b = WorkspaceId::from_git_remote("https://github.com/foo/mimir").unwrap();
        assert_eq!(a, b);
    }
}