Skip to main content

mkit_git_bridge/
remoteid.rs

1//! Canonical remote identity (SPEC-GIT-IMPORT §8).
2//!
3//! One normalization used by every binding, guard, and attestation
4//! `remoteUrl` field, so trivially-equivalent spellings of one remote
5//! compare equal. This is a safety net against accidents, not a
6//! security boundary — mirrors and redirects are undetectable, and
7//! the push lease remains the backstop.
8
9use std::path::Path;
10
11/// Compute the canonical identity of a destination/source string.
12///
13/// Rules (§8): scp-style rewrites to `ssh://`; scheme+host lowercase;
14/// userinfo dropped; default ports stripped per scheme (`ssh` 22,
15/// `https` 443, `http` 80, `git` 9418); one trailing `/` and one
16/// trailing `.git` stripped; local paths and `file://` URLs collapse
17/// to the symlink-resolved absolute path (falling back to the
18/// lexical absolute path when the target does not exist yet).
19#[must_use]
20pub fn remote_identity(dest: &str) -> String {
21    // file:// URLs → local path handling.
22    if let Some(rest) = dest.strip_prefix("file://") {
23        let path = rest.strip_prefix("localhost").unwrap_or(rest);
24        return canonical_local(path);
25    }
26
27    // Scheme URLs.
28    if let Some((scheme, rest)) = dest.split_once("://") {
29        let scheme = scheme.to_ascii_lowercase();
30        let (authority, path) = match rest.find('/') {
31            Some(i) => (&rest[..i], &rest[i..]),
32            None => (rest, ""),
33        };
34        // Drop userinfo.
35        let host_port = authority.rsplit_once('@').map_or(authority, |(_, h)| h);
36        let (host, port) = split_host_port(host_port);
37        let host = host.to_ascii_lowercase();
38        let default_port = match scheme.as_str() {
39            "ssh" => Some("22"),
40            "https" => Some("443"),
41            "http" => Some("80"),
42            "git" => Some("9418"),
43            _ => None,
44        };
45        let port_part = match port {
46            Some(p) if Some(p) != default_port => format!(":{p}"),
47            _ => String::new(),
48        };
49        return format!("{scheme}://{host}{port_part}{}", strip_path(path));
50    }
51
52    // scp-style `[user@]host:path` — a colon before the first slash.
53    let first_seg = dest.split('/').next().unwrap_or(dest);
54    if first_seg.contains(':') && !looks_like_dos_drive(dest) {
55        // Bracket-aware split: `[::1]:path` keeps the literal intact.
56        let after_user = dest.rsplit_once('@').map_or(dest, |(_, rest)| rest);
57        let (host, path) = if after_user.starts_with('[') {
58            match after_user.find(']') {
59                Some(end) => {
60                    let host = &after_user[..=end];
61                    let path = after_user[end + 1..].strip_prefix(':').unwrap_or("");
62                    (host, path)
63                }
64                None => after_user.split_once(':').unwrap_or((after_user, "")),
65            }
66        } else {
67            after_user.split_once(':').unwrap_or((after_user, ""))
68        };
69        let host = host.to_ascii_lowercase();
70        return format!("ssh://{host}/{}", strip_path(path).trim_start_matches('/'));
71    }
72
73    // Local path.
74    canonical_local(dest)
75}
76
/// Split `host[:port]`, leaving IPv6 bracket literals intact.
///
/// Returns the host and, when present, the port text without its
/// colon. An empty port (`host:` / `[::1]:`) is treated as absent and
/// the dangling colon is dropped, so it normalizes like the default
/// port. In the unbracketed form a suffix is only taken as a port when
/// it is all ASCII digits; anything else stays part of the host.
fn split_host_port(hp: &str) -> (&str, Option<&str>) {
    if hp.starts_with('[') {
        // `[::1]` or `[::1]:2222`
        match hp.find(']') {
            Some(end) => {
                let (host, tail) = hp.split_at(end + 1);
                match tail.strip_prefix(':') {
                    // `[::1]:` — empty port means "no port".
                    Some("") => (host, None),
                    Some(port) => (host, Some(port)),
                    None if tail.is_empty() => (host, None),
                    // Text after `]` that is not `:port` is malformed;
                    // keep it rather than silently discarding it.
                    None => (hp, None),
                }
            }
            // Unterminated bracket: nothing sensible to split.
            None => (hp, None),
        }
    } else {
        match hp.rsplit_once(':') {
            // `host:` — drop the dangling colon. Hosts that still
            // contain a colon (unbracketed IPv6 such as `::`) are left
            // untouched.
            Some((host, "")) if !host.contains(':') => (host, None),
            Some((host, port))
                if !port.is_empty() && port.bytes().all(|b| b.is_ascii_digit()) =>
            {
                (host, Some(port))
            }
            _ => (hp, None),
        }
    }
}
96
/// Report whether `dest` starts like a DOS drive path: one ASCII
/// letter, a colon, and then either nothing or a `/` or `\` separator.
/// Such strings are local paths, not scp-style remotes.
fn looks_like_dos_drive(dest: &str) -> bool {
    match dest.as_bytes() {
        // `C:` on its own, or `C:/…` / `C:\…`.
        [drive, b':'] | [drive, b':', b'/' | b'\\', ..] => drive.is_ascii_alphabetic(),
        _ => false,
    }
}
103
/// Remove at most one trailing `/`, and after that at most one
/// trailing `.git`, returning the remainder as an owned string.
fn strip_path(path: &str) -> String {
    let without_slash = match path.strip_suffix('/') {
        Some(head) => head,
        None => path,
    };
    let without_git = match without_slash.strip_suffix(".git") {
        Some(head) => head,
        None => without_slash,
    };
    String::from(without_git)
}
110
111fn canonical_local(path: &str) -> String {
112    let p = strip_path(path);
113    let pb = Path::new(&p);
114    let abs = pb.canonicalize().unwrap_or_else(|_| {
115        // Lexical fallback for paths that don't exist (yet, or after
116        // the `.git` strip): absolutize against cwd and normalize
117        // `.`/`..` components, so `/a/b` + `../up` and `/a/up` agree.
118        let joined = if pb.is_absolute() {
119            pb.to_path_buf()
120        } else {
121            std::env::current_dir().map_or_else(|_| pb.to_path_buf(), |c| c.join(pb))
122        };
123        lexical_normalize(&joined)
124    });
125    abs.to_string_lossy().into_owned()
126}
127
/// Resolve `.` and `..` components lexically (no filesystem access).
///
/// A `..` removes the preceding normal component. Directly under the
/// root it is dropped (the parent of the root is the root), and at the
/// start of a relative path, or after another `..`, it is kept so
/// `../../x` stays `../../x`.
fn lexical_normalize(p: &Path) -> std::path::PathBuf {
    use std::path::Component;
    let mut out = std::path::PathBuf::new();
    for c in p.components() {
        match c {
            Component::CurDir => {}
            Component::ParentDir => {
                // Decide from the last component already emitted;
                // `PathBuf::pop` alone would also pop a previously
                // pushed `..` and refuses to act at the root.
                let last = out.components().next_back();
                let removes_dir = matches!(last, Some(Component::Normal(_)));
                let at_root = matches!(last, Some(Component::RootDir));
                if removes_dir {
                    out.pop();
                } else if !at_root {
                    out.push("..");
                }
            }
            other => out.push(other.as_os_str()),
        }
    }
    out
}
145
#[cfg(test)]
mod tests {
    use super::*;

    /// Equivalent URL spellings must map to one identity.
    #[test]
    fn url_equivalence_table() {
        // SPEC-GIT-IMPORT §8's worked example.
        let canonical = remote_identity("ssh://github.com/org/repo");
        for spelling in [
            "git@github.com:org/repo.git",
            "ssh://GIT@GITHUB.COM:22/org/repo/",
            "ssh://github.com/org/repo.git",
        ] {
            assert_eq!(remote_identity(spelling), canonical, "{spelling}");
        }
        // https default port + case + .git
        assert_eq!(
            remote_identity("HTTPS://GitHub.com:443/Org/Repo.git"),
            "https://github.com/Org/Repo",
            "host lowercases; path case is significant"
        );
        // Non-default port survives.
        assert_ne!(remote_identity("ssh://github.com:2222/org/repo"), canonical);
        // http port 80.
        assert_eq!(
            remote_identity("http://host:80/r"),
            remote_identity("http://HOST/r")
        );
    }

    /// Bracketed IPv6 literals stay intact; DOS drives are local.
    #[test]
    fn ipv6_and_dos_paths() {
        assert_eq!(remote_identity("[::1]:path/repo"), "ssh://[::1]/path/repo");
        assert_eq!(
            remote_identity("ssh://[::A]:22/r"),
            remote_identity("ssh://[::a]/r")
        );
        // DOS drives are paths, not scp remotes.
        assert!(!remote_identity("C:/repos/x").starts_with("ssh://"));
    }

    /// A relative `..` path to a not-yet-existing target must agree
    /// with the same target spelled absolutely.
    #[test]
    fn relative_dotdot_paths_normalize_lexically() {
        let td = tempfile::tempdir().unwrap();
        // canonicalize: macOS tempdirs live behind the /var symlink,
        // and cwd always reports the resolved spelling.
        let a = td.path().canonicalize().unwrap().join("a");
        std::fs::create_dir_all(a.join("b")).unwrap();
        let prev = std::env::current_dir().unwrap();
        std::env::set_current_dir(a.join("b")).unwrap();
        // `../up.git` doesn't exist: the `.git`-stripped fallback must
        // still agree with the identity seen from the absolutized
        // clone URL (`<td>/a/up.git` → `<td>/a/up`).
        let from_rel = remote_identity("../up.git");
        // Restore cwd before asserting so a failure cannot leave the
        // process inside the temporary directory.
        std::env::set_current_dir(&prev).unwrap();
        let from_abs = remote_identity(&format!("{}/up.git", a.display()));
        assert_eq!(from_rel, from_abs);
    }

    /// A symlink and its target, and the `file://` spelling, share
    /// one identity.
    #[test]
    fn local_paths_collapse_through_symlinks() {
        let td = tempfile::tempdir().unwrap();
        let real = td.path().join("real");
        std::fs::create_dir(&real).unwrap();
        // Symlink creation is unix-only; keeping `link` inside the
        // cfg block avoids an unused variable on other targets.
        #[cfg(unix)]
        {
            let link = td.path().join("link");
            std::os::unix::fs::symlink(&real, &link).unwrap();
            assert_eq!(
                remote_identity(link.to_str().unwrap()),
                remote_identity(real.to_str().unwrap())
            );
        }
        // file:// collapses to the same identity.
        assert_eq!(
            remote_identity(&format!("file://{}", real.display())),
            remote_identity(real.to_str().unwrap())
        );
    }
}