Skip to main content

git_lfs_git/
aliases.rs

//! `url.<base>.insteadOf <alias>` rewrite handling.
//!
//! Git lets users define URL prefix aliases in config:
//! `git config url."https://github.com/".insteadOf gh:` makes any URL
//! starting with `gh:` rewrite to `https://github.com/...`. Git's own
//! tooling applies this rewrite universally; LFS has to do the same
//! so settings like `lfs.url = gh:org/repo` resolve the same way the
//! user's `git fetch` already does.
//!
//! The rewrite logic itself is simple: pick the longest alias that is
//! a prefix of the input URL and swap it for the configured base. The
//! only subtlety is duplicate detection: when two
//! `url.<base>.insteadOf` entries share the *same* alias value but
//! disagree on the base, we emit
//! `warning: Multiple 'url.*.insteadof' keys with the same alias: ...`
//! once per conflicting alias each time the map is built, mirroring
//! upstream. Built maps are cached per working directory, so a process
//! that stays in one directory sees the warning once.
17
18use std::collections::HashMap;
19use std::path::{Path, PathBuf};
20use std::process::Command;
21use std::sync::{Mutex, OnceLock};
22
23use crate::Error;
24
/// Alias lookup table: each key is an `<alias>` prefix and its value is
/// the `<base>` that replaces it, one pair per distinct
/// `url.<base>.insteadOf <alias>` alias found in the effective git config.
pub type Aliases = HashMap<String, String>;
28
29/// Load the alias map for `cwd`, warning once-per-process about
30/// conflicts. Cached so repeated calls within one process don't fire
31/// `git config` again.
32pub fn load_aliases(cwd: &Path) -> Result<Aliases, Error> {
33    let canon = cwd.canonicalize().unwrap_or_else(|_| cwd.to_path_buf());
34    if let Some(cached) = aliases_cache().lock().unwrap().get(&canon) {
35        return Ok(cached.clone());
36    }
37
38    let entries = list_insteadof_entries(cwd)?;
39    let aliases = build_aliases(&entries);
40
41    aliases_cache()
42        .lock()
43        .unwrap()
44        .insert(canon, aliases.clone());
45    Ok(aliases)
46}
47
48/// Apply `url.*.insteadOf` rewriting to `url`, returning the longest-
49/// prefix-match rewrite or the original string if nothing matches.
50pub fn rewrite(cwd: &Path, url: &str) -> Result<String, Error> {
51    let aliases = load_aliases(cwd)?;
52    Ok(apply(&aliases, url))
53}
54
55/// Pure function: given a built alias map and a URL, do the longest-
56/// prefix-match rewrite. Split out so unit tests don't need a temp
57/// repo, and exposed so callers that already hold an [`Aliases`] map
58/// (e.g. the transfer queue, which captures the map once at startup
59/// instead of re-locking the per-call cache) can apply it directly.
60pub fn apply(aliases: &Aliases, url: &str) -> String {
61    let mut best: Option<&str> = None;
62    for alias in aliases.keys() {
63        if !url.starts_with(alias.as_str()) {
64            continue;
65        }
66        if best.is_none_or(|b| alias.len() > b.len()) {
67            best = Some(alias);
68        }
69    }
70    match best {
71        Some(alias) => format!("{}{}", aliases[alias], &url[alias.len()..]),
72        None => url.to_owned(),
73    }
74}
75
/// One parsed `url.<base>.insteadOf <alias>` entry.
///
/// Derives the common value-type traits so entries can be printed in
/// diagnostics and compared directly in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
struct InsteadOf {
    /// Replacement URL prefix: the `<base>` part of the config key.
    base: String,
    /// Prefix to be replaced: the config value.
    alias: String,
}
81
82/// Read every `url.*.insteadOf` from the effective config (all
83/// scopes), preserving ordering and duplicates so callers can apply
84/// upstream's "first-seen wins / warn on conflict" rule.
85fn list_insteadof_entries(cwd: &Path) -> Result<Vec<InsteadOf>, Error> {
86    // `--null` so newlines in URLs (which never happen in practice but
87    // *would* break the default key/value separator) and special chars
88    // in alias values come out unambiguously.
89    let out = Command::new("git")
90        .arg("-C")
91        .arg(cwd)
92        .args([
93            "config",
94            "--includes",
95            "--null",
96            "--get-regexp",
97            r"^url\..*\.insteadof$",
98        ])
99        .output()?;
100    // Exit 1 just means "no matches" — common case.
101    match out.status.code() {
102        Some(0) => {}
103        Some(1) => return Ok(Vec::new()),
104        _ => {
105            return Err(Error::Failed(format!(
106                "git config --get-regexp insteadof failed: {}",
107                String::from_utf8_lossy(&out.stderr).trim()
108            )));
109        }
110    }
111
112    let mut entries = Vec::new();
113    for record in out.stdout.split(|&b| b == 0) {
114        if record.is_empty() {
115            continue;
116        }
117        // Each record is `<key>\n<value>` — `--null` separates entries
118        // but uses the literal newline between key and value.
119        let s = std::str::from_utf8(record)
120            .map_err(|e| Error::Failed(format!("non-utf8 insteadof entry: {e}")))?;
121        let (key, value) = match s.split_once('\n') {
122            Some(kv) => kv,
123            None => continue,
124        };
125        // Strip `url.` prefix and `.insteadof` suffix to recover the
126        // base URL, which can itself contain dots.
127        let trimmed = match key.strip_prefix("url.") {
128            Some(s) => s,
129            None => continue,
130        };
131        let base = match trimmed.strip_suffix(".insteadof") {
132            Some(s) => s,
133            None => continue,
134        };
135        entries.push(InsteadOf {
136            base: base.to_owned(),
137            alias: value.to_owned(),
138        });
139    }
140    Ok(entries)
141}
142
143/// Convert the raw entry list into a map, emitting the conflict
144/// warning when an alias maps to two different bases. First-seen base
145/// wins, matching upstream — but iteration order from `git config
146/// --get-regexp` is config-file order, so this is deterministic.
147fn build_aliases(entries: &[InsteadOf]) -> Aliases {
148    let mut map = Aliases::new();
149    let mut warned: std::collections::HashSet<String> = Default::default();
150    for entry in entries {
151        if let Some(existing) = map.get(&entry.alias) {
152            if existing != &entry.base && warned.insert(entry.alias.clone()) {
153                eprintln!(
154                    "warning: Multiple 'url.*.insteadof' keys with the same alias: {:?}",
155                    entry.alias
156                );
157            }
158            // First-seen base wins (matches upstream's `if v != url`
159            // path: it warns but doesn't overwrite).
160            continue;
161        }
162        map.insert(entry.alias.clone(), entry.base.clone());
163    }
164    map
165}
166
167static ALIASES_CACHE: OnceLock<Mutex<HashMap<PathBuf, Aliases>>> = OnceLock::new();
168
169fn aliases_cache() -> &'static Mutex<HashMap<PathBuf, Aliases>> {
170    ALIASES_CACHE.get_or_init(|| Mutex::new(HashMap::new()))
171}
172
#[cfg(test)]
mod tests {
    use super::*;

    /// Build an alias map from `(alias, base)` pairs.
    fn alias_map(pairs: &[(&str, &str)]) -> Aliases {
        pairs
            .iter()
            .map(|&(alias, base)| (alias.to_owned(), base.to_owned()))
            .collect()
    }

    /// Shorthand for one parsed config entry.
    fn entry(base: &str, alias: &str) -> InsteadOf {
        InsteadOf {
            base: base.to_owned(),
            alias: alias.to_owned(),
        }
    }

    #[test]
    fn apply_returns_input_when_no_alias_matches() {
        let rewritten = apply(&Aliases::new(), "https://github.com/foo");
        assert_eq!(rewritten, "https://github.com/foo");
    }

    #[test]
    fn apply_rewrites_simple_prefix() {
        let aliases = alias_map(&[("alias:", "http://actual-url/")]);
        assert_eq!(apply(&aliases, "alias:rest"), "http://actual-url/rest");
    }

    #[test]
    fn apply_picks_longest_match() {
        // Both `alias` and `alias:` prefix `alias:rest`; the longer
        // one has to win.
        let aliases = alias_map(&[
            ("alias", "http://wrong-url/"),
            ("alias:", "http://actual-url/"),
        ]);
        assert_eq!(apply(&aliases, "alias:rest"), "http://actual-url/rest");
    }

    #[test]
    fn apply_does_not_rewrite_non_prefix() {
        let aliases = alias_map(&[("alias:", "http://actual-url/")]);
        // The alias occurs inside the URL but not at its start, so
        // the URL is left alone.
        assert_eq!(apply(&aliases, "badalias:rest"), "badalias:rest");
    }

    #[test]
    fn build_aliases_does_not_warn_on_duplicate_same_value() {
        // Same alias *and* same base twice: not a conflict, so no
        // warning. stderr can't be captured here, but the path is
        // exercised and the resulting map is checked.
        let entries = vec![
            entry("https://host.example/domain/", "git@host.example:domain/"),
            entry("https://host.example/domain/", "git@host.example:domain/"),
        ];
        let map = build_aliases(&entries);
        assert_eq!(map.len(), 1);
        assert_eq!(
            map["git@host.example:domain/"],
            "https://host.example/domain/"
        );
    }

    #[test]
    fn build_aliases_keeps_first_base_on_conflict() {
        let entries = vec![
            entry("http://actual-url/", "alias:"),
            entry("http://dupe-url", "alias:"),
        ];
        let map = build_aliases(&entries);
        assert_eq!(map["alias:"], "http://actual-url/");
    }

    #[test]
    fn build_aliases_handles_multiple_distinct_aliases() {
        let entries = vec![
            entry("http://actual-url/", "alias:"),
            entry("http://actual-url/", "alias2:"),
        ];
        let map = build_aliases(&entries);
        assert_eq!(map["alias:"], "http://actual-url/");
        assert_eq!(map["alias2:"], "http://actual-url/");
    }
}