Skip to main content

git_lfs_git/
aliases.rs

1//! `url.<base>.insteadOf <alias>` rewrite handling.
2//!
3//! Git lets users define URL prefix aliases in config:
4//! `git config url."https://github.com/".insteadOf gh:` makes any URL
5//! starting with `gh:` rewrite to `https://github.com/...`. The git
6//! tooling applies this rewrite universally; LFS has to do the same
7//! so settings like `lfs.url = gh:org/repo` resolve the same way the
8//! user's `git fetch` already does.
9//!
10//! The rewrite logic itself is dead simple — pick the longest alias
11//! that's a prefix of the input URL and swap it for the configured
12//! base. The only subtlety is duplicate detection: when two
13//! `url.<base>.insteadOf` entries share the *same* alias value but
14//! disagree on the base, we emit
15//! `warning: Multiple 'url.*.insteadof' keys with the same alias: ...`
16//! once per process, mirroring upstream.
17
18use std::collections::HashMap;
19use std::path::{Path, PathBuf};
20use std::process::Command;
21use std::sync::{Mutex, OnceLock};
22
23use crate::Error;
24
/// Maps `<alias>` → `<base>` for every `url.<base>.insteadOf <alias>`
/// entry in the effective git config.
///
/// Keys are the alias prefixes (e.g. `gh:`); values are the base URLs
/// substituted for them. The same shape is used for the
/// `pushInsteadOf` map returned by `load_push_aliases`.
pub type Aliases = HashMap<String, String>;
28
29/// Load the alias map for `cwd`, warning once-per-process about
30/// conflicts. Cached so repeated calls within one process don't fire
31/// `git config` again.
32pub fn load_aliases(cwd: &Path) -> Result<Aliases, Error> {
33    let canon = cwd.canonicalize().unwrap_or_else(|_| cwd.to_path_buf());
34    if let Some(cached) = aliases_cache().lock().unwrap().get(&canon) {
35        return Ok(cached.clone());
36    }
37
38    let entries = list_aliases_entries(cwd, "insteadof")?;
39    let aliases = build_aliases(&entries);
40
41    aliases_cache()
42        .lock()
43        .unwrap()
44        .insert(canon, aliases.clone());
45    Ok(aliases)
46}
47
48/// Load the push-direction alias map (`url.<base>.pushInsteadOf`) for
49/// `cwd`. Cached separately from `load_aliases`. Falls back to an empty
50/// map when no `pushInsteadOf` entries are configured — callers should
51/// then use [`load_aliases`] for the upload path too, so plain
52/// `insteadOf` still applies.
53pub fn load_push_aliases(cwd: &Path) -> Result<Aliases, Error> {
54    let canon = cwd.canonicalize().unwrap_or_else(|_| cwd.to_path_buf());
55    if let Some(cached) = push_aliases_cache().lock().unwrap().get(&canon) {
56        return Ok(cached.clone());
57    }
58
59    let entries = list_aliases_entries(cwd, "pushinsteadof")?;
60    let aliases = build_aliases(&entries);
61
62    push_aliases_cache()
63        .lock()
64        .unwrap()
65        .insert(canon, aliases.clone());
66    Ok(aliases)
67}
68
69/// Apply `url.*.insteadOf` rewriting to `url`, returning the longest-
70/// prefix-match rewrite or the original string if nothing matches.
71pub fn rewrite(cwd: &Path, url: &str) -> Result<String, Error> {
72    let aliases = load_aliases(cwd)?;
73    Ok(apply(&aliases, url))
74}
75
76/// Pure function: given a built alias map and a URL, do the longest-
77/// prefix-match rewrite. Split out so unit tests don't need a temp
78/// repo, and exposed so callers that already hold an [`Aliases`] map
79/// (e.g. the transfer queue, which captures the map once at startup
80/// instead of re-locking the per-call cache) can apply it directly.
81pub fn apply(aliases: &Aliases, url: &str) -> String {
82    let mut best: Option<&str> = None;
83    for alias in aliases.keys() {
84        if !url.starts_with(alias.as_str()) {
85            continue;
86        }
87        if best.is_none_or(|b| alias.len() > b.len()) {
88            best = Some(alias);
89        }
90    }
91    match best {
92        Some(alias) => format!("{}{}", aliases[alias], &url[alias.len()..]),
93        None => url.to_owned(),
94    }
95}
96
/// One parsed `url.<base>.insteadOf <alias>` entry.
struct InsteadOf {
    /// The `<base>` URL recovered from the config key, i.e. the part
    /// between the leading `url.` and the trailing `.<suffix>`.
    base: String,
    /// The config value: the alias prefix that `base` stands in for.
    alias: String,
}
102
103/// Read every `url.*.<suffix>` from the effective config (all scopes),
104/// preserving ordering and duplicates so callers can apply upstream's
105/// "first-seen wins / warn on conflict" rule. `suffix` is `"insteadof"`
106/// for the download/general path or `"pushinsteadof"` for the upload
107/// path; git's config parsing is case-insensitive on the key, so the
108/// lowercased form covers `pushInsteadOf` etc.
109fn list_aliases_entries(cwd: &Path, suffix: &str) -> Result<Vec<InsteadOf>, Error> {
110    let regex = format!(r"^url\..*\.{suffix}$");
111    // `--null` so newlines in URLs (which never happen in practice but
112    // *would* break the default key/value separator) and special chars
113    // in alias values come out unambiguously.
114    let out = Command::new("git")
115        .arg("-C")
116        .arg(cwd)
117        .args(["config", "--includes", "--null", "--get-regexp", &regex])
118        .output()?;
119    // Exit 1 just means "no matches" — common case.
120    match out.status.code() {
121        Some(0) => {}
122        Some(1) => return Ok(Vec::new()),
123        _ => {
124            return Err(Error::Failed(format!(
125                "git config --get-regexp {suffix} failed: {}",
126                String::from_utf8_lossy(&out.stderr).trim()
127            )));
128        }
129    }
130
131    let dot_suffix = format!(".{suffix}");
132    let mut entries = Vec::new();
133    for record in out.stdout.split(|&b| b == 0) {
134        if record.is_empty() {
135            continue;
136        }
137        // Each record is `<key>\n<value>` — `--null` separates entries
138        // but uses the literal newline between key and value.
139        let s = std::str::from_utf8(record)
140            .map_err(|e| Error::Failed(format!("non-utf8 {suffix} entry: {e}")))?;
141        let (key, value) = match s.split_once('\n') {
142            Some(kv) => kv,
143            None => continue,
144        };
145        // Strip `url.` prefix and `.<suffix>` suffix to recover the
146        // base URL, which can itself contain dots.
147        let trimmed = match key.strip_prefix("url.") {
148            Some(s) => s,
149            None => continue,
150        };
151        let base = match trimmed.strip_suffix(dot_suffix.as_str()) {
152            Some(s) => s,
153            None => continue,
154        };
155        entries.push(InsteadOf {
156            base: base.to_owned(),
157            alias: value.to_owned(),
158        });
159    }
160    Ok(entries)
161}
162
163/// Convert the raw entry list into a map, emitting the conflict
164/// warning when an alias maps to two different bases. First-seen base
165/// wins, matching upstream — but iteration order from `git config
166/// --get-regexp` is config-file order, so this is deterministic.
167fn build_aliases(entries: &[InsteadOf]) -> Aliases {
168    let mut map = Aliases::new();
169    let mut warned: std::collections::HashSet<String> = Default::default();
170    for entry in entries {
171        if let Some(existing) = map.get(&entry.alias) {
172            if existing != &entry.base && warned.insert(entry.alias.clone()) {
173                eprintln!(
174                    "warning: Multiple 'url.*.insteadof' keys with the same alias: {:?}",
175                    entry.alias
176                );
177            }
178            // First-seen base wins (matches upstream's `if v != url`
179            // path: it warns but doesn't overwrite).
180            continue;
181        }
182        map.insert(entry.alias.clone(), entry.base.clone());
183    }
184    map
185}
186
187static ALIASES_CACHE: OnceLock<Mutex<HashMap<PathBuf, Aliases>>> = OnceLock::new();
188static PUSH_ALIASES_CACHE: OnceLock<Mutex<HashMap<PathBuf, Aliases>>> = OnceLock::new();
189
190fn aliases_cache() -> &'static Mutex<HashMap<PathBuf, Aliases>> {
191    ALIASES_CACHE.get_or_init(|| Mutex::new(HashMap::new()))
192}
193
194fn push_aliases_cache() -> &'static Mutex<HashMap<PathBuf, Aliases>> {
195    PUSH_ALIASES_CACHE.get_or_init(|| Mutex::new(HashMap::new()))
196}
197
#[cfg(test)]
mod tests {
    use super::*;

    /// Build an [`Aliases`] map from `(alias, base)` pairs.
    fn alias_map(pairs: &[(&str, &str)]) -> Aliases {
        pairs
            .iter()
            .map(|&(alias, base)| (alias.to_owned(), base.to_owned()))
            .collect()
    }

    /// Build one raw [`InsteadOf`] entry.
    fn entry(base: &str, alias: &str) -> InsteadOf {
        InsteadOf {
            base: base.to_owned(),
            alias: alias.to_owned(),
        }
    }

    #[test]
    fn apply_returns_input_when_no_alias_matches() {
        let url = "https://github.com/foo";
        assert_eq!(apply(&Aliases::new(), url), "https://github.com/foo");
    }

    #[test]
    fn apply_rewrites_simple_prefix() {
        let aliases = alias_map(&[("alias:", "http://actual-url/")]);
        assert_eq!(apply(&aliases, "alias:rest"), "http://actual-url/rest");
    }

    #[test]
    fn apply_picks_longest_match() {
        // Both `alias` and `alias:` are prefixes of `alias:rest`; the
        // longer one must be the one that is applied.
        let aliases = alias_map(&[
            ("alias", "http://wrong-url/"),
            ("alias:", "http://actual-url/"),
        ]);
        assert_eq!(apply(&aliases, "alias:rest"), "http://actual-url/rest");
    }

    #[test]
    fn apply_does_not_rewrite_non_prefix() {
        let aliases = alias_map(&[("alias:", "http://actual-url/")]);
        // The alias occurs inside the URL but not at its start, so the
        // URL is left alone.
        assert_eq!(apply(&aliases, "badalias:rest"), "badalias:rest");
    }

    #[test]
    fn build_aliases_does_not_warn_on_duplicate_same_value() {
        // Same alias *and* same base twice → not a conflict. stderr
        // can't be captured here, so this only exercises the path and
        // checks the resulting map.
        let entries = vec![
            entry("https://host.example/domain/", "git@host.example:domain/"),
            entry("https://host.example/domain/", "git@host.example:domain/"),
        ];
        let map = build_aliases(&entries);
        assert_eq!(map.len(), 1);
        assert_eq!(
            map["git@host.example:domain/"],
            "https://host.example/domain/"
        );
    }

    #[test]
    fn build_aliases_keeps_first_base_on_conflict() {
        let entries = vec![
            entry("http://actual-url/", "alias:"),
            entry("http://dupe-url", "alias:"),
        ];
        let map = build_aliases(&entries);
        assert_eq!(map["alias:"], "http://actual-url/");
    }

    #[test]
    fn build_aliases_handles_multiple_distinct_aliases() {
        let entries = vec![
            entry("http://actual-url/", "alias:"),
            entry("http://actual-url/", "alias2:"),
        ];
        let map = build_aliases(&entries);
        assert_eq!(map["alias:"], "http://actual-url/");
        assert_eq!(map["alias2:"], "http://actual-url/");
    }
}