/// Key that [`resolve_endpoint`] looks for among the supplied `(name, value)` pairs.
pub const HF_ENDPOINT_ENV: &str = "HF_ENDPOINT";
/// Endpoint that [`resolve_endpoint`] returns when no `HF_ENDPOINT` pair is supplied.
pub const DEFAULT_HF_ENDPOINT: &str = "https://huggingface.co";
/// Reasons an `HF_ENDPOINT` value is rejected by [`parse_endpoint`].
///
/// The `String` payloads hold the rejected value with surrounding whitespace trimmed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum EndpointError {
    /// The text before `://` is neither `http` nor `https` (compared ASCII case-insensitively).
    InvalidScheme(String),
    /// The value is empty or consists only of whitespace.
    Empty,
    /// The value contains no `://`, or nothing usable follows it (for example a bare
    /// `https://`).
    NotAUrl(String),
}
impl std::fmt::Display for EndpointError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
EndpointError::InvalidScheme(s) => write!(
f,
"HF_ENDPOINT has invalid scheme (must be http or https): {s:?}"
),
EndpointError::Empty => {
write!(f, "HF_ENDPOINT is empty or whitespace")
}
EndpointError::NotAUrl(s) => {
write!(f, "HF_ENDPOINT is not a URL (missing '://'): {s:?}")
}
}
}
}
// Empty impl: only the trait's default methods are used, so there is no underlying `source()`.
impl std::error::Error for EndpointError {}
/// Determines the endpoint from a sequence of `(name, value)` environment pairs.
///
/// The first pair whose name equals [`HF_ENDPOINT_ENV`] is passed to [`parse_endpoint`],
/// and that result is returned unchanged; later pairs are not inspected. When no pair
/// matches, [`DEFAULT_HF_ENDPOINT`] is returned.
///
/// # Errors
///
/// Returns whatever [`parse_endpoint`] reports for the matching value. A present but
/// invalid value is an error; it does not fall back to the default.
pub fn resolve_endpoint<'a, I>(env: I) -> Result<String, EndpointError>
where
    I: IntoIterator<Item = (&'a str, &'a str)>,
{
    let override_entry = env
        .into_iter()
        .find(|(name, _)| *name == HF_ENDPOINT_ENV);
    match override_entry {
        Some((_, raw_value)) => parse_endpoint(raw_value),
        None => Ok(DEFAULT_HF_ENDPOINT.to_string()),
    }
}
/// Validates and normalizes a raw `HF_ENDPOINT` value.
///
/// Surrounding whitespace is ignored and every trailing `/` is removed from the returned
/// endpoint. The scheme is compared ASCII case-insensitively, but the returned string keeps
/// the casing the caller supplied.
///
/// Only the presence of an authority is checked; the host itself is not parsed or
/// validated further.
///
/// # Errors
///
/// * [`EndpointError::Empty`] if `raw` is empty or only whitespace.
/// * [`EndpointError::NotAUrl`] if there is no `://`, or if the authority (the text between
///   `://` and the first `/`, `?` or `#`) is empty, e.g. `https://` or `https:///models`.
/// * [`EndpointError::InvalidScheme`] if the scheme is neither `http` nor `https`.
pub fn parse_endpoint(raw: &str) -> Result<String, EndpointError> {
    let v = raw.trim();
    if v.is_empty() {
        return Err(EndpointError::Empty);
    }
    let (scheme, rest) = v
        .split_once("://")
        .ok_or_else(|| EndpointError::NotAUrl(v.to_string()))?;
    // The scheme is checked before the authority so that e.g. `file:///etc/passwd` is
    // reported as an invalid scheme rather than as a missing host.
    let scheme_ok = scheme.eq_ignore_ascii_case("http") || scheme.eq_ignore_ascii_case("https");
    if !scheme_ok {
        return Err(EndpointError::InvalidScheme(v.to_string()));
    }
    // An http(s) URL with an empty authority has no host to connect to. Checking the
    // authority directly also catches `https:///path` and `https://?query`, not just a
    // bare `https://`.
    let authority_len = rest
        .find(|c: char| matches!(c, '/' | '?' | '#'))
        .unwrap_or(rest.len());
    if authority_len == 0 {
        return Err(EndpointError::NotAUrl(v.to_string()));
    }
    Ok(v.trim_end_matches('/').to_string())
}
/// Builds the download URL `<endpoint>/<repo>/resolve/<revision>/<file>`.
///
/// Trailing slashes on `endpoint` and leading slashes on `file` are dropped so the joined
/// URL has exactly one `/` at those two seams. `repo` and `revision` are inserted verbatim;
/// nothing is percent-encoded.
pub fn pull_url(endpoint: &str, repo: &str, revision: &str, file: &str) -> String {
    let root = endpoint.trim_end_matches('/');
    let relative_file = file.trim_start_matches('/');
    [root, repo, "resolve", revision, relative_file].join("/")
}
/// Reports whether `url` lies underneath `endpoint`.
///
/// Trailing slashes on `endpoint` are ignored. The match requires a `/` immediately after
/// the endpoint text, so `https://huggingface.co.evil.example/x` does not count as
/// targeting `https://huggingface.co`, and a URL equal to the bare endpoint does not
/// match either.
pub fn url_targets_endpoint(url: &str, endpoint: &str) -> bool {
    let root = endpoint.trim_end_matches('/');
    match url.strip_prefix(root) {
        Some(remainder) => remainder.starts_with('/'),
        None => false,
    }
}
// Unit tests for endpoint resolution, validation, and pull-URL construction.
#[cfg(test)]
mod tests {
    use super::*;
    // With no environment pairs at all, the built-in default endpoint is returned.
    #[test]
    fn default_endpoint_when_env_unset() {
        let got = resolve_endpoint(std::iter::empty::<(&str, &str)>()).unwrap();
        assert_eq!(got, "https://huggingface.co");
    }
    // A valid HF_ENDPOINT value replaces the default.
    #[test]
    fn hf_endpoint_override_wins() {
        let got = resolve_endpoint([("HF_ENDPOINT", "https://mirror.local")]).unwrap();
        assert_eq!(got, "https://mirror.local");
    }
    // A single trailing slash is removed from the resolved endpoint.
    #[test]
    fn trailing_slash_stripped() {
        let got = resolve_endpoint([("HF_ENDPOINT", "https://mirror.local/")]).unwrap();
        assert_eq!(got, "https://mirror.local");
    }
    // Every trailing slash is removed, not just the last one.
    #[test]
    fn multiple_trailing_slashes_stripped() {
        let got = resolve_endpoint([("HF_ENDPOINT", "https://mirror.local///")]).unwrap();
        assert_eq!(got, "https://mirror.local");
    }
    // Plain http is allowed (here: a loopback address with a port).
    #[test]
    fn http_scheme_accepted() {
        let got = resolve_endpoint([("HF_ENDPOINT", "http://127.0.0.1:18080")]).unwrap();
        assert_eq!(got, "http://127.0.0.1:18080");
    }
    // A non-http(s) scheme must be reported specifically as InvalidScheme.
    #[test]
    fn ftp_scheme_rejected() {
        let err = resolve_endpoint([("HF_ENDPOINT", "ftp://mirror.local")]).unwrap_err();
        match err {
            EndpointError::InvalidScheme(_) => {}
            other => panic!("unexpected: {other:?}"),
        }
    }
    // file:// values are rejected; the exact error variant is not pinned here.
    #[test]
    fn file_scheme_rejected() {
        assert!(resolve_endpoint([("HF_ENDPOINT", "file:///etc/passwd")]).is_err());
    }
    // A present-but-blank HF_ENDPOINT is an error rather than a fallback to the default.
    #[test]
    fn empty_hf_endpoint_rejected() {
        assert!(resolve_endpoint([("HF_ENDPOINT", "")]).is_err());
        assert!(resolve_endpoint([("HF_ENDPOINT", " ")]).is_err());
    }
    // Text without a `://` separator is rejected.
    #[test]
    fn garbage_rejected_as_not_url() {
        assert!(resolve_endpoint([("HF_ENDPOINT", "not a url")]).is_err());
    }
    // A valid scheme with nothing after `://` is reported as NotAUrl.
    #[test]
    fn scheme_only_rejected() {
        let err = resolve_endpoint([("HF_ENDPOINT", "https://")]).unwrap_err();
        match err {
            EndpointError::NotAUrl(_) => {}
            other => panic!("unexpected: {other:?}"),
        }
    }
    // Upper-case schemes pass validation; only acceptance is checked, not the returned casing.
    #[test]
    fn scheme_case_is_normalized_for_validation() {
        assert!(resolve_endpoint([("HF_ENDPOINT", "HTTPS://mirror.local")]).is_ok());
        assert!(resolve_endpoint([("HF_ENDPOINT", "HTTP://mirror.local")]).is_ok());
    }
    // Pairs with other names are never validated, even if their value would be invalid.
    #[test]
    fn unrelated_env_var_ignored() {
        let got = resolve_endpoint([("SOME_OTHER_VAR", "ftp://bad")]).unwrap();
        assert_eq!(got, "https://huggingface.co");
    }
    // The same input yields the same output on repeated calls.
    #[test]
    fn resolve_is_deterministic() {
        let a = resolve_endpoint([("HF_ENDPOINT", "https://mirror")]).unwrap();
        let b = resolve_endpoint([("HF_ENDPOINT", "https://mirror")]).unwrap();
        assert_eq!(a, b);
    }
    // Pins the exact `<endpoint>/<repo>/resolve/<revision>/<file>` layout for the default host.
    #[test]
    fn pull_url_default_matches_hf_formula() {
        let url = pull_url(
            DEFAULT_HF_ENDPOINT,
            "bert-base-uncased",
            "main",
            "config.json",
        );
        assert_eq!(
            url,
            "https://huggingface.co/bert-base-uncased/resolve/main/config.json"
        );
    }
    // The same layout applies when the endpoint is a mirror.
    #[test]
    fn pull_url_mirror_matches_hf_formula() {
        let url = pull_url(
            "http://127.0.0.1:18080",
            "bert-base-uncased",
            "main",
            "config.json",
        );
        assert_eq!(
            url,
            "http://127.0.0.1:18080/bert-base-uncased/resolve/main/config.json"
        );
    }
    // An endpoint with or without a trailing slash produces the same URL.
    #[test]
    fn pull_url_strips_trailing_slash_on_endpoint() {
        let a = pull_url("http://127.0.0.1:18080/", "repo", "main", "file");
        let b = pull_url("http://127.0.0.1:18080", "repo", "main", "file");
        assert_eq!(a, b);
    }
    // A file path with or without a leading slash produces the same URL.
    #[test]
    fn pull_url_strips_leading_slash_on_file() {
        let a = pull_url(DEFAULT_HF_ENDPOINT, "r", "main", "/file");
        let b = pull_url(DEFAULT_HF_ENDPOINT, "r", "main", "file");
        assert_eq!(a, b);
    }
    // The same arguments yield the same URL on repeated calls.
    #[test]
    fn pull_url_is_deterministic() {
        let a = pull_url(DEFAULT_HF_ENDPOINT, "r", "main", "file");
        let b = pull_url(DEFAULT_HF_ENDPOINT, "r", "main", "file");
        assert_eq!(a, b);
    }
    // A URL built for a mirror is recognised as targeting it and not the default endpoint.
    #[test]
    fn url_targets_endpoint_agreement() {
        let mirror = "http://127.0.0.1:18080";
        let url = pull_url(mirror, "r", "main", "f");
        assert!(url_targets_endpoint(&url, mirror));
        assert!(!url_targets_endpoint(&url, DEFAULT_HF_ENDPOINT));
    }
    // End-to-end check: with a mirror override, the pull URL never mentions huggingface.co.
    // NOTE(review): the `falsify_001` prefix looks like an external identifier; keep the
    // name stable unless confirmed otherwise.
    #[test]
    fn falsify_001_sub_claim_mirror_keeps_url_off_huggingface_co() {
        let mirror = "http://127.0.0.1:18080";
        let endpoint = resolve_endpoint([("HF_ENDPOINT", mirror)]).unwrap();
        assert_eq!(endpoint, mirror);
        let url = pull_url(&endpoint, "bert-base-uncased", "main", "config.json");
        assert!(
            !url.contains("huggingface.co"),
            "URL leaked to huggingface.co despite HF_ENDPOINT override: {url}",
        );
        assert!(
            url.contains("127.0.0.1:18080"),
            "URL does not target mirror: {url}"
        );
        assert!(url_targets_endpoint(&url, mirror));
    }
    // End-to-end check: with no override, the pull URL points at huggingface.co.
    #[test]
    fn falsify_001_sub_claim_unset_uses_canonical_host() {
        let endpoint = resolve_endpoint(std::iter::empty::<(&str, &str)>()).unwrap();
        assert_eq!(endpoint, "https://huggingface.co");
        let url = pull_url(&endpoint, "r", "main", "f");
        assert!(url.contains("huggingface.co"), "unexpected default: {url}");
    }
    // Guards the public constant against accidental renaming of the variable.
    #[test]
    fn hf_endpoint_env_name_stable() {
        assert_eq!(HF_ENDPOINT_ENV, "HF_ENDPOINT");
    }
    // Guards the public constant against accidental change of the default host.
    #[test]
    fn default_endpoint_stable() {
        assert_eq!(DEFAULT_HF_ENDPOINT, "https://huggingface.co");
    }
}