use sha2::{Digest, Sha256};
use std::fs;
use std::io::{Read, Write};
use std::time::Duration;
/// Maximum accepted size of a remote config, in bytes (1024 * 1024).
///
/// `fetch_remote_config` checks this against the declared `Content-Length`
/// header (when present and parseable) and against the number of bytes
/// actually read from the response body.
pub const MAX_REMOTE_CONFIG_SIZE: u64 = 1024 * 1024;
/// Hosts that remote configs may be fetched from without extra configuration.
///
/// `host_in_allowlist` accepts each of these names exactly as well as any
/// subdomain of them. Additional hosts can be supplied through the
/// `JARVY_ALLOW_REMOTE_HOST` environment variable.
const DEFAULT_ALLOWED_HOSTS: &[&str] = &[
"github.com",
"raw.githubusercontent.com",
"gist.github.com",
"gist.githubusercontent.com",
"gitlab.com",
"bitbucket.org",
];
/// Request header names that are treated as sensitive.
///
/// Entries are lowercase; `header_is_sensitive` compares against them
/// ASCII-case-insensitively. `fetch_remote_config` drops a matching header
/// when the target host is not covered by `DEFAULT_ALLOWED_HOSTS`.
const SENSITIVE_HEADER_NAMES: &[&str] = &[
"authorization",
"proxy-authorization",
"cookie",
"x-api-key",
];
/// The parts of a URL that the fetch policy is evaluated against.
#[derive(Debug)]
struct UrlPolicy<'a> {
    /// Scheme exactly as written in the URL (borrowed, not case-normalized).
    scheme: &'a str,
    /// Host with userinfo and port removed, ASCII-lowercased. A bracketed
    /// IPv6 literal keeps its brackets (e.g. `[::1]`).
    host: String,
}

/// Extracts the scheme and host of `url` without performing any I/O.
///
/// The authority ends at the first `/`, `?` or `#`. Userinfo is everything up
/// to the *last* `@` in the authority, so a stray `@` inside the userinfo can
/// never be mistaken for the start of the host. A bracketed IPv6 literal is
/// kept intact instead of being cut at its first `:`.
///
/// # Errors
///
/// Returns a human-readable message when the URL has no `://` separator, when
/// the scheme is empty or contains characters other than ASCII letters,
/// digits, `+`, `-` and `.` (or does not start with a letter), when an IPv6
/// literal is missing its closing bracket, or when the host is empty.
fn parse_url_policy(url: &str) -> Result<UrlPolicy<'_>, String> {
    let Some((scheme, after)) = url.split_once("://") else {
        return Err(format!("URL is missing a scheme: {url}"));
    };

    // Reject an empty scheme or junk such as `host/path?next=https` that only
    // looks like a scheme because a later `://` appears in the input.
    let scheme_is_valid = scheme.starts_with(|c: char| c.is_ascii_alphabetic())
        && scheme
            .chars()
            .all(|c| c.is_ascii_alphanumeric() || matches!(c, '+' | '-' | '.'));
    if !scheme_is_valid {
        return Err(format!("URL has an invalid scheme: {url}"));
    }

    let authority_end = after.find(['/', '?', '#']).unwrap_or(after.len());
    let authority = &after[..authority_end];

    // The host starts after the last '@'; anything before it is userinfo.
    let host_with_port = authority
        .rsplit_once('@')
        .map_or(authority, |(_userinfo, host_port)| host_port);

    let host = if host_with_port.starts_with('[') {
        // IPv6 literal: colons inside the brackets are part of the address,
        // not a port separator.
        match host_with_port.find(']') {
            Some(close) => &host_with_port[..=close],
            None => return Err(format!("URL has a malformed IPv6 host: {url}")),
        }
    } else {
        host_with_port
            .split_once(':')
            .map_or(host_with_port, |(name, _port)| name)
    };

    if host.is_empty() {
        return Err(format!("URL has empty host: {url}"));
    }
    Ok(UrlPolicy {
        scheme,
        host: host.to_ascii_lowercase(),
    })
}
fn host_in_allowlist(host: &str, allow_extra: &[String]) -> bool {
if DEFAULT_ALLOWED_HOSTS.contains(&host) {
return true;
}
if allow_extra.iter().any(|h| h.eq_ignore_ascii_case(host)) {
return true;
}
DEFAULT_ALLOWED_HOSTS
.iter()
.any(|allowed| host.ends_with(&format!(".{allowed}")))
}
/// Reads additional allowed hosts from `JARVY_ALLOW_REMOTE_HOST`.
///
/// The variable holds a comma-separated list. Each entry is trimmed and
/// ASCII-lowercased; empty entries are skipped. An unset or unreadable
/// variable yields an empty list.
fn extra_allowed_hosts_from_env() -> Vec<String> {
    let Ok(raw) = std::env::var("JARVY_ALLOW_REMOTE_HOST") else {
        return Vec::new();
    };

    let mut hosts = Vec::new();
    for entry in raw.split(',') {
        let entry = entry.trim();
        if !entry.is_empty() {
            hosts.push(entry.to_ascii_lowercase());
        }
    }
    hosts
}
/// Returns `true` when `name` matches an entry of `SENSITIVE_HEADER_NAMES`,
/// ignoring ASCII case.
fn header_is_sensitive(name: &str) -> bool {
    for candidate in SENSITIVE_HEADER_NAMES {
        if candidate.eq_ignore_ascii_case(name) {
            return true;
        }
    }
    false
}
/// Fetches a remote TOML config and returns the path of its cached copy.
///
/// Steps, in order:
/// 1. Parse `url` and enforce the policy: `https` only (plain `http` is
///    tolerated for `localhost` / `127.0.0.1`), and the host must pass
///    `host_in_allowlist` with the extras from `JARVY_ALLOW_REMOTE_HOST`.
/// 2. Look in `~/.jarvy/cache/configs` for a cache entry keyed by the first
///    16 hex characters of the SHA-256 of `url`. If both the `.toml` and
///    `.meta` files exist and the `.meta` file was modified less than an hour
///    ago, return the cached path without touching the network.
/// 3. Otherwise issue a GET against `transform_github_url(url)`, adding each
///    entry of `headers` (format `Name: Value`). Malformed entries are
///    reported on stderr and skipped; headers flagged by
///    `header_is_sensitive` are dropped unless the host is covered by the
///    default allowlist.
/// 4. Reject non-200 responses and bodies larger than
///    `MAX_REMOTE_CONFIG_SIZE`, require the body to parse as TOML, then write
///    the body to the `.toml` file and `url` to the `.meta` file.
///
/// # Errors
///
/// Returns a descriptive message for policy violations, cache-directory or
/// cache-file I/O failures, request failures, non-200 statuses, oversized or
/// unreadable bodies, and invalid TOML.
pub fn fetch_remote_config(url: &str, headers: &[String]) -> Result<String, String> {
    let policy = parse_url_policy(url)?;
    let allow_extra = extra_allowed_hosts_from_env();

    // --- policy: scheme, then host ---------------------------------------
    let is_loopback_host = policy.host == "localhost" || policy.host == "127.0.0.1";
    let scheme_permitted = match policy.scheme {
        "https" => true,
        "http" => is_loopback_host,
        _ => false,
    };
    if !scheme_permitted {
        return Err(format!(
            "Refusing to fetch remote config over scheme '{}'. Use https://. \
             (URL: {url})",
            policy.scheme
        ));
    }
    if !host_in_allowlist(&policy.host, &allow_extra) {
        return Err(format!(
            "Refusing to fetch remote config from host '{}'. \
             Allowed hosts: {}. \
             To permit a custom host, set JARVY_ALLOW_REMOTE_HOST=\"host1,host2\".",
            policy.host,
            DEFAULT_ALLOWED_HOSTS.join(", ")
        ));
    }

    // --- cache location ---------------------------------------------------
    let mut cache_dir = dirs::home_dir().ok_or("Could not determine home directory")?;
    cache_dir.push(".jarvy");
    cache_dir.push("cache");
    cache_dir.push("configs");
    if !cache_dir.exists() {
        if let Err(e) = fs::create_dir_all(&cache_dir) {
            return Err(format!("Failed to create cache directory: {}", e));
        }
    }

    // Only a short prefix of the digest is used for file names and log fields.
    let digest_hex = hex::encode(Sha256::digest(url.as_bytes()));
    let short_key = &digest_hex[..16];
    let cache_file = cache_dir.join(format!("{}.toml", short_key));
    let cache_meta = cache_dir.join(format!("{}.meta", short_key));

    // Freshness is judged by the modification time of the metadata file.
    let cache_is_fresh = cache_file.exists()
        && cache_meta.exists()
        && fs::metadata(&cache_meta)
            .and_then(|meta| meta.modified())
            .ok()
            .and_then(|modified| modified.elapsed().ok())
            .map_or(false, |age| age < Duration::from_secs(3600));
    if cache_is_fresh {
        println!("Using cached config from {}", url);
        tracing::debug!(
            event = "remote_config.cache.hit",
            url_hash = short_key,
        );
        return Ok(cache_file.to_string_lossy().into_owned());
    }

    // --- request ----------------------------------------------------------
    println!("Fetching config from {}...", url);
    tracing::info!(
        event = "remote_config.fetch.start",
        url_hash = short_key,
        host = %policy.host,
    );
    let fetch_url = transform_github_url(url);
    let agent = ureq::Agent::new_with_defaults();
    let mut request = agent
        .get(&fetch_url)
        .header(
            "User-Agent",
            "Jarvy/0.1 (https://github.com/bearbinary/jarvy)",
        )
        .header("Accept", "text/plain, application/toml, */*");

    // With no extras, the allowlist check reduces to "default host or a
    // subdomain of one".
    let host_in_default_list = host_in_allowlist(&policy.host, &[]);
    for header in headers {
        let parsed = header
            .split_once(':')
            .map(|(name, value)| (name.trim(), value.trim()))
            .filter(|(name, value)| !name.is_empty() && !value.is_empty());
        let Some((name, value)) = parsed else {
            eprintln!(
                "Warning: Invalid header format '{}', expected 'Name: Value'",
                header
            );
            continue;
        };
        if header_is_sensitive(name) && !host_in_default_list {
            tracing::warn!(
                event = "remote_config.header_dropped",
                header_name = %name,
                host = %policy.host,
                reason = "non_default_host"
            );
            continue;
        }
        request = request.header(name, value);
    }

    let response = match request.call() {
        Ok(response) => response,
        Err(e) => return Err(format!("Failed to fetch config: {}", e)),
    };
    let status = response.status();
    if status != 200 {
        return Err(format!("HTTP error {}", status));
    }

    // --- size limits --------------------------------------------------------
    // Early rejection based on the declared length; the read below enforces
    // the limit regardless of what the header says.
    let declared_length = response
        .headers()
        .get("content-length")
        .and_then(|raw| raw.to_str().ok())
        .and_then(|text| text.parse::<u64>().ok());
    if let Some(length) = declared_length.filter(|len| *len > MAX_REMOTE_CONFIG_SIZE) {
        return Err(format!(
            "Remote config too large: {} bytes (max {} bytes)",
            length, MAX_REMOTE_CONFIG_SIZE
        ));
    }

    // Read one byte past the limit so an oversized body can be detected.
    let mut body = response.into_body();
    let mut content = String::new();
    body.as_reader()
        .take(MAX_REMOTE_CONFIG_SIZE + 1)
        .read_to_string(&mut content)
        .map_err(|e| format!("Failed to read response body: {}", e))?;
    if content.len() as u64 > MAX_REMOTE_CONFIG_SIZE {
        tracing::warn!(
            event = "remote_config.size_limit_exceeded",
            url_hash = short_key,
            max = MAX_REMOTE_CONFIG_SIZE,
        );
        return Err(format!(
            "Remote config too large: exceeds {} bytes limit",
            MAX_REMOTE_CONFIG_SIZE
        ));
    }
    tracing::info!(
        event = "remote_config.fetch.complete",
        url_hash = short_key,
        bytes = content.len(),
    );

    // --- validate, then persist -------------------------------------------
    if let Err(e) = toml::from_str::<toml::Value>(&content) {
        return Err(format!("Invalid TOML in remote config: {}", e));
    }
    fs::File::create(&cache_file)
        .map_err(|e| format!("Failed to create cache file: {}", e))?
        .write_all(content.as_bytes())
        .map_err(|e| format!("Failed to write cache file: {}", e))?;
    fs::File::create(&cache_meta)
        .map_err(|e| format!("Failed to create cache metadata: {}", e))?
        .write_all(url.as_bytes())
        .map_err(|e| format!("Failed to write cache metadata: {}", e))?;

    println!("Config cached at {}", cache_file.display());
    Ok(cache_file.to_string_lossy().into_owned())
}
/// Rewrites GitHub web URLs into their raw-content equivalents.
///
/// * `https://github.com/<owner>/<repo>/blob/<ref>/<path>` becomes
///   `https://raw.githubusercontent.com/<owner>/<repo>/<ref>/<path>`.
/// * A `gist.github.com` URL gets `/raw` appended to its path (after trimming
///   trailing slashes) unless the path already has a `raw` segment.
///
/// Every other URL is returned unchanged. Matching is done on the parsed host
/// (ASCII-case-insensitively) and on path segments rather than on substrings,
/// so only the host and the `blob` marker segment are rewritten: later
/// `/blob/` segments, a repository named `github.com`, URLs on unrelated
/// hosts, and any query string or fragment are left intact. Userinfo, port,
/// query and fragment are carried over verbatim. Input without a `scheme://`
/// prefix is treated as starting at the host.
pub fn transform_github_url(url: &str) -> String {
    // Split into "<scheme>://" | authority | path | "?query#fragment".
    let (prefix, rest) = match url.find("://") {
        Some(idx) => url.split_at(idx + 3),
        None => ("", url),
    };
    let authority_end = rest.find(['/', '?', '#']).unwrap_or(rest.len());
    let (authority, tail) = rest.split_at(authority_end);
    let path_end = tail.find(['?', '#']).unwrap_or(tail.len());
    let (path, suffix) = tail.split_at(path_end);

    // authority = [userinfo "@"] host [":" port]
    let (userinfo, host_port) = match authority.rfind('@') {
        Some(at) => authority.split_at(at + 1),
        None => ("", authority),
    };
    let (host, port) = match host_port.find(':') {
        Some(colon) => host_port.split_at(colon),
        None => (host_port, ""),
    };

    if host.eq_ignore_ascii_case("github.com") {
        // Expect "/<owner>/<repo>/blob/<ref-and-path>"; the limit of 5 keeps
        // everything after the marker segment in one piece.
        let segments: Vec<&str> = path.splitn(5, '/').collect();
        if let ["", owner, repo, "blob", file_path] = segments.as_slice() {
            if !owner.is_empty() && !repo.is_empty() {
                return format!(
                    "{prefix}{userinfo}raw.githubusercontent.com{port}/{owner}/{repo}/{file_path}{suffix}"
                );
            }
        }
    } else if host.eq_ignore_ascii_case("gist.github.com")
        && !path.split('/').any(|segment| segment == "raw")
    {
        let trimmed = path.trim_end_matches('/');
        return format!("{prefix}{authority}{trimmed}/raw{suffix}");
    }

    url.to_string()
}
#[cfg(test)]
mod tests {
    use super::*;

    // --- parse_url_policy ---------------------------------------------------

    /// The host is lowercased; the scheme is reported as written.
    #[test]
    fn parse_url_policy_extracts_host_lowercase() {
        let policy = parse_url_policy("https://Raw.GithubUserContent.Com/x/y").unwrap();
        assert_eq!(
            (policy.scheme, policy.host.as_str()),
            ("https", "raw.githubusercontent.com")
        );
    }

    /// Userinfo and port never end up in the host.
    #[test]
    fn parse_url_policy_strips_userinfo_and_port() {
        let policy = parse_url_policy("https://user:tok@github.com:8443/x").unwrap();
        assert_eq!(policy.host, "github.com");
    }

    /// Input without `://` is an error.
    #[test]
    fn parse_url_policy_rejects_missing_scheme() {
        let outcome = parse_url_policy("github.com/owner/repo");
        assert!(outcome.is_err());
    }

    // --- host_in_allowlist --------------------------------------------------

    /// Default hosts and their subdomains pass; unrelated hosts do not.
    #[test]
    fn allowlist_accepts_default_hosts_and_subdomains() {
        let no_extras: &[String] = &[];
        for accepted in ["github.com", "raw.githubusercontent.com", "api.github.com"] {
            assert!(host_in_allowlist(accepted, no_extras));
        }
        assert!(!host_in_allowlist("attacker.tld", no_extras));
    }

    /// Extra hosts extend the allowlist without opening it to everything.
    #[test]
    fn allowlist_honors_extra_hosts() {
        let extras = vec![String::from("internal.corp")];
        assert!(host_in_allowlist("internal.corp", &extras));
        assert!(!host_in_allowlist("attacker.tld", &extras));
    }

    // --- fetch_remote_config (policy rejections only, no network) -----------

    /// Plain http to a non-loopback host is refused before any request.
    #[test]
    fn fetch_rejects_http_to_remote_host() {
        let message = fetch_remote_config("http://example.com/x.toml", &[])
            .expect_err("http to remote host must be refused");
        let mentions_policy = ["scheme", "host"]
            .iter()
            .any(|needle| message.contains(*needle));
        assert!(mentions_policy);
    }

    /// A host outside the allowlist is refused and named in the error.
    #[test]
    fn fetch_rejects_disallowed_host() {
        let message = fetch_remote_config("https://attacker.tld/jarvy.toml", &[])
            .expect_err("disallowed host must be refused");
        assert!(message.contains("attacker.tld"));
    }

    // --- header_is_sensitive ------------------------------------------------

    /// Header names are matched regardless of ASCII case.
    #[test]
    fn header_sensitivity_check_is_case_insensitive() {
        for sensitive in ["Authorization", "authorization", "X-API-Key"] {
            assert!(header_is_sensitive(sensitive));
        }
        for harmless in ["User-Agent", "Accept"] {
            assert!(!header_is_sensitive(harmless));
        }
    }
}