use std::collections::{BTreeMap, HashMap, HashSet};
use std::io::Read;
use std::path::{Path, PathBuf};
use std::time::Duration;
use flate2::read::GzDecoder;
use serde::{Deserialize, Serialize};
use smol_str::SmolStr;
/// In-memory index of the names exported by each known package.
///
/// A package can be present with an empty set of names; see
/// `RemoteExports::has_package` versus `RemoteExports::exports`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct RemoteExports {
// Package name -> set of names recorded for that package.
exports: HashMap<SmolStr, HashSet<SmolStr>>,
}
impl RemoteExports {
    /// Creates an index with no packages recorded.
    pub fn new() -> Self {
        Self::default()
    }

    /// Records `names` as the complete export set of `package`.
    ///
    /// Any set previously stored for the same package is replaced, not
    /// merged. Duplicate names collapse because they are stored in a set.
    pub fn insert_package<I>(&mut self, package: impl Into<SmolStr>, names: I)
    where
        I: IntoIterator<Item = SmolStr>,
    {
        let key: SmolStr = package.into();
        let set: HashSet<SmolStr> = names.into_iter().collect();
        self.exports.insert(key, set);
    }

    /// Moves every package of `other` into `self`.
    ///
    /// When both sides know the same package, the set from `other` replaces
    /// the one already held by `self`; the two sets are not unioned.
    pub fn merge_from(&mut self, other: RemoteExports) {
        for (package, names) in other.exports {
            self.exports.insert(package, names);
        }
    }

    /// Returns `true` when `package` has an entry, even an empty one.
    pub fn has_package(&self, package: &str) -> bool {
        self.exports.contains_key(package)
    }

    /// Returns `true` when `package` is known and its set contains `name`.
    pub fn exports(&self, package: &str, name: &str) -> bool {
        match self.exports.get(package) {
            Some(names) => names.contains(name),
            None => false,
        }
    }

    /// Iterates over the names recorded for `package`, in unspecified order.
    ///
    /// Returns `None` when the package has no entry.
    pub fn package_exports(&self, package: &str) -> Option<impl Iterator<Item = &SmolStr>> {
        let names = self.exports.get(package)?;
        Some(names.iter())
    }

    /// Number of packages with an entry.
    pub fn len(&self) -> usize {
        self.exports.len()
    }

    /// Returns `true` when no package has an entry.
    pub fn is_empty(&self) -> bool {
        self.exports.is_empty()
    }
}
/// Upper bound (64 MiB) on the number of response-body bytes `HttpTransport` reads.
const MAX_BODY_BYTES: u64 = 64 * 1024 * 1024;
/// Version segment used both in remote URLs and in the on-disk cache directory.
const SIDECAR_VERSION: &str = "v1";
/// Age in seconds (24 hours) below which a cached manifest is used without contacting the server.
const MANIFEST_TTL_SECS: u64 = 24 * 60 * 60;
/// HTTP status code that `Sidecar::manifest` treats as "cached manifest still valid".
const NOT_MODIFIED: u16 = 304;
/// Failure reported by a sidecar transport or by the on-disk cache helpers.
///
/// Both variants carry the already-rendered message of the underlying cause.
#[derive(Debug)]
pub enum SidecarError {
    /// The remote request could not be completed.
    Http(String),
    /// Reading, writing or encoding data failed.
    Io(String),
}

impl std::fmt::Display for SidecarError {
    /// Renders a fixed, variant-specific prefix followed by the stored detail.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        use SidecarError::{Http, Io};
        match self {
            Http(s) => f.write_fmt(format_args!("sidecar request failed: {s}")),
            Io(s) => f.write_fmt(format_args!("sidecar I/O error: {s}")),
        }
    }
}

impl std::error::Error for SidecarError {}
/// Result of a successful `SidecarTransport::get` call.
pub struct SidecarResponse {
/// HTTP status code; `Sidecar::manifest` compares it against `NOT_MODIFIED`.
pub status: u16,
/// Raw response body; may still be gzip-compressed (callers run it through `maybe_gunzip`).
pub body: Vec<u8>,
/// Value of the response's `ETag` header, when the transport saw one.
pub etag: Option<String>,
}
/// Abstraction over "fetch this URL", so the sidecar can be driven by a stub in tests.
pub trait SidecarTransport: Send + Sync {
/// Fetches `url`.
///
/// `if_none_match`, when present, is the validator for a conditional request;
/// `HttpTransport` sends it as the `If-None-Match` header. Implementations
/// signal "not modified" by returning a response whose `status` is
/// `NOT_MODIFIED`.
fn get(&self, url: &str, if_none_match: Option<&str>) -> Result<SidecarResponse, SidecarError>;
}
/// `SidecarTransport` implementation backed by a `ureq` agent.
pub struct HttpTransport {
// Agent reused for every request made through this transport.
agent: ureq::Agent,
}
impl HttpTransport {
    /// Builds a transport whose agent is configured with a 15-second timeout.
    pub fn new() -> Self {
        let timeout = Duration::from_secs(15);
        Self {
            agent: ureq::AgentBuilder::new().timeout(timeout).build(),
        }
    }
}
impl Default for HttpTransport {
/// Equivalent to `HttpTransport::new()`.
fn default() -> Self {
Self::new()
}
}
impl SidecarTransport for HttpTransport {
fn get(&self, url: &str, if_none_match: Option<&str>) -> Result<SidecarResponse, SidecarError> {
let mut req = self.agent.get(url);
if let Some(tag) = if_none_match {
req = req.set("If-None-Match", tag);
}
match req.call() {
Ok(resp) => {
let etag = resp.header("ETag").map(str::to_string);
let mut body = Vec::new();
resp.into_reader()
.take(MAX_BODY_BYTES)
.read_to_end(&mut body)
.map_err(|e| SidecarError::Io(e.to_string()))?;
Ok(SidecarResponse {
status: 200,
body,
etag,
})
}
Err(ureq::Error::Status(NOT_MODIFIED, resp)) => Ok(SidecarResponse {
status: NOT_MODIFIED,
body: Vec::new(),
etag: resp.header("ETag").map(str::to_string),
}),
Err(e) => Err(SidecarError::Http(e.to_string())),
}
}
}
/// One package's entry in the sidecar manifest.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct PackageEntry {
/// Version string; used to build the package's URL and cache file name.
pub version: SmolStr,
/// Optional digest slot: defaults to `None` when absent and is omitted on
/// serialization when `None`. Nothing in this file verifies it —
/// presumably a SHA-256 of the exports file; confirm with the producer.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub sha256: Option<String>,
}
/// Deserialized `manifest.json`: the set of packages the sidecar knows about.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Manifest {
/// Package name -> entry, kept in sorted key order; an absent field deserializes as empty.
#[serde(default)]
pub packages: BTreeMap<SmolStr, PackageEntry>,
}
impl Manifest {
    /// Looks up the version recorded for `package`.
    ///
    /// Returns `None` when the manifest has no entry for that package.
    pub fn version_of(&self, package: &str) -> Option<&SmolStr> {
        let entry = self.packages.get(package)?;
        Some(&entry.version)
    }
}
/// Bookkeeping stored next to the cached manifest (`manifest.meta.json`).
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
struct ManifestMeta {
// ETag returned with the manifest; sent back as `If-None-Match` on revalidation.
#[serde(default)]
etag: Option<String>,
// Timestamp (same clock as `Sidecar::now`) of the last fetch or revalidation; 0 when absent.
#[serde(default)]
fetched_at: u64,
}
/// Client for a remote "sidecar" of package export lists, backed by an on-disk cache.
pub struct Sidecar {
// Base URL with trailing '/' characters removed by `Sidecar::new`.
base_url: String,
// Cache directory: `<cache_root>/sidecar/<SIDECAR_VERSION>`.
dir: PathBuf,
// Transport used for every remote fetch.
transport: Box<dyn SidecarTransport>,
// Manifest memoized by `Sidecar::manifest` once one has been obtained.
manifest: Option<Manifest>,
// Timestamp in seconds used for TTL checks; `Sidecar::http` supplies `now_unix()`.
now: u64,
}
impl Sidecar {
    /// Creates a sidecar client.
    ///
    /// * `base_url` – remote root; trailing `/` characters are stripped.
    /// * `cache_root` – directory under which `sidecar/<SIDECAR_VERSION>/` is used as cache.
    /// * `transport` – how remote resources are fetched.
    /// * `now` – current time in seconds, used for manifest TTL checks.
    pub fn new(
        base_url: impl Into<String>,
        cache_root: &Path,
        transport: Box<dyn SidecarTransport>,
        now: u64,
    ) -> Self {
        let base_url = base_url.into().trim_end_matches('/').to_string();
        Self {
            base_url,
            dir: cache_root.join("sidecar").join(SIDECAR_VERSION),
            transport,
            manifest: None,
            now,
        }
    }

    /// Convenience constructor using `HttpTransport` and the current wall-clock time.
    pub fn http(base_url: impl Into<String>, cache_root: &Path) -> Self {
        Self::new(
            base_url,
            cache_root,
            Box::new(HttpTransport::new()),
            now_unix(),
        )
    }

    /// URL of the remote manifest.
    fn manifest_url(&self) -> String {
        format!("{}/{SIDECAR_VERSION}/manifest.json", self.base_url)
    }

    /// URL of the exports file for `package` at `version`.
    fn package_url(&self, package: &str, version: &str) -> String {
        format!(
            "{}/{SIDECAR_VERSION}/{package}/{version}.txt",
            self.base_url
        )
    }

    /// Location of the cached manifest body.
    fn manifest_path(&self) -> PathBuf {
        self.dir.join("manifest.json")
    }

    /// Location of the cached manifest bookkeeping (ETag and fetch time).
    fn meta_path(&self) -> PathBuf {
        self.dir.join("manifest.meta.json")
    }

    /// Location of the cached exports file for `package` at `version`.
    ///
    /// Callers must pass components accepted by `is_safe_component`.
    fn package_path(&self, package: &str, version: &str) -> PathBuf {
        self.dir.join(format!("{package}@{version}.txt"))
    }

    /// Returns `true` when `part` may be embedded in a cache file name and a
    /// URL path segment.
    ///
    /// Package names and versions come from the remote manifest (and from
    /// callers), so they are untrusted. Path separators, `.`/`..`, `:` (drive
    /// prefixes and NTFS streams on Windows) and control characters are
    /// rejected so that a hostile value cannot escape the cache directory.
    fn is_safe_component(part: &str) -> bool {
        !part.is_empty()
            && part != "."
            && part != ".."
            && !part
                .chars()
                .any(|c| matches!(c, '/' | '\\' | ':') || c.is_control())
    }

    /// Loads every package of the on-disk manifest whose exports file is
    /// already cached. Performs no network access.
    ///
    /// Packages without a cached exports file, and entries whose name or
    /// version is not a safe path component, are skipped. Returns an empty
    /// index when no readable manifest is cached.
    pub fn load_cached(&self) -> RemoteExports {
        let mut out = RemoteExports::new();
        let Some(manifest) = read_disk_manifest(&self.manifest_path()) else {
            return out;
        };
        for (pkg, entry) in &manifest.packages {
            if !(Self::is_safe_component(pkg) && Self::is_safe_component(&entry.version)) {
                continue;
            }
            if let Some(names) = read_exports_file(&self.package_path(pkg, &entry.version)) {
                out.insert_package(pkg.clone(), names);
            }
        }
        out
    }

    /// Returns the manifest, fetching or revalidating it when necessary.
    ///
    /// Resolution order:
    /// 1. the manifest memoized on this value;
    /// 2. the on-disk copy, when its meta says it is younger than `MANIFEST_TTL_SECS`;
    /// 3. the network, conditionally (`If-None-Match`) when a disk copy with an ETag exists;
    /// 4. the possibly stale disk copy when the network fails or returns garbage.
    ///
    /// Returns `None` only when none of these yields a manifest. Cache write
    /// failures are ignored: the cache is best-effort.
    pub fn manifest(&mut self) -> Option<&Manifest> {
        if self.manifest.is_some() {
            return self.manifest.as_ref();
        }
        let now = self.now;
        let disk = read_disk_manifest(&self.manifest_path());
        let meta = read_manifest_meta(&self.meta_path());
        let fresh = disk.is_some()
            && meta
                .as_ref()
                .is_some_and(|m| now.saturating_sub(m.fetched_at) < MANIFEST_TTL_SECS);
        if fresh {
            self.manifest = disk;
            return self.manifest.as_ref();
        }
        // Only revalidate conditionally when there is a body to fall back on.
        let inm = disk
            .as_ref()
            .and_then(|_| meta.as_ref().and_then(|m| m.etag.clone()));
        match self.transport.get(&self.manifest_url(), inm.as_deref()) {
            Ok(resp) if resp.status == NOT_MODIFIED => {
                // Disk copy is still current: refresh the timestamp only.
                let etag = resp.etag.or(inm);
                let _ = write_manifest_meta(
                    &self.meta_path(),
                    &ManifestMeta {
                        etag,
                        fetched_at: now,
                    },
                );
                self.manifest = disk;
            }
            Ok(resp) => {
                let decoded = maybe_gunzip(resp.body);
                match serde_json::from_slice::<Manifest>(&decoded) {
                    Ok(m) => {
                        // Record the new ETag/timestamp only once the body it
                        // describes is on disk. Otherwise an older cached
                        // manifest would be paired with the new ETag and later
                        // be accepted as fresh or confirmed by a 304.
                        if write_atomic(&self.manifest_path(), &decoded).is_ok() {
                            let _ = write_manifest_meta(
                                &self.meta_path(),
                                &ManifestMeta {
                                    etag: resp.etag,
                                    fetched_at: now,
                                },
                            );
                        }
                        self.manifest = Some(m);
                    }
                    // Unparseable response: keep using whatever is cached.
                    Err(_) => self.manifest = disk,
                }
            }
            // Network failure: keep using whatever is cached.
            Err(_) => self.manifest = disk,
        }
        self.manifest.as_ref()
    }

    /// Returns the exported names of `package` at the version listed in the manifest.
    ///
    /// The cached exports file is used when present; otherwise the file is
    /// fetched, parsed and written to the cache (write errors are ignored).
    ///
    /// Returns `None` when no manifest is available, the package is not
    /// listed, the name or version is not a safe path component, the fetch
    /// fails, or the body is not valid UTF-8.
    pub fn package_names(&mut self, package: &str) -> Option<Vec<SmolStr>> {
        let version = self.manifest()?.version_of(package)?.clone();
        // Both values end up in a file path and a URL; refuse anything that
        // could point outside the cache directory.
        if !(Self::is_safe_component(package) && Self::is_safe_component(&version)) {
            return None;
        }
        if let Some(names) = read_exports_file(&self.package_path(package, &version)) {
            return Some(names);
        }
        let resp = self
            .transport
            .get(&self.package_url(package, &version), None)
            .ok()?;
        let text = String::from_utf8(maybe_gunzip(resp.body)).ok()?;
        let names = parse_exports(&text);
        let _ = write_atomic(&self.package_path(package, &version), text.as_bytes());
        Some(names)
    }
}
/// Current wall-clock time as whole seconds since the Unix epoch.
///
/// Falls back to 0 when the system clock reports a time before the epoch.
fn now_unix() -> u64 {
    use std::time::{SystemTime, UNIX_EPOCH};

    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_secs(),
        Err(_) => 0,
    }
}
/// Reads and parses the manifest bookkeeping file at `path`.
///
/// Returns `None` when the file cannot be read or is not valid JSON for `ManifestMeta`.
fn read_manifest_meta(path: &Path) -> Option<ManifestMeta> {
    match std::fs::read(path) {
        Ok(raw) => serde_json::from_slice::<ManifestMeta>(&raw).ok(),
        Err(_) => None,
    }
}
/// Serializes `meta` as JSON and writes it to `path` via `write_atomic`.
///
/// # Errors
///
/// `SidecarError::Io` when serialization or the write fails.
fn write_manifest_meta(path: &Path, meta: &ManifestMeta) -> Result<(), SidecarError> {
    match serde_json::to_vec(meta) {
        Ok(encoded) => write_atomic(path, &encoded),
        Err(err) => Err(SidecarError::Io(err.to_string())),
    }
}
/// Reads and parses the cached manifest at `path`.
///
/// Returns `None` when the file cannot be read or is not valid JSON for `Manifest`.
fn read_disk_manifest(path: &Path) -> Option<Manifest> {
    std::fs::read(path)
        .ok()
        .and_then(|raw| serde_json::from_slice::<Manifest>(&raw).ok())
}
/// Reads a cached exports file and parses it with `parse_exports`.
///
/// Returns `None` when the file cannot be read as UTF-8 text.
fn read_exports_file(path: &Path) -> Option<Vec<SmolStr>> {
    std::fs::read_to_string(path)
        .ok()
        .map(|contents| parse_exports(&contents))
}
/// Parses an exports file: one name per line, in file order.
///
/// Each line is trimmed; blank lines and lines starting with `#` are skipped.
fn parse_exports(text: &str) -> Vec<SmolStr> {
    let mut names = Vec::new();
    for raw in text.lines() {
        let line = raw.trim();
        if line.is_empty() || line.starts_with('#') {
            continue;
        }
        names.push(SmolStr::new(line));
    }
    names
}
/// Decompresses `bytes` when they start with the gzip magic number.
///
/// Input without the magic prefix, or input that fails to decode, is
/// returned unchanged.
fn maybe_gunzip(bytes: Vec<u8>) -> Vec<u8> {
    const GZIP_MAGIC: [u8; 2] = [0x1f, 0x8b];

    if !bytes.starts_with(&GZIP_MAGIC) {
        return bytes;
    }
    let mut inflated = Vec::new();
    // Bind the result first so the decoder's borrow of `bytes` ends here.
    let decoded = GzDecoder::new(bytes.as_slice()).read_to_end(&mut inflated);
    match decoded {
        Ok(_) => inflated,
        Err(_) => bytes,
    }
}
/// Writes `bytes` to `path` by staging them in a temporary file in the same
/// directory and then persisting that file over `path`.
///
/// The parent directory is created when missing.
///
/// # Errors
///
/// `SidecarError::Io` when `path` has no parent or any filesystem step fails.
fn write_atomic(path: &Path, bytes: &[u8]) -> Result<(), SidecarError> {
    use std::io::Write as _;

    // Every failure in here is reported as the rendered message of its cause.
    fn io_err<E: std::fmt::Display>(err: E) -> SidecarError {
        SidecarError::Io(err.to_string())
    }

    let Some(parent) = path.parent() else {
        return Err(SidecarError::Io("no parent directory".into()));
    };
    std::fs::create_dir_all(parent).map_err(io_err)?;
    let mut staged = tempfile::NamedTempFile::new_in(parent).map_err(io_err)?;
    staged.write_all(bytes).map_err(io_err)?;
    staged.persist(path).map_err(io_err)?;
    Ok(())
}
// Unit tests for `RemoteExports` and for `Sidecar` driven by stub transports
// and a temporary cache directory.
#[cfg(test)]
mod tests {
use super::*;
// Builds a `Vec<SmolStr>` from string literals.
fn names(items: &[&str]) -> Vec<SmolStr> {
items.iter().copied().map(SmolStr::new).collect()
}
// Basic insert/lookup behaviour, including lookups on an unknown package.
#[test]
fn insert_then_query() {
let mut remote = RemoteExports::new();
remote.insert_package("cli", names(&["cli_alert", "cli_abort"]));
assert!(remote.has_package("cli"));
assert!(remote.exports("cli", "cli_alert"));
assert!(!remote.exports("cli", "not_an_export"));
assert!(!remote.has_package("absent"));
assert!(!remote.exports("absent", "anything"));
}
// A package inserted with no names is still reported as known.
#[test]
fn empty_package_is_known_but_exports_nothing() {
let mut remote = RemoteExports::new();
remote.insert_package("empty", names(&[]));
assert!(remote.has_package("empty"));
assert!(!remote.exports("empty", "anything"));
}
// Re-inserting a package replaces its export set instead of merging.
#[test]
fn insert_replaces_previous_entry() {
let mut remote = RemoteExports::new();
remote.insert_package("pkg", names(&["old"]));
remote.insert_package("pkg", names(&["new"]));
assert!(!remote.exports("pkg", "old"));
assert!(remote.exports("pkg", "new"));
}
// `package_exports` yields every stored name; order is not guaranteed, so the result is sorted.
#[test]
fn package_exports_iterates_names() {
let mut remote = RemoteExports::new();
remote.insert_package("pkg", names(&["a", "b"]));
let mut got: Vec<String> = remote
.package_exports("pkg")
.expect("known")
.map(|s| s.to_string())
.collect();
got.sort();
assert_eq!(got, vec!["a".to_string(), "b".to_string()]);
assert!(remote.package_exports("absent").is_none());
}
// Transport stub serving fixed bodies keyed by exact URL; unknown URLs fail.
struct StubTransport {
responses: HashMap<String, Vec<u8>>,
}
impl StubTransport {
// Builds the stub from `(url, body)` pairs.
fn new(responses: &[(&str, &str)]) -> Self {
Self {
responses: responses
.iter()
.map(|(u, b)| (u.to_string(), b.as_bytes().to_vec()))
.collect(),
}
}
}
impl SidecarTransport for StubTransport {
// Always answers 200 without an ETag; the conditional header is ignored.
fn get(&self, url: &str, _inm: Option<&str>) -> Result<SidecarResponse, SidecarError> {
self.responses
.get(url)
.cloned()
.map(|body| SidecarResponse {
status: 200,
body,
etag: None,
})
.ok_or_else(|| SidecarError::Http(format!("404 {url}")))
}
}
// Base URL and timestamp shared by the sidecar tests.
const BASE: &str = "https://sidecar.example/cran";
const NOW: u64 = 1_000_000;
// Manifest listing two packages.
fn manifest_json() -> &'static str {
r#"{"packages":{"tinytable":{"version":"0.6.1"},"cli":{"version":"3.6.1"}}}"#
}
// A first lookup fetches manifest and exports, strips comments/blank lines, and writes the cache file.
#[test]
fn fetches_and_caches_package_names() {
let tmp = tempfile::tempdir().unwrap();
let stub = StubTransport::new(&[
(&format!("{BASE}/v1/manifest.json"), manifest_json()),
(
&format!("{BASE}/v1/tinytable/0.6.1.txt"),
"tt\ntheme_tt\n# a comment\n\nformat_tt\n",
),
]);
let mut sidecar = Sidecar::new(BASE, tmp.path(), Box::new(stub), NOW);
let got = sidecar.package_names("tinytable").expect("fetched");
let got: Vec<String> = got.iter().map(|s| s.to_string()).collect();
assert!(got.contains(&"tt".to_string()), "{got:?}");
assert!(got.contains(&"format_tt".to_string()), "{got:?}");
assert!(!got.iter().any(|n| n.starts_with('#')), "comments stripped");
assert!(tmp.path().join("sidecar/v1/tinytable@0.6.1.txt").exists());
}
// After a warm-up fetch, a second sidecar on the same cache must not touch the network.
#[test]
fn second_lookup_is_a_disk_cache_hit() {
let tmp = tempfile::tempdir().unwrap();
let stub = StubTransport::new(&[
(&format!("{BASE}/v1/manifest.json"), manifest_json()),
(
&format!("{BASE}/v1/cli/3.6.1.txt"),
"cli_alert\ncli_abort\n",
),
]);
let mut sidecar = Sidecar::new(BASE, tmp.path(), Box::new(stub), NOW);
assert!(sidecar.package_names("cli").is_some());
// Transport that panics on any request.
struct NoNet;
impl SidecarTransport for NoNet {
fn get(&self, url: &str, _inm: Option<&str>) -> Result<SidecarResponse, SidecarError> {
panic!("unexpected network access: {url}");
}
}
// NOW + 10 is well inside the manifest TTL, so the disk manifest counts as fresh.
let mut offline = Sidecar::new(BASE, tmp.path(), Box::new(NoNet), NOW + 10);
let got = offline.package_names("cli").expect("from disk");
assert!(got.iter().any(|n| n == "cli_alert"), "{got:?}");
let warmed = offline.load_cached();
assert!(warmed.exports("cli", "cli_abort"));
}
// A package missing from the manifest yields `None`; the stub has no package URL to serve.
#[test]
fn unknown_package_yields_none_without_package_fetch() {
let tmp = tempfile::tempdir().unwrap();
let stub = StubTransport::new(&[(&format!("{BASE}/v1/manifest.json"), manifest_json())]);
let mut sidecar = Sidecar::new(BASE, tmp.path(), Box::new(stub), NOW);
assert!(sidecar.package_names("not_on_cran_xyz").is_none());
}
// A gzip-compressed exports body is transparently decompressed.
#[test]
fn gunzips_gzipped_bodies() {
use flate2::Compression;
use flate2::write::GzEncoder;
use std::io::Write as _;
let mut enc = GzEncoder::new(Vec::new(), Compression::default());
enc.write_all(b"gt\nplotgg\n").unwrap();
let gz = enc.finish().unwrap();
// Serves a plain manifest and a gzipped exports file.
struct GzStub {
manifest: Vec<u8>,
pkg: Vec<u8>,
}
impl SidecarTransport for GzStub {
fn get(&self, url: &str, _inm: Option<&str>) -> Result<SidecarResponse, SidecarError> {
let body = if url.ends_with("manifest.json") {
self.manifest.clone()
} else if url.ends_with("ggplot2/3.5.1.txt") {
self.pkg.clone()
} else {
return Err(SidecarError::Http("404".into()));
};
Ok(SidecarResponse {
status: 200,
body,
etag: None,
})
}
}
let tmp = tempfile::tempdir().unwrap();
let transport = GzStub {
manifest: br#"{"packages":{"ggplot2":{"version":"3.5.1"}}}"#.to_vec(),
pkg: gz,
};
let mut sidecar = Sidecar::new(BASE, tmp.path(), Box::new(transport), NOW);
let got = sidecar.package_names("ggplot2").expect("decoded");
assert!(got.iter().any(|n| n == "plotgg"), "{got:?}");
}
// Manifest-only transport that honours `If-None-Match` and records every validator it receives.
struct EtagStub {
etag: String,
body: Vec<u8>,
seen_inm: std::sync::Arc<std::sync::Mutex<Vec<Option<String>>>>,
}
impl SidecarTransport for EtagStub {
// Answers 304 when the validator equals the stub's ETag, otherwise 200 with the body.
fn get(&self, url: &str, inm: Option<&str>) -> Result<SidecarResponse, SidecarError> {
assert!(
url.ends_with("manifest.json"),
"only the manifest is fetched"
);
self.seen_inm.lock().unwrap().push(inm.map(str::to_string));
if inm == Some(self.etag.as_str()) {
return Ok(SidecarResponse {
status: NOT_MODIFIED,
body: Vec::new(),
etag: Some(self.etag.clone()),
});
}
Ok(SidecarResponse {
status: 200,
body: self.body.clone(),
etag: Some(self.etag.clone()),
})
}
}
// Past the TTL, the stored ETag is sent and a 304 keeps the cached manifest.
#[test]
fn stale_manifest_revalidates_with_etag_and_304() {
let tmp = tempfile::tempdir().unwrap();
let body = br#"{"packages":{"tinytable":{"version":"0.6.1"}}}"#.to_vec();
let seen0 = std::sync::Arc::new(std::sync::Mutex::new(Vec::new()));
let mut s1 = Sidecar::new(
BASE,
tmp.path(),
Box::new(EtagStub {
etag: "etag-v1".into(),
body: body.clone(),
seen_inm: seen0.clone(),
}),
NOW,
);
assert_eq!(
s1.manifest()
.and_then(|m| m.version_of("tinytable"))
.map(|v| v.as_str()),
Some("0.6.1")
);
assert_eq!(
seen0.lock().unwrap().as_slice(),
&[None],
"first load is unconditional"
);
let seen1 = std::sync::Arc::new(std::sync::Mutex::new(Vec::new()));
// Second client starts one second after the TTL has elapsed.
let mut s2 = Sidecar::new(
BASE,
tmp.path(),
Box::new(EtagStub {
etag: "etag-v1".into(),
body,
seen_inm: seen1.clone(),
}),
NOW + MANIFEST_TTL_SECS + 1,
);
assert_eq!(
s2.manifest()
.and_then(|m| m.version_of("tinytable"))
.map(|v| v.as_str()),
Some("0.6.1")
);
assert_eq!(
seen1.lock().unwrap().as_slice(),
&[Some("etag-v1".to_string())],
"revalidation sent the stored ETag as If-None-Match"
);
}
// Past the TTL, a server with a different ETag answers 200 and the new manifest wins.
#[test]
fn stale_manifest_with_changed_etag_picks_up_new_version() {
let tmp = tempfile::tempdir().unwrap();
let seen = std::sync::Arc::new(std::sync::Mutex::new(Vec::new()));
let mut s1 = Sidecar::new(
BASE,
tmp.path(),
Box::new(EtagStub {
etag: "etag-v1".into(),
body: br#"{"packages":{"tinytable":{"version":"0.6.1"}}}"#.to_vec(),
seen_inm: seen.clone(),
}),
NOW,
);
assert!(s1.manifest().is_some());
let mut s2 = Sidecar::new(
BASE,
tmp.path(),
Box::new(EtagStub {
etag: "etag-v2".into(),
body: br#"{"packages":{"tinytable":{"version":"0.7.0"}}}"#.to_vec(),
seen_inm: seen.clone(),
}),
NOW + MANIFEST_TTL_SECS + 1,
);
assert_eq!(
s2.manifest()
.and_then(|m| m.version_of("tinytable"))
.map(|v| v.as_str()),
Some("0.7.0"),
"a changed manifest is adopted on revalidation"
);
}
// The optional `sha256` field deserializes when present.
#[test]
fn manifest_accepts_optional_sha256_slot() {
let json = r#"{"packages":{"cli":{"version":"3.6.1","sha256":"deadbeef"}}}"#;
let manifest: Manifest = serde_json::from_slice(json.as_bytes()).expect("parse");
let entry = manifest.packages.get("cli").expect("cli present");
assert_eq!(entry.version.as_str(), "3.6.1");
assert_eq!(entry.sha256.as_deref(), Some("deadbeef"));
}
}