use std::path::{Path, PathBuf};
use crate::{Error, Result};
/// Version tag of the data set, taken from this crate's `CARGO_PKG_VERSION` at
/// compile time. It names the per-version cache directory and the release
/// archive to download.
const DATA_VERSION: &str = env!("CARGO_PKG_VERSION");
/// Base URL that release archives are downloaded from (only with the `fetch` feature).
#[cfg(feature = "fetch")]
const RELEASE_URL: &str = "https://github.com/exoma-ch/nucl-parquet/releases/download";
/// Handle to a nucl-parquet data directory on disk.
///
/// The value only stores the root path; the `meta` and `stopping`
/// subdirectories and the individual databases are derived from it on demand.
#[derive(Debug, Clone)]
pub struct DataDir {
// Root of the data directory; every other path is built by joining onto it.
root: PathBuf,
}
impl DataDir {
/// Locates an already-present data directory without touching the network.
///
/// Lookup order:
/// 1. the `NUCL_PARQUET_DATA` environment variable, if it names an existing
///    directory;
/// 2. the per-version cache directory, if it contains a `meta` subdirectory.
///
/// # Errors
/// Returns [`Error::DataNotFound`] when neither location qualifies.
pub fn resolve() -> Result<Self> {
    // `var_os` instead of `var`: filesystem paths are not required to be valid
    // UTF-8, and `var` would silently discard such an override.
    let from_env = std::env::var_os("NUCL_PARQUET_DATA")
        .map(PathBuf::from)
        .filter(|candidate| candidate.is_dir());
    if let Some(root) = from_env {
        return Ok(Self { root });
    }

    // The cache only counts as populated once its `meta` directory exists.
    let cache = Self::cache_dir();
    if cache.join("meta").is_dir() {
        return Ok(Self { root: cache });
    }

    Err(Error::DataNotFound)
}
/// Returns a usable data directory, downloading the full release archive into
/// the cache when [`Self::resolve`] finds nothing.
///
/// # Errors
/// Propagates download/extraction failures, or the error from the second
/// `resolve` attempt if the data is still not found afterwards.
#[cfg(feature = "fetch")]
pub fn ensure() -> Result<Self> {
    Self::resolve().or_else(|_| {
        // Nothing usable on disk yet: fetch the archive, then look again.
        Self::download()?;
        Self::resolve()
    })
}
/// Prepares the cache for on-demand fetching instead of downloading the whole
/// release archive.
///
/// If [`Self::resolve`] already succeeds, that directory is returned as-is.
/// Otherwise the catalog is fetched (unless a cached `catalog.json` exists),
/// the base URL derived from it is written to `.lazy_base_url` inside the
/// cache, and an empty `meta` directory is created.
///
/// # Errors
/// Returns an error if the cache cannot be created or written, or if the
/// catalog cannot be fetched, read, or parsed as JSON.
#[cfg(feature = "fetch")]
pub fn ensure_lazy() -> Result<Self> {
    const CATALOG_URL: &str =
        "https://raw.githubusercontent.com/exoma-ch/nucl-parquet/main/data/catalog.json";
    // Used when the catalog carries no `base_url`; `{version}` is substituted below.
    const DEFAULT_BASE_TEMPLATE: &str =
        "https://raw.githubusercontent.com/exoma-ch/nucl-parquet/data-{version}/data";

    if let Ok(existing) = Self::resolve() {
        return Ok(existing);
    }

    let root = Self::cache_dir();
    std::fs::create_dir_all(&root)?;

    // A catalog that is already cached is reused rather than re-fetched.
    let catalog_file = root.join("catalog.json");
    if !catalog_file.exists() {
        eprintln!("Fetching catalog from {CATALOG_URL} ...");
        Self::fetch_url(CATALOG_URL, &catalog_file)?;
    }

    let raw_catalog =
        std::fs::read_to_string(&catalog_file).map_err(|e| Error::Download(e.to_string()))?;
    let catalog: serde_json::Value =
        serde_json::from_str(&raw_catalog).map_err(|e| Error::Download(e.to_string()))?;

    // Missing or non-string fields fall back to defaults instead of failing.
    let version = catalog["data_version"].as_str().unwrap_or("latest");
    let template = catalog["base_url"].as_str().unwrap_or(DEFAULT_BASE_TEMPLATE);
    let base_url = template.replace("{version}", version);

    // `fetch_file` reads this marker to learn where to download from.
    std::fs::write(root.join(".lazy_base_url"), &base_url)?;
    // `resolve` accepts the cache only when `meta` exists, so create it now.
    std::fs::create_dir_all(root.join("meta"))?;

    eprintln!(
        "Lazy mode: catalog at {}, files on demand from {base_url}",
        root.display()
    );
    Ok(Self { root })
}
/// Returns the local path of `rel_path` inside this data directory,
/// downloading the file first when it is missing and lazy mode is active.
///
/// * If the file already exists locally, its path is returned immediately.
/// * If there is no `.lazy_base_url` marker in the root, nothing is
///   downloaded and [`Error::DataDirNotFound`] is returned.
/// * Otherwise the file is fetched from `<base_url>/<rel_path>`.
///
/// # Errors
/// Returns [`Error::Download`] when the marker cannot be read, when
/// `rel_path` would escape the data directory, or when the download fails.
#[cfg(feature = "fetch")]
pub fn fetch_file(&self, rel_path: &str) -> Result<std::path::PathBuf> {
    let dest = self.root.join(rel_path);
    if dest.exists() {
        return Ok(dest);
    }

    let marker = self.root.join(".lazy_base_url");
    if !marker.exists() {
        return Err(Error::DataDirNotFound(dest));
    }

    // Never download to a location outside the root: an absolute path or a
    // `..` component would make `dest` (and the URL) point somewhere else.
    let stays_inside_root = Path::new(rel_path).components().all(|part| {
        matches!(
            part,
            std::path::Component::Normal(_) | std::path::Component::CurDir
        )
    });
    if !stays_inside_root {
        return Err(Error::Download(format!(
            "refusing to fetch path outside the data directory: {rel_path}"
        )));
    }

    let base_url =
        std::fs::read_to_string(&marker).map_err(|e| Error::Download(e.to_string()))?;
    // Drop surrounding whitespace and any trailing slash so the join below
    // cannot produce `//` in the URL.
    let base_url = base_url.trim().trim_end_matches('/');
    let url = format!("{base_url}/{rel_path}");

    eprintln!("  Fetching {rel_path} ...");
    Self::fetch_url(&url, &dest)?;
    Ok(dest)
}
/// Downloads `url` and stores the response body at `dest`.
///
/// Parent directories of `dest` are created as needed. The body is written to
/// a sibling temporary file (`<dest>.tmp`) and then renamed into place, so
/// `dest` does not appear until the write has finished.
///
/// # Errors
/// Returns [`Error::Download`] for request failures and non-success HTTP
/// statuses; filesystem errors are propagated through `?`.
#[cfg(feature = "fetch")]
fn fetch_url(url: &str, dest: &Path) -> Result<()> {
    if let Some(parent) = dest.parent() {
        std::fs::create_dir_all(parent)?;
    }

    let resp = reqwest::blocking::get(url).map_err(|e| Error::Download(e.to_string()))?;
    if !resp.status().is_success() {
        return Err(Error::Download(format!("HTTP {} for {url}", resp.status())));
    }
    let bytes = resp.bytes().map_err(|e| Error::Download(e.to_string()))?;

    // Append ".tmp" to the full file name instead of replacing the extension:
    // `with_extension` would map both `foo.parquet` and `foo.json` to `foo.tmp`.
    let mut tmp_name = dest.as_os_str().to_os_string();
    tmp_name.push(".tmp");
    let tmp = PathBuf::from(tmp_name);

    std::fs::write(&tmp, &bytes)
        .and_then(|()| std::fs::rename(&tmp, dest))
        .map_err(|err| {
            // Best-effort cleanup so a failed attempt leaves no stray temp file.
            let _ = std::fs::remove_file(&tmp);
            err
        })?;
    Ok(())
}
pub fn from_root(root: impl AsRef<Path>) -> Self {
Self {
root: root.as_ref().to_path_buf(),
}
}
/// Borrows the root path of this data directory.
pub fn root(&self) -> &Path {
    self.root.as_path()
}
/// Path of the `meta` subdirectory below the root (not checked for existence).
pub fn meta(&self) -> PathBuf {
    let mut dir = self.root.clone();
    dir.push("meta");
    dir
}
/// Path of the `stopping` subdirectory below the root (not checked for existence).
pub fn stopping(&self) -> PathBuf {
    let mut dir = self.root.clone();
    dir.push("stopping");
    dir
}
/// Opens a [`crate::PhotonDb`] from this data directory's `meta` subdirectory.
///
/// # Errors
/// Returns whatever error `PhotonDb::open` reports.
pub fn photon_db(&self) -> Result<crate::PhotonDb> {
    let meta_dir = self.meta();
    crate::PhotonDb::open(meta_dir)
}
/// Opens a [`crate::RelaxationDb`] from this data directory's `meta` subdirectory.
///
/// # Errors
/// Returns whatever error `RelaxationDb::open` reports.
pub fn relaxation_db(&self) -> Result<crate::RelaxationDb> {
    let meta_dir = self.meta();
    crate::RelaxationDb::open(meta_dir)
}
/// Opens a [`crate::SubshellPeDb`] from this data directory's `meta` subdirectory.
///
/// # Errors
/// Returns whatever error `SubshellPeDb::open` reports.
pub fn subshell_pe_db(&self) -> Result<crate::SubshellPeDb> {
    let meta_dir = self.meta();
    crate::SubshellPeDb::open(meta_dir)
}
/// Opens a [`crate::XcomDb`] from this data directory's `meta` subdirectory.
///
/// # Errors
/// Returns whatever error `XcomDb::open` reports.
pub fn xcom_db(&self) -> Result<crate::XcomDb> {
    let meta_dir = self.meta();
    crate::XcomDb::open(meta_dir)
}
/// Opens a [`crate::ElectronDb`] from this data directory's `meta` subdirectory.
///
/// # Errors
/// Returns whatever error `ElectronDb::open` reports.
pub fn electron_db(&self) -> Result<crate::ElectronDb> {
    let meta_dir = self.meta();
    crate::ElectronDb::open(meta_dir)
}
/// Opens a [`crate::StoppingDb`] from this data directory's `stopping`
/// subdirectory (unlike the other databases, which live under `meta`).
///
/// # Errors
/// Returns whatever error `StoppingDb::open` reports.
pub fn stopping_db(&self) -> Result<crate::StoppingDb> {
    let stopping_dir = self.stopping();
    crate::StoppingDb::open(stopping_dir)
}
/// Opens a [`crate::AbundancesDb`] from this data directory's `meta` subdirectory.
///
/// # Errors
/// Returns whatever error `AbundancesDb::open` reports.
pub fn abundances_db(&self) -> Result<crate::AbundancesDb> {
    let meta_dir = self.meta();
    crate::AbundancesDb::open(meta_dir)
}
/// Opens a [`crate::DecayDb`] from this data directory's `meta` subdirectory.
///
/// # Errors
/// Returns whatever error `DecayDb::open` reports.
pub fn decay_db(&self) -> Result<crate::DecayDb> {
    let meta_dir = self.meta();
    crate::DecayDb::open(meta_dir)
}
/// Opens a [`crate::DoseDb`] from this data directory's `meta` subdirectory.
///
/// # Errors
/// Returns whatever error `DoseDb::open` reports.
pub fn dose_db(&self) -> Result<crate::DoseDb> {
    let meta_dir = self.meta();
    crate::DoseDb::open(meta_dir)
}
/// Per-version cache location: `<home>/.nucl-parquet/v<DATA_VERSION>`.
///
/// Only builds the path; nothing is created on disk.
fn cache_dir() -> PathBuf {
    let mut dir = home_dir();
    dir.push(".nucl-parquet");
    dir.push(format!("v{DATA_VERSION}"));
    dir
}
/// Downloads the release archive for `DATA_VERSION` and unpacks it into the
/// cache directory.
///
/// The HTTP response is streamed through a zstd decoder into a tar reader,
/// so the archive is not written to disk first.
///
/// # Errors
/// Returns [`Error::Download`] for request failures, non-success HTTP
/// statuses, and zstd/tar errors; failure to create the cache directory is
/// propagated through `?`.
#[cfg(feature = "fetch")]
fn download() -> Result<()> {
    // Tags archive-level failures so they can be told apart from HTTP errors.
    fn tar_err(e: impl std::fmt::Display) -> Error {
        Error::Download(format!("tar: {e}"))
    }

    let url =
        format!("{RELEASE_URL}/v{DATA_VERSION}/nucl-parquet-data-v{DATA_VERSION}.tar.zst");
    let cache = Self::cache_dir();
    std::fs::create_dir_all(&cache)?;

    eprintln!("Downloading nucl-parquet data from {url} ...");
    let resp = reqwest::blocking::get(&url).map_err(|e| Error::Download(e.to_string()))?;
    if !resp.status().is_success() {
        // Include the URL, matching the message produced by `fetch_url`.
        return Err(Error::Download(format!(
            "HTTP {} for {url}",
            resp.status()
        )));
    }

    let decoder =
        zstd::stream::Decoder::new(resp).map_err(|e| Error::Download(format!("zstd: {e}")))?;
    let mut archive = tar::Archive::new(decoder);

    // NOTE(review): entries are unpacked straight into the cache directory. If
    // this loop fails part-way, a `meta` directory may already exist and
    // `resolve` would then accept the incomplete cache. Consider staging.
    for entry in archive.entries().map_err(tar_err)? {
        let mut entry = entry.map_err(tar_err)?;
        // Own the path so it can still be reported after `unpack_in`, which
        // needs the entry mutably.
        let path = entry.path().map_err(tar_err)?.into_owned();

        // Skip `._*` files (presumably macOS AppleDouble sidecars; confirm).
        let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
        if name.starts_with("._") {
            continue;
        }

        // `unpack_in` returns `false` when it skips an entry whose path would
        // land outside `cache`; report it instead of dropping it silently.
        let unpacked = entry.unpack_in(&cache).map_err(tar_err)?;
        if !unpacked {
            eprintln!(
                "  Skipping archive entry outside the data directory: {}",
                path.display()
            );
        }
    }

    eprintln!("Data extracted to {}", cache.display());
    Ok(())
}
}
/// Best-effort lookup of the user's home directory.
///
/// Tries the `HOME` environment variable first, then `USERPROFILE` (the usual
/// variable on Windows), and falls back to `/tmp` when neither is usable.
/// Empty values are treated as unset, because an empty home would turn the
/// cache path into one relative to the current working directory.
fn home_dir() -> PathBuf {
    ["HOME", "USERPROFILE"]
        .iter()
        // `var_os` keeps non-UTF-8 paths that `var` would reject.
        .filter_map(|key| std::env::var_os(key))
        .find(|value| !value.is_empty())
        .map(PathBuf::from)
        .unwrap_or_else(|| PathBuf::from("/tmp"))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The last component of the cache directory is `v<crate version>`.
    #[test]
    fn cache_dir_contains_version() {
        let expected_leaf = format!("v{}", env!("CARGO_PKG_VERSION"));
        let cache = DataDir::cache_dir();
        assert!(cache.ends_with(&expected_leaf));
    }

    /// `meta()` and `stopping()` are plain joins onto the root path.
    #[test]
    fn meta_and_stopping_paths() {
        let root = PathBuf::from("/tmp/fake");
        let dd = DataDir { root };

        let meta = dd.meta();
        let stopping = dd.stopping();

        assert_eq!(meta, PathBuf::from("/tmp/fake/meta"));
        assert_eq!(stopping, PathBuf::from("/tmp/fake/stopping"));
    }
}