use std::{
collections::BTreeMap,
fmt::Debug,
fs::File,
hash::{Hash, Hasher},
io::Read,
path::Path,
sync::OnceLock,
};
#[cfg(feature = "arbitrary")]
use arbitrary::Arbitrary;
pub use chrono;
use log::*;
#[cfg(feature = "neo4j")]
pub use neo4rs;
#[cfg(feature = "py")]
use pyo3::prelude::*;
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use value::Map;
use crate::{
error::{Error, Result},
value::{Value, ValueSerializer},
};
/// Re-exports of the crate's core data types, the database items and the
/// `vergen_session!` macro, so they can be brought into scope with a single
/// `use ...::prelude::*`.
///
/// Backend types (`Json`, `Neo4j`) and `DatasetWriter` are only re-exported
/// when the corresponding Cargo feature (`json`, `neo4j`, `sha`) is enabled.
pub mod prelude {
#[cfg(feature = "json")]
pub use crate::db::json::Json;
#[cfg(feature = "neo4j")]
pub use crate::db::neo4j::Neo4j;
#[cfg(feature = "sha")]
pub use crate::db::DatasetWriter;
pub use crate::{
db::{Database, DatabaseSession},
vergen_session, Dataset, Run, Session, Software,
};
}
// Database items (`Database`, `DatabaseSession`) and the feature-gated backends.
pub mod db;
// Crate-wide `Error` and `Result` types.
pub mod error;
// `Value`, `Map` and the `ValueSerializer` used to serialize dataset metadata.
pub mod value;
// Helpers that are only compiled for this crate's own tests.
#[cfg(test)]
mod testutils;
/// Expands to a `String` of the form `<CARGO_PKG_VERSION>-<VERGEN_GIT_SHA>`.
///
/// This variant is compiled when the `git` feature is enabled. Both
/// environment variables are read with `env!` at compile time of the crate
/// that invokes the macro, so that crate's build must provide
/// `VERGEN_GIT_SHA`.
#[macro_export]
#[cfg(feature = "git")]
macro_rules! vergen_version {
    () => {{
        ::std::string::String::from(concat!(
            env!("CARGO_PKG_VERSION"),
            "-",
            env!("VERGEN_GIT_SHA")
        ))
    }};
}
/// Expands to a `String` holding `CARGO_PKG_VERSION`.
///
/// This variant is compiled when the `git` feature is disabled. The
/// environment variable is read with `env!` at compile time of the crate that
/// invokes the macro.
#[macro_export]
#[cfg(not(feature = "git"))]
macro_rules! vergen_version {
    () => {{
        ::std::string::String::from(env!("CARGO_PKG_VERSION"))
    }};
}
/// Builds a `Session` describing the calling crate and the current moment.
///
/// * `software.name` is the caller's `CARGO_CRATE_NAME`.
/// * `software.version` is whatever `vergen_version!` expands to.
/// * `software.compile_time` is the caller's `VERGEN_BUILD_TIMESTAMP`.
/// * `run.date` is the current UTC time formatted as RFC 3339.
///
/// All `env!` lookups happen while compiling the crate that invokes the
/// macro, so that crate's build must provide `VERGEN_BUILD_TIMESTAMP`.
///
/// Paths are written with `$crate` rather than a hard-coded crate name so
/// the macro keeps working when the dependency is renamed in `Cargo.toml`
/// and when it is invoked from inside this crate itself.
#[macro_export]
macro_rules! vergen_session {
    () => {{
        $crate::Session {
            software: $crate::Software {
                name: env!("CARGO_CRATE_NAME").to_string(),
                version: $crate::vergen_version!(),
                compile_time: env!("VERGEN_BUILD_TIMESTAMP").to_string(),
            },
            run: $crate::Run {
                date: $crate::chrono::offset::Utc::now().to_rfc3339(),
            },
        }
    }};
}
/// Pairs a [`Software`] description with a [`Run`].
///
/// The derived `Hash` implementation is what `Session::get_hash` feeds into
/// its hasher, and the derived traits depend on the field order below.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Hash, PartialOrd, Ord)]
#[cfg_attr(feature = "arbitrary", derive(Arbitrary))]
#[cfg_attr(feature = "py", pyclass(set_all, get_all))]
pub struct Session {
/// Description of the software (name, version, compile time).
pub software: Software,
/// Description of the run (its date).
pub run: Run,
}
impl Session {
pub fn new(software: Software, run: Run) -> Self {
Self { software, run }
}
pub fn get_hash(&self) -> u64 {
let mut hasher = highway::HighwayHasher::default();
self.hash(&mut hasher);
hasher.finish()
}
}
#[cfg(feature = "py")]
#[pymethods]
impl Session {
    /// Python constructor: `Session(software, run)`.
    #[new]
    fn py_new(software: Software, run: Run) -> Self {
        Self { software, run }
    }

    /// Short representation showing software name, version and run date.
    fn __repr__(&self) -> String {
        let Software { name, version, .. } = &self.software;
        format!(
            "Session(software={}-{}, run={})",
            name, version, self.run.date
        )
    }

    /// Session hash as 16 upper-case, zero-padded hexadecimal digits.
    #[pyo3(name = "hash")]
    fn py_hash(&self) -> String {
        format!("{:016X}", self.get_hash())
    }
}
/// Describes a single run through its date.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Hash, PartialOrd, Ord)]
#[cfg_attr(feature = "arbitrary", derive(Arbitrary))]
#[cfg_attr(feature = "py", pyclass(set_all, get_all))]
pub struct Run {
/// Date of the run, stored as free-form text. `vergen_session!` fills it
/// with the current UTC time in RFC 3339 format; no format is enforced here.
pub date: String,
}
impl Run {
    /// Creates a run whose date is the `to_string()` rendering of `date`.
    pub fn new<S: ToString>(date: S) -> Self {
        let date = date.to_string();
        Self { date }
    }
}
#[cfg(feature = "py")]
#[pymethods]
impl Run {
    /// Python constructor: `Run(date)`.
    #[new]
    fn py_new(date: &str) -> Self {
        Self {
            date: date.to_owned(),
        }
    }

    /// Short representation showing the run date.
    fn __repr__(&self) -> String {
        let Self { date } = self;
        format!("Run(date={})", date)
    }
}
/// Describes a piece of software by name, version and compile time.
///
/// All fields are free-form text; `vergen_session!` fills them from the
/// calling crate's build-time environment variables.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Hash, PartialOrd, Ord)]
#[cfg_attr(feature = "arbitrary", derive(Arbitrary))]
#[cfg_attr(feature = "py", pyclass(set_all, get_all))]
pub struct Software {
/// Name of the software (`CARGO_CRATE_NAME` when built by `vergen_session!`).
pub name: String,
/// Version string (the result of `vergen_version!` when built by `vergen_session!`).
pub version: String,
/// Compile time (`VERGEN_BUILD_TIMESTAMP` when built by `vergen_session!`).
pub compile_time: String,
}
impl Software {
    /// Creates a software description; every argument is stored as its
    /// `to_string()` rendering.
    pub fn new<S: ToString, T: ToString, U: ToString>(
        name: S,
        version: T,
        compile_time: U,
    ) -> Self {
        let name = name.to_string();
        let version = version.to_string();
        let compile_time = compile_time.to_string();
        Self {
            name,
            version,
            compile_time,
        }
    }
}
#[cfg(feature = "py")]
#[pymethods]
impl Software {
    /// Python constructor: `Software(name, version, compile_time)`.
    #[new]
    fn py_new(name: &str, version: &str, compile_time: &str) -> Self {
        Self {
            name: name.to_owned(),
            version: version.to_owned(),
            compile_time: compile_time.to_owned(),
        }
    }

    /// Short representation listing all three fields.
    fn __repr__(&self) -> String {
        let Self {
            name,
            version,
            compile_time,
        } = self;
        format!(
            "Software(name={}, version={}, compile_time={})",
            name, version, compile_time
        )
    }
}
// Process-wide cache for the local host; filled on first use by `Host::get_once`.
static HOST: OnceLock<Host> = OnceLock::new();
/// Identifies a machine by its hostname.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Hash, PartialOrd, Ord)]
#[cfg_attr(feature = "arbitrary", derive(Arbitrary))]
#[cfg_attr(feature = "py", pyclass(set_all, get_all))]
pub struct Host {
/// Hostname as text. `Host::get_once` fills it from `hostname::get()`,
/// converted lossily to UTF-8.
pub hostname: String,
}
impl Host {
    /// Creates a host whose name is the `to_string()` rendering of `hostname`.
    pub fn new<S: ToString>(hostname: S) -> Self {
        let hostname = hostname.to_string();
        Self { hostname }
    }

    /// Returns the cached description of the local machine, looking the
    /// hostname up on first use.
    ///
    /// # Errors
    ///
    /// Propagates the error from `hostname::get()` when nothing is cached yet
    /// and the lookup fails. A failed lookup caches nothing.
    pub fn get_once<'a>() -> Result<&'a Self> {
        // Fast path: an earlier call already filled the cache.
        if let Some(cached) = HOST.get() {
            return Ok(cached);
        }
        let hostname = ::hostname::get()?.to_string_lossy().into_owned();
        // `get_or_init` keeps whichever value was stored first, should
        // another caller have initialised the cell in the meantime.
        Ok(HOST.get_or_init(|| Self { hostname }))
    }
}
#[cfg(feature = "py")]
#[pymethods]
impl Host {
    /// Python constructor: `Host(hostname)`.
    #[new]
    fn py_new(hostname: &str) -> Self {
        Self {
            hostname: hostname.to_owned(),
        }
    }

    /// Returns a copy of the cached local host (see `Host::get_once`).
    #[staticmethod]
    fn get() -> Result<Self> {
        let cached = Self::get_once()?;
        Ok(cached.clone())
    }

    /// Short representation showing the hostname.
    fn __repr__(&self) -> String {
        let Self { hostname } = self;
        format!("Host(hostname={})", hostname)
    }
}
/// A dataset record: an identifier, a content hash, an optional originating
/// host and serialized metadata.
///
/// Instances are created through the `from_*` constructors, which serialize
/// the metadata into a [`Map`].
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, PartialOrd, Ord)]
#[cfg_attr(feature = "arbitrary", derive(Arbitrary))]
#[cfg_attr(feature = "py", pyclass)]
pub struct Dataset {
/// Unique identifier; the constructors generate a random v4 UUID when none is given.
pub id: Uuid,
/// Raw hash bytes: supplied by the caller (`from_hash*`) or the SHA-512
/// digest of a file's contents (`from_file*`).
pub hash: Vec<u8>,
/// Host associated with the dataset, if any.
pub host: Option<Host>,
// Metadata in serialized form; private, only set by the constructors.
metadata: Map,
}
pub fn serialize_metadata<S: Serialize + Debug>(metadata: S) -> Result<Map> {
match metadata.serialize(ValueSerializer) {
Ok(Value::Map(map)) => Ok(map),
Ok(Value::Unit) => Ok(BTreeMap::new()),
Ok(_) => Err(Error::UnsupportedMetadata(format!(
"{metadata:?} is not a map",
))),
Err(e) => Err(Error::UnsupportedMetadata(format!("{e}"))),
}
}
impl Dataset {
    /// Creates a dataset from an already computed hash and tags it with the
    /// local host.
    ///
    /// * `hash` - raw hash bytes, stored as given.
    /// * `metadata` - any value that serializes to a map (or unit).
    /// * `id` - identifier to use; a random v4 UUID is generated for `None`.
    ///
    /// # Errors
    ///
    /// Fails if the metadata cannot be serialized to a map or if the local
    /// hostname cannot be determined.
    pub fn from_hash<S: Serialize + Debug>(
        hash: Vec<u8>,
        metadata: S,
        id: Option<Uuid>,
    ) -> Result<Self> {
        let mut dataset = Self::from_hash_with_host(hash, metadata, id, None)?;
        // The host is looked up only after the metadata has been accepted,
        // so a metadata error takes precedence over a hostname error.
        dataset.host = Some(Host::get_once()?.clone());
        Ok(dataset)
    }

    /// Creates a dataset from the SHA-512 digest of the file at `path` and
    /// tags it with the local host.
    ///
    /// See [`Dataset::from_hash`] for `metadata` and `id`.
    ///
    /// # Errors
    ///
    /// Fails if the file cannot be opened or read, if the metadata cannot be
    /// serialized to a map, or if the local hostname cannot be determined.
    #[cfg(feature = "sha")]
    pub fn from_file<P: AsRef<Path>, S: Serialize + Debug>(
        path: P,
        metadata: S,
        id: Option<Uuid>,
    ) -> Result<Self> {
        let hash = Self::sha512_of_file(path)?;
        Self::from_hash(hash, metadata, id)
    }

    /// Creates a dataset from an already computed hash with an explicitly
    /// chosen host; `None` leaves the dataset without a host.
    ///
    /// See [`Dataset::from_hash`] for `hash`, `metadata` and `id`.
    ///
    /// # Errors
    ///
    /// Fails if the metadata cannot be serialized to a map.
    pub fn from_hash_with_host<S: Serialize + Debug>(
        hash: Vec<u8>,
        metadata: S,
        id: Option<Uuid>,
        host: Option<Host>,
    ) -> Result<Self> {
        let id = id.unwrap_or_else(Uuid::new_v4);
        debug!("Serializing metadata: {metadata:?}");
        let metadata = serialize_metadata(metadata)?;
        Ok(Dataset {
            id,
            hash,
            metadata,
            host,
        })
    }

    /// Creates a dataset from the SHA-512 digest of the file at `path` with
    /// an explicitly chosen host; `None` leaves the dataset without a host.
    ///
    /// See [`Dataset::from_hash`] for `metadata` and `id`.
    ///
    /// # Errors
    ///
    /// Fails if the file cannot be opened or read, or if the metadata cannot
    /// be serialized to a map.
    #[cfg(feature = "sha")]
    pub fn from_file_with_host<P: AsRef<Path>, S: Serialize + Debug>(
        path: P,
        metadata: S,
        id: Option<Uuid>,
        host: Option<Host>,
    ) -> Result<Self> {
        let hash = Self::sha512_of_file(path)?;
        Self::from_hash_with_host(hash, metadata, id, host)
    }

    /// Computes the SHA-512 digest of a file's contents.
    ///
    /// The file is hashed in fixed-size chunks, so memory use does not grow
    /// with the size of the file.
    #[cfg(feature = "sha")]
    fn sha512_of_file<P: AsRef<Path>>(path: P) -> Result<Vec<u8>> {
        use sha2::{Digest, Sha512};

        const CHUNK_SIZE: usize = 64 * 1024;

        let mut file = File::open(path)?;
        let mut hasher = Sha512::new();
        let mut chunk = vec![0u8; CHUNK_SIZE];
        loop {
            match file.read(&mut chunk) {
                // A zero-length read marks the end of the file.
                Ok(0) => break,
                Ok(read) => hasher.update(&chunk[..read]),
                // Retry interrupted reads, as `read_to_end` does.
                Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
                Err(e) => return Err(e.into()),
            }
        }
        Ok(hasher.finalize().as_slice().to_vec())
    }
}
#[cfg(feature = "py")]
#[pymethods]
impl Dataset {
    /// Python constructor. Exactly one of `hash` (hex string) and `path`
    /// must be given; `id`, when given, must parse as a UUID.
    #[new]
    fn py_new(
        id: Option<&str>,
        metadata: Option<BTreeMap<String, Value>>,
        hash: Option<&str>,
        path: Option<&str>,
    ) -> Result<Dataset> {
        let id = match id {
            Some(raw) => Some(Uuid::parse_str(raw)?),
            None => None,
        };
        match (hash, path) {
            (Some(digest), None) => {
                let bytes = hex::decode(digest)?;
                Self::from_hash(bytes, metadata, id)
            }
            (None, Some(file)) => Self::from_file(file, metadata, id),
            (None, None) | (Some(_), Some(_)) => Err(Error::Other(String::from(
                "Exactly one of hash and path has to be specified",
            ))),
        }
    }

    /// Short representation showing the id and the hex-encoded hash.
    fn __repr__(&self) -> String {
        let digest = hex::encode(&self.hash);
        format!("Dataset(id={}, hash={})", self.id, digest)
    }

    /// The dataset id as a string.
    #[getter]
    fn get_id(&self) -> String {
        self.id.to_string()
    }

    /// The dataset hash as a hexadecimal string.
    #[getter]
    fn get_hash(&self) -> String {
        hex::encode(&self.hash)
    }

    /// The metadata, re-serialized and wrapped in `Value::Map`.
    #[getter]
    fn get_metadata(&self) -> Result<Value> {
        serialize_metadata(&self.metadata).map(Value::Map)
    }
}
/// Python module definition: registers the crate's Python-visible classes.
///
/// `Session`, `Software`, `Run`, `Host` and `Dataset` are always registered.
/// The JSON and Neo4j database classes are added only when the matching
/// Cargo feature is enabled.
#[cfg(feature = "py")]
#[pymodule]
fn simuldb(_py: Python<'_>, m: &PyModule) -> PyResult<()> {
    m.add_class::<Session>()?;
    m.add_class::<Software>()?;
    m.add_class::<Run>()?;
    // `Host` has a Python constructor and a static `get`, so it must be
    // registered to be reachable from Python.
    m.add_class::<Host>()?;
    m.add_class::<Dataset>()?;
    #[cfg(feature = "json")]
    {
        m.add_class::<db::json::Json>()?;
        m.add_class::<db::json::JsonSession>()?;
    }
    #[cfg(feature = "neo4j")]
    {
        m.add_class::<db::neo4j::Neo4j>()?;
        m.add_class::<db::neo4j::Neo4jSession>()?;
    }
    Ok(())
}