mod datasheet;
mod version;
pub use datasheet::Datasheet;
pub use version::DatasetVersion;
use crate::storage::ContentAddress;
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use uuid::Uuid;
/// Identifier for a dataset, implemented as a newtype around a [`Uuid`].
///
/// The derives make it comparable, hashable (usable as a key in hash-based
/// collections) and serializable/deserializable through serde.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct DatasetId(Uuid);
impl DatasetId {
#[must_use]
pub fn new() -> Self {
Self(Uuid::new_v4())
}
#[must_use]
pub fn from_uuid(uuid: Uuid) -> Self {
Self(uuid)
}
#[must_use]
pub fn as_uuid(&self) -> &Uuid {
&self.0
}
}
impl Default for DatasetId {
fn default() -> Self {
Self::new()
}
}
impl std::fmt::Display for DatasetId {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.0)
}
}
impl std::str::FromStr for DatasetId {
    type Err = uuid::Error;

    /// Parses a dataset identifier from the textual form of a UUID.
    ///
    /// # Errors
    ///
    /// Returns the [`uuid::Error`] produced by `Uuid::parse_str` when `s`
    /// is not accepted as a UUID.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Uuid::parse_str(s).map(Self)
    }
}
/// A reference to a dataset by name and version.
///
/// Its `Display` form is `name:version`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct DatasetReference {
/// Name of the referenced dataset.
pub name: String,
/// Version of the referenced dataset.
pub version: DatasetVersion,
}
impl DatasetReference {
#[must_use]
pub fn new(name: impl Into<String>, version: DatasetVersion) -> Self {
Self {
name: name.into(),
version,
}
}
}
impl std::fmt::Display for DatasetReference {
    /// Formats the reference as `<name>:<version>`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { name, version } = self;
        write!(f, "{}:{}", name, version)
    }
}
/// A dataset record: identity, name, version, content address, datasheet
/// and a UTC timestamp.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Dataset {
/// Unique identifier of this dataset.
pub id: DatasetId,
/// Dataset name; combined with `version` by [`Dataset::reference`].
pub name: String,
/// Version of this dataset.
pub version: DatasetVersion,
/// Content address from `crate::storage`.
// NOTE(review): presumably locates/identifies the dataset's stored bytes — confirm against `ContentAddress`.
pub content_address: ContentAddress,
/// Datasheet attached to this dataset (see [`Datasheet`]).
pub datasheet: Datasheet,
/// UTC timestamp; by its name, the moment the dataset was created.
pub created_at: DateTime<Utc>,
}
impl Dataset {
    /// Returns a [`DatasetReference`] built from copies of this dataset's
    /// name and version.
    #[must_use]
    pub fn reference(&self) -> DatasetReference {
        let version = self.version.clone();
        DatasetReference::new(self.name.as_str(), version)
    }
}
/// A single provenance statement relating datasets, activities and agents.
///
/// Serialized by serde as an internally tagged enum: the variant name is
/// written in `snake_case` under the `"type"` key (e.g. `"was_derived_from"`),
/// alongside the variant's fields.
// NOTE(review): variant names resemble the W3C PROV relations (wasDerivedFrom,
// wasGeneratedBy, used, wasAttributedTo) — presumably modeled on them; confirm.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ProvenanceRecord {
/// Relates a derived dataset to the source dataset it came from.
WasDerivedFrom {
/// The dataset that was derived.
derived: DatasetId,
/// The dataset it was derived from.
source: DatasetId,
/// Free-form description/name of the transformation applied.
transformation: String,
},
/// Relates a dataset to the activity that generated it.
WasGeneratedBy {
/// The dataset that was generated.
data: DatasetId,
/// Free-form name/identifier of the generating activity.
activity: String,
/// UTC timestamp associated with the generation.
timestamp: DateTime<Utc>,
},
/// Relates an activity to a dataset it used.
Used {
/// Free-form name/identifier of the activity.
activity: String,
/// The dataset that was used.
data: DatasetId,
},
/// Relates a dataset to an agent it is attributed to.
WasAttributedTo {
/// The dataset being attributed.
entity: DatasetId,
/// Free-form name/identifier of the agent.
agent: String,
},
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Two independently generated ids must differ.
    #[test]
    fn test_dataset_id_generation() {
        let first = DatasetId::new();
        let second = DatasetId::new();
        assert_ne!(first, second);
    }

    /// An id survives a `Display` -> `FromStr` round trip unchanged.
    #[test]
    fn test_dataset_id_from_str() {
        let original = DatasetId::new();
        let round_tripped = original.to_string().parse::<DatasetId>().unwrap();
        assert_eq!(original, round_tripped);
    }

    /// A reference is rendered as `name:version`.
    #[test]
    fn test_dataset_reference_display() {
        let version = DatasetVersion::new(1, 2, 3);
        let rendered = DatasetReference::new("transactions", version).to_string();
        assert_eq!(rendered, "transactions:1.2.3");
    }

    /// The serialized form carries the snake_case variant tag.
    #[test]
    fn test_provenance_serialization() {
        let (derived, source) = (DatasetId::new(), DatasetId::new());
        let record = ProvenanceRecord::WasDerivedFrom {
            derived,
            source,
            transformation: String::from("normalize"),
        };
        let encoded = serde_json::to_string(&record).unwrap();
        assert!(encoded.contains("was_derived_from"));
    }
}