1mod datasheet;
6mod version;
7
8pub use datasheet::Datasheet;
9pub use version::DatasetVersion;
10
11use crate::storage::ContentAddress;
12use chrono::{DateTime, Utc};
13use serde::{Deserialize, Serialize};
14use uuid::Uuid;
15
/// Identifier of a dataset, implemented as a newtype over a [`Uuid`].
///
/// Equality, hashing and (de)serialization are all derived, so they are
/// determined solely by the wrapped UUID.
// NOTE(review): `Uuid` is `Copy`; deriving `Copy` here as well would let
// callers pass identifiers by value — confirm no caller relies on it not
// being `Copy` before adding it.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct DatasetId(Uuid);
19
20impl DatasetId {
21 #[must_use]
23 pub fn new() -> Self {
24 Self(Uuid::new_v4())
25 }
26
27 #[must_use]
29 pub fn from_uuid(uuid: Uuid) -> Self {
30 Self(uuid)
31 }
32
33 #[must_use]
35 pub fn as_uuid(&self) -> &Uuid {
36 &self.0
37 }
38}
39
40impl Default for DatasetId {
41 fn default() -> Self {
42 Self::new()
43 }
44}
45
46impl std::fmt::Display for DatasetId {
47 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
48 write!(f, "{}", self.0)
49 }
50}
51
52impl std::str::FromStr for DatasetId {
53 type Err = uuid::Error;
54
55 fn from_str(s: &str) -> Result<Self, Self::Err> {
56 Ok(Self(Uuid::parse_str(s)?))
57 }
58}
59
/// A dataset name paired with a dataset version.
///
/// The `Display` implementation in this file renders a reference as
/// `name:version`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct DatasetReference {
    /// Name component of the reference.
    pub name: String,
    /// Version component of the reference.
    pub version: DatasetVersion,
}
68
69impl DatasetReference {
70 #[must_use]
72 pub fn new(name: impl Into<String>, version: DatasetVersion) -> Self {
73 Self { name: name.into(), version }
74 }
75}
76
77impl std::fmt::Display for DatasetReference {
78 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
79 write!(f, "{}:{}", self.name, self.version)
80 }
81}
82
/// Record describing one dataset: its identifier, name, version, content
/// address, datasheet and a UTC timestamp.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Dataset {
    /// Identifier of this dataset.
    pub id: DatasetId,
    /// Dataset name; used as the name part of [`Dataset::reference`].
    pub name: String,
    /// Dataset version; used as the version part of [`Dataset::reference`].
    pub version: DatasetVersion,
    /// Content address, a type provided by `crate::storage`.
    /// NOTE(review): presumably locates the dataset's stored content — confirm
    /// against the storage module.
    pub content_address: ContentAddress,
    /// Datasheet attached to this dataset (type defined in the `datasheet`
    /// submodule).
    pub datasheet: Datasheet,
    /// UTC timestamp; going by the field name, the creation time of the
    /// dataset. Nothing in this file sets it, so the constructing code decides.
    pub created_at: DateTime<Utc>,
}
99
100impl Dataset {
101 #[must_use]
103 pub fn reference(&self) -> DatasetReference {
104 DatasetReference::new(&self.name, self.version.clone())
105 }
106}
107
/// A single provenance statement relating datasets, activities and agents.
///
/// Serialized by serde as an internally tagged enum: the variant is written to
/// a `"type"` field using its snake_case name (for example
/// `"was_derived_from"`), next to the variant's own fields.
///
/// NOTE(review): the variant names resemble W3C PROV relations — confirm
/// whether the semantics are intended to follow that model.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ProvenanceRecord {
    /// Links a derived dataset to the dataset it came from.
    WasDerivedFrom {
        /// Identifier of the derived dataset.
        derived: DatasetId,
        /// Identifier of the dataset it was derived from.
        source: DatasetId,
        /// Transformation label, stored as a plain string (the tests in this
        /// file use `"normalize"`).
        transformation: String,
    },
    /// Links a dataset to an activity, together with a timestamp.
    WasGeneratedBy {
        /// Identifier of the dataset.
        data: DatasetId,
        /// Activity label, stored as a plain string.
        activity: String,
        /// UTC timestamp attached to the statement.
        timestamp: DateTime<Utc>,
    },
    /// Links an activity to a dataset.
    Used {
        /// Activity label, stored as a plain string.
        activity: String,
        /// Identifier of the dataset.
        data: DatasetId,
    },
    /// Links a dataset to an agent.
    WasAttributedTo {
        /// Identifier of the dataset the statement is about.
        entity: DatasetId,
        /// Agent label, stored as a plain string.
        agent: String,
    },
}
145
#[cfg(test)]
mod tests {
    use super::*;

    /// Two freshly generated identifiers must not compare equal.
    #[test]
    fn test_dataset_id_generation() {
        let id1 = DatasetId::new();
        let id2 = DatasetId::new();
        assert_ne!(id1, id2);
    }

    /// `Display` output must parse back into an equal identifier.
    #[test]
    fn test_dataset_id_from_str() {
        let id = DatasetId::new();
        let parsed: DatasetId = id
            .to_string()
            .parse()
            .expect("Display output of a DatasetId must parse back");
        assert_eq!(id, parsed);
    }

    /// Strings that are not UUIDs must be rejected rather than accepted.
    #[test]
    fn test_dataset_id_from_str_rejects_invalid() {
        assert!("not-a-uuid".parse::<DatasetId>().is_err());
        assert!("".parse::<DatasetId>().is_err());
    }

    /// `from_uuid` followed by `as_uuid` must hand back the same UUID.
    #[test]
    fn test_dataset_id_uuid_round_trip() {
        let uuid = Uuid::new_v4();
        assert_eq!(DatasetId::from_uuid(uuid).as_uuid(), &uuid);
    }

    /// A reference is displayed as `name:version`.
    #[test]
    fn test_dataset_reference_display() {
        let reference = DatasetReference::new("transactions", DatasetVersion::new(1, 2, 3));
        assert_eq!(reference.to_string(), "transactions:1.2.3");
    }

    /// Pins the serialized layout (variant name under the `"type"` key) and
    /// checks that the JSON deserializes back into the same record.
    #[test]
    fn test_provenance_serialization() {
        let derived = DatasetId::new();
        let source = DatasetId::new();
        let record = ProvenanceRecord::WasDerivedFrom {
            derived: derived.clone(),
            source: source.clone(),
            transformation: "normalize".to_string(),
        };

        let json = serde_json::to_string(&record).expect("record must serialize to JSON");
        assert!(json.contains("was_derived_from"));

        // A substring match alone would also accept other enum representations;
        // check that the variant name really sits in the `type` field.
        let value: serde_json::Value =
            serde_json::from_str(&json).expect("serialized record must be valid JSON");
        assert_eq!(value["type"], "was_derived_from");
        assert_eq!(value["transformation"], "normalize");

        let restored: ProvenanceRecord =
            serde_json::from_str(&json).expect("serialized record must deserialize");
        match restored {
            ProvenanceRecord::WasDerivedFrom {
                derived: restored_derived,
                source: restored_source,
                transformation,
            } => {
                assert_eq!(restored_derived, derived);
                assert_eq!(restored_source, source);
                assert_eq!(transformation, "normalize");
            }
            other => panic!("unexpected variant after round trip: {:?}", other),
        }
    }
}