vantage_dataset/traits/valueset.rs
1use std::pin::Pin;
2
3use crate::record::ActiveRecord;
4
5use super::Result;
6use async_trait::async_trait;
7use futures_core::Stream;
8use indexmap::IndexMap;
9use vantage_types::Record;
10
/// Foundation trait for all dataset operations, defining the basic types used for storage.
///
/// Typically you would implement `ValueSet` in combination with:
///
/// - [`ReadableValueSet`]
/// - [`WritableValueSet`]
/// - [`InsertableValueSet`]
///
/// `ValueSet` establishes the contract for working with raw storage values, providing
/// the building blocks that higher-level [`DataSet`](super::DataSet) traits build upon.
/// This separation allows the same storage backend to work with both typed entities
/// and raw values efficiently.
///
/// The trait itself declares no methods, only the two associated types below, so
/// implementing it requires no `#[async_trait]` attribute.
///
/// # Type Parameters
///
/// - `Id`: Unique identifier type chosen by the storage implementation
/// - `Value`: Raw storage value type, typically JSON-like structures
///
/// # Example
///
/// ```rust,ignore
/// use vantage_dataset::{ReadableValueSet, ValueSet, prelude::*};
/// use vantage_types::Record;
/// use serde_json::Value;
///
/// struct CsvFile {
///     filename: String,
/// }
///
/// impl ValueSet for CsvFile {
///     type Id = String;
///     type Value = serde_json::Value;
/// }
///
/// #[async_trait]
/// impl ReadableValueSet for CsvFile {
///     async fn list_values(&self) -> Result<IndexMap<Self::Id, Record<Self::Value>>> {
///         // Parse CSV and return as JSON values
///         // Implementation details...
///         todo!()
///     }
///
///     async fn get_value(&self, id: &Self::Id) -> Result<Record<Self::Value>> {
///         // Find specific record by ID
///         // Implementation details...
///         todo!()
///     }
///
///     async fn get_some_value(&self) -> Result<Option<(Self::Id, Record<Self::Value>)>> {
///         // Return first record if any exists
///         // Implementation details...
///         todo!()
///     }
/// }
/// ```
pub trait ValueSet {
    /// Unique identifier type for records in this storage backend.
    ///
    /// Common choices:
    /// - `String` for most databases and APIs
    /// - `uuid::Uuid` if database does not support other types of IDs.
    /// - Database-specific types like `surrealdb::sql::Thing`
    type Id: Send + Sync + Clone;

    /// Raw storage value type, representing data as stored in the backend, like
    /// `serde_json::Value` or `ciborium::Value`. Can also be a custom type.
    type Value: Send + Sync + Clone;
}
76
77/// Read-only access to raw storage values without entity deserialization.
78///
79/// See documentation for [`ValueSet`] for implementation example.
80#[async_trait]
81pub trait ReadableValueSet: ValueSet {
82 /// Retrieve all records as raw storage values preserving insertion order where supported.
83 ///
84 /// # Performance
85 /// In Vantage you can't retrieve values of a Set partially. Instead you should
86 /// create a sub-set of your existing set, then list values of that set instead.
87 async fn list_values(&self) -> Result<IndexMap<Self::Id, Record<Self::Value>>>;
88
89 /// Retrieve a specific record by ID as a structured record.
90 async fn get_value(&self, id: &Self::Id) -> Result<Record<Self::Value>>;
91
92 /// Retrieve one single record from the set. If records are ordered - return first record.
93 /// will return Ok(None).
94 ///
95 /// Useful when you operate with a very specific subset of data.
96 async fn get_some_value(&self) -> Result<Option<(Self::Id, Record<Self::Value>)>>;
97
98 /// Stream all records as (Id, Record) pairs.
99 ///
100 /// Default wraps `list_values()`. Backends with native streaming
101 /// (e.g. paginated REST APIs) can override for incremental fetching.
102 #[allow(clippy::type_complexity)]
103 fn stream_values(
104 &self,
105 ) -> Pin<Box<dyn Stream<Item = Result<(Self::Id, Record<Self::Value>)>> + Send + '_>>
106 where
107 Self: Sync,
108 {
109 Box::pin(async_stream::stream! {
110 let records = self.list_values().await;
111 match records {
112 Ok(map) => {
113 for item in map {
114 yield Ok(item);
115 }
116 }
117 Err(e) => yield Err(e),
118 }
119 })
120 }
121}
122
123/// Write operations on raw storage values with idempotent behavior.
124///
125/// See documentation for [`ValueSet`] for implementation example.
126#[async_trait]
127pub trait WritableValueSet: ValueSet {
128 /// Insert value with a specific ID (often generated) (HTTP POST with ID)
129 ///
130 /// **Idempotent**: Succeeds if no record exists with the given ID. If
131 /// record already exists, must return success without overwriting
132 /// data, returning original data.
133 ///
134 /// **Returns**: Record as it was stored.
135 ///
136 /// # Use Case
137 /// Generate unique ID and store record while avoiding duplicates.
138 async fn insert_value(
139 &self,
140 id: &Self::Id,
141 record: &Record<Self::Value>,
142 ) -> Result<Record<Self::Value>>;
143
144 /// Replace the entire record at the specified ID (HTTP PUT)
145 ///
146 /// **Idempotent**: Always succeeds, completely overwrites existing data
147 /// if present. If possible, will remove/recreate record; therefore if
148 /// `record` doesn't contain certain attributes which were present in the
149 /// database, those will be removed. If record does not exist, will
150 /// create it.
151 ///
152 /// **Returns**: Record as it was stored.
153 ///
154 /// # Use Case
155 /// Replace with a new version of a record.
156 async fn replace_value(
157 &self,
158 id: &Self::Id,
159 record: &Record<Self::Value>,
160 ) -> Result<Record<Self::Value>>;
161
162 /// Partially update a record by merging with the provided value (HTTP PATCH)
163 ///
164 /// **Fails if record doesn't exist**. The exact merge behavior depends on
165 /// the storage implementation - typically merges object fields for JSON-like values.
166 ///
167 /// **Returns**: Record as it was stored (not only the partial change).
168 ///
169 /// # Use Case
170 /// Update only the modified fields of a record.
171 async fn patch_value(
172 &self,
173 id: &Self::Id,
174 partial: &Record<Self::Value>,
175 ) -> Result<Record<Self::Value>>;
176
177 /// Delete a record by ID (HTTP DELETE)
178 ///
179 /// **Idempotent**: Always succeeds, even if the record doesn't exist.
180 /// This allows safe cleanup operations without checking existence first.
181 async fn delete(&self, id: &Self::Id) -> Result<()>;
182
183 /// Delete all records in the set (HTTP DELETE without ID)
184 ///
185 /// **Idempotent**: All records in the set will be deleted.
186 /// Executing several times is OK.
187 ///
188 /// Execute on a subset of your entire database.
189 async fn delete_all(&self) -> Result<()>;
190}
191
192/// Append-only operations on raw storage values with automatic ID generation.
193///
194/// See documentation for [`ValueSet`] for implementation example.
195#[async_trait]
196pub trait InsertableValueSet: ValueSet {
197 /// Insert a value and return the generated ID (Similar to HTTP POST without ID)
198 ///
199 /// The storage backend generates a unique identifier for the new record.
200 ///
201 /// # Warning
202 ///
203 /// This method is **not idempotent** - each call creates a new record with
204 /// a new ID, even if the value data is identical.
205 async fn insert_return_id_value(&self, record: &Record<Self::Value>) -> Result<Self::Id>;
206}
207
208/// Change tracking for raw storage values with automatic persistence.
209///
210/// See documentation for [`ValueSet`] for implementation example.
211#[async_trait]
212pub trait ActiveRecordSet: ReadableValueSet + WritableValueSet {
213 /// Retrieve a record wrapped for change tracking and deferred persistence.
214 ///
215 /// The returned `RecordValue` can be modified in-place and will track all
216 /// changes for efficient persistence when `save()` is called.
217 ///
218 /// # Returns
219 ///
220 /// - `Ok(RecordValue)`: Record wrapper with change tracking
221 /// - `Err`: If record doesn't exist or cannot be loaded
222 async fn get_value_record(&self, id: &Self::Id) -> Result<ActiveRecord<'_, Self>>;
223
224 /// Retrieve all records wrapped for change tracking.
225 ///
226 /// Each returned `RecordValue` operates independently - modifications to one
227 /// record don't affect others, and each must be saved separately.
228 ///
229 /// # Performance Note
230 ///
231 /// This loads all records into memory. Consider pagination or streaming
232 /// approaches for large datasets.
233 async fn list_value_records(&self) -> Result<Vec<ActiveRecord<'_, Self>>>;
234}
235
236// Auto-implement for any type that has both readable and writable traits
237#[async_trait]
238impl<T> ActiveRecordSet for T
239where
240 T: ReadableValueSet + WritableValueSet + Sync,
241{
242 async fn get_value_record(&self, id: &Self::Id) -> Result<ActiveRecord<'_, Self>> {
243 let record = self.get_value(id).await?;
244 Ok(ActiveRecord::new(id.clone(), record, self))
245 }
246
247 async fn list_value_records(&self) -> Result<Vec<ActiveRecord<'_, Self>>> {
248 let items = self.list_values().await?;
249
250 Ok(items
251 .into_iter()
252 .map(|(id, record)| ActiveRecord::new(id, record, self))
253 .collect::<Vec<_>>())
254 }
255}