pub trait DatasetStore:
Send
+ Sync
+ Clone {
// Required methods
fn get_by_id(
&self,
id: Uuid,
) -> impl Future<Output = Result<Option<Dataset>, AppError>> + Send;
fn get_hashes_for_portal(
&self,
portal_url: &str,
) -> impl Future<Output = Result<HashMap<String, Option<String>>, AppError>> + Send;
fn update_timestamp_only(
&self,
portal_url: &str,
original_id: &str,
) -> impl Future<Output = Result<(), AppError>> + Send;
fn batch_update_timestamps(
&self,
portal_url: &str,
original_ids: &[String],
) -> impl Future<Output = Result<u64, AppError>> + Send;
fn upsert(
&self,
dataset: &NewDataset,
) -> impl Future<Output = Result<Uuid, AppError>> + Send;
fn search(
&self,
query_vector: Vector,
limit: usize,
) -> impl Future<Output = Result<Vec<SearchResult>, AppError>> + Send;
fn list_stream<'a>(
&'a self,
portal_filter: Option<&'a str>,
limit: Option<usize>,
) -> BoxStream<'a, Result<Dataset, AppError>>;
fn get_last_sync_time(
&self,
portal_url: &str,
) -> impl Future<Output = Result<Option<DateTime<Utc>>, AppError>> + Send;
fn record_sync_status(
&self,
portal_url: &str,
sync_time: DateTime<Utc>,
sync_mode: &str,
sync_status: &str,
datasets_synced: i32,
) -> impl Future<Output = Result<(), AppError>> + Send;
fn health_check(&self) -> impl Future<Output = Result<(), AppError>> + Send;
}Expand description
Store for dataset persistence and retrieval.
Implementations handle database operations for datasets.
Required Methods
fn get_by_id(
    &self,
    id: Uuid,
) -> impl Future<Output = Result<Option<Dataset>, AppError>> + Send
fn get_hashes_for_portal(
    &self,
    portal_url: &str,
) -> impl Future<Output = Result<HashMap<String, Option<String>>, AppError>> + Send
fn update_timestamp_only(
    &self,
    portal_url: &str,
    original_id: &str,
) -> impl Future<Output = Result<(), AppError>> + Send
Updates only the timestamp for an unchanged dataset.
Used when the content hash matches but we want to track the “last seen” time.
Arguments
- portal_url - The source portal URL
- original_id - The dataset’s original ID from the portal
fn batch_update_timestamps(
    &self,
    portal_url: &str,
    original_ids: &[String],
) -> impl Future<Output = Result<u64, AppError>> + Send
fn upsert(
    &self,
    dataset: &NewDataset,
) -> impl Future<Output = Result<Uuid, AppError>> + Send
fn search(
    &self,
    query_vector: Vector,
    limit: usize,
) -> impl Future<Output = Result<Vec<SearchResult>, AppError>> + Send
fn list_stream<'a>(
    &'a self,
    portal_filter: Option<&'a str>,
    limit: Option<usize>,
) -> BoxStream<'a, Result<Dataset, AppError>>
Lists datasets as a stream with optional filtering.
This method returns a stream of datasets for memory-efficient processing of large result sets. Unlike batch methods, it streams results directly from the database without loading everything into memory.
Arguments
- portal_filter - Optional portal URL to filter by
- limit - Optional maximum number of records
fn get_last_sync_time(
    &self,
    portal_url: &str,
) -> impl Future<Output = Result<Option<DateTime<Utc>>, AppError>> + Send
fn record_sync_status(
    &self,
    portal_url: &str,
    sync_time: DateTime<Utc>,
    sync_mode: &str,
    sync_status: &str,
    datasets_synced: i32,
) -> impl Future<Output = Result<(), AppError>> + Send
Records a sync status for a portal.
Called after a harvest operation to update the sync status.
The sync_status parameter indicates the outcome: “completed” or “cancelled”.
Arguments
- portal_url - The source portal URL
- sync_time - The timestamp of this sync
- sync_mode - Either “full” or “incremental”
- sync_status - The outcome: “completed” or “cancelled”
- datasets_synced - Number of datasets processed
Dyn Compatibility
This trait is not dyn compatible.
In older versions of Rust, dyn compatibility was called "object safety", so this trait is not object safe.