use std::collections::HashMap;
use std::future::Future;
use chrono::{DateTime, Utc};
use futures::stream::{self, BoxStream};
use uuid::Uuid;
use crate::config::PortalType;
use crate::{AppError, Dataset, NewDataset, SearchResult};
/// A backend that turns text into embedding vectors (`Vec<f32>`).
///
/// Implementors must be `Send + Sync + Clone`.
pub trait EmbeddingProvider: Send + Sync + Clone {
    /// Static name identifying this provider.
    fn name(&self) -> &'static str;

    /// Dimension reported by this provider.
    ///
    /// NOTE(review): presumably the length of every vector returned by
    /// [`generate`](Self::generate) — confirm against implementors.
    fn dimension(&self) -> usize;

    /// Asynchronously produces the embedding vector for a single `text`.
    ///
    /// # Errors
    ///
    /// Resolves to an [`AppError`] when the provider cannot produce a vector.
    fn generate(&self, text: &str) -> impl Future<Output = Result<Vec<f32>, AppError>> + Send;

    /// Maximum batch size advertised by this provider; defaults to `1`.
    ///
    /// The default [`generate_batch`](Self::generate_batch) does not consult
    /// this value; it is presumably meant for callers that chunk their input.
    fn max_batch_size(&self) -> usize {
        1
    }

    /// Produces one embedding per entry of `texts`, in input order.
    ///
    /// The default implementation awaits [`generate`](Self::generate) once per
    /// text, sequentially. An empty slice resolves to an empty vector.
    ///
    /// # Errors
    ///
    /// Resolves to the first error returned by `generate`; texts after the
    /// failing one are not processed.
    fn generate_batch(
        &self,
        texts: &[String],
    ) -> impl Future<Output = Result<Vec<Vec<f32>>, AppError>> + Send {
        // The returned future is allowed to borrow `texts` (return-position
        // `impl Trait` in traits captures every in-scope lifetime), so the
        // slice is used directly instead of deep-copying each string first.
        async move {
            let mut embeddings = Vec::with_capacity(texts.len());
            for text in texts {
                embeddings.push(self.generate(text).await?);
            }
            Ok(embeddings)
        }
    }
}
/// Client for one data portal.
///
/// Implementors must be `Send + Sync + Clone`. Every asynchronous method
/// returns a `Send` future.
pub trait PortalClient: Send + Sync + Clone {
    /// Portal-specific representation of a dataset, as returned by the fetch
    /// and search methods of this trait.
    type PortalData: Send;

    /// Static string naming the kind of portal this client talks to.
    fn portal_type(&self) -> &'static str;

    /// Base URL of the portal, borrowed from the client.
    fn base_url(&self) -> &str;

    /// Resolves to a list of dataset identifiers.
    ///
    /// NOTE(review): presumably every identifier known to the portal —
    /// confirm against implementors.
    fn list_dataset_ids(&self) -> impl Future<Output = Result<Vec<String>, AppError>> + Send;

    /// Fetches the portal's data for the dataset identified by `id`.
    fn get_dataset(
        &self,
        id: &str,
    ) -> impl Future<Output = Result<Self::PortalData, AppError>> + Send;

    /// Converts portal data into a [`NewDataset`].
    ///
    /// This is an associated function (it takes no `self`), so everything it
    /// needs arrives through `portal_url`, `url_template` and `language`.
    /// NOTE(review): how `url_template` and `language` are applied is decided
    /// by implementors and is not visible from this trait.
    fn into_new_dataset(
        data: Self::PortalData,
        portal_url: &str,
        url_template: Option<&str>,
        language: &str,
    ) -> NewDataset;

    /// Resolves to portal data selected relative to the `since` timestamp.
    ///
    /// NOTE(review): presumably datasets modified at or after `since` —
    /// confirm the exact boundary against implementors.
    fn search_modified_since(
        &self,
        since: DateTime<Utc>,
    ) -> impl Future<Output = Result<Vec<Self::PortalData>, AppError>> + Send;

    /// Resolves to the portal's datasets in a single response.
    ///
    /// The default implementation always resolves to an
    /// [`AppError::Generic`] stating that the operation is not supported.
    fn search_all_datasets(
        &self,
    ) -> impl Future<Output = Result<Vec<Self::PortalData>, AppError>> + Send {
        async {
            let message = String::from("search_all_datasets not supported");
            Err(AppError::Generic(message))
        }
    }

    /// Streaming counterpart of
    /// [`search_all_datasets`](Self::search_all_datasets).
    ///
    /// The default implementation yields exactly one item: the result of
    /// `search_all_datasets`.
    fn search_all_datasets_stream(&self) -> BoxStream<'_, Result<Vec<Self::PortalData>, AppError>> {
        let single_page = self.search_all_datasets();
        Box::pin(stream::once(single_page))
    }

    /// Resolves to a dataset count.
    ///
    /// The default implementation always resolves to an
    /// [`AppError::Generic`] stating that the operation is not supported.
    fn dataset_count(&self) -> impl Future<Output = Result<usize, AppError>> + Send {
        async {
            let message = String::from("dataset_count not supported");
            Err(AppError::Generic(message))
        }
    }
}
/// Factory that builds [`PortalClient`] instances.
///
/// Implementors must be `Send + Sync + Clone`.
pub trait PortalClientFactory: Send + Sync + Clone {
    /// Concrete client type produced by [`create`](Self::create).
    type Client: PortalClient;

    /// Builds a client for `portal_url` of the given `portal_type`.
    ///
    /// `language` is required; `profile` and `sparql_endpoint` are optional.
    /// NOTE(review): how each argument influences the resulting client is
    /// decided by implementors and is not visible from this trait.
    ///
    /// # Errors
    ///
    /// Returns an [`AppError`] when the client cannot be constructed.
    fn create(
        &self,
        portal_url: &str,
        portal_type: PortalType,
        language: &str,
        profile: Option<&str>,
        sparql_endpoint: Option<&str>,
    ) -> Result<Self::Client, AppError>;
}
pub trait DatasetStore: Send + Sync + Clone {
fn get_by_id(&self, id: Uuid)
-> impl Future<Output = Result<Option<Dataset>, AppError>> + Send;
fn get_hashes_for_portal(
&self,
portal_url: &str,
) -> impl Future<Output = Result<HashMap<String, Option<String>>, AppError>> + Send;
fn update_timestamp_only(
&self,
portal_url: &str,
original_id: &str,
) -> impl Future<Output = Result<(), AppError>> + Send;
fn batch_update_timestamps(
&self,
portal_url: &str,
original_ids: &[String],
) -> impl Future<Output = Result<u64, AppError>> + Send;
fn mark_stale_datasets(
&self,
portal_url: &str,
sync_start: DateTime<Utc>,
) -> impl Future<Output = Result<u64, AppError>> + Send;
fn mark_stale_by_exclusion(
&self,
portal_url: &str,
seen_ids: &[String],
) -> impl Future<Output = Result<u64, AppError>> + Send;
fn upsert(&self, dataset: &NewDataset) -> impl Future<Output = Result<Uuid, AppError>> + Send;
fn batch_upsert(
&self,
datasets: &[NewDataset],
) -> impl Future<Output = Result<Vec<Uuid>, AppError>> + Send;
fn search(
&self,
query_vector: Vec<f32>,
limit: usize,
) -> impl Future<Output = Result<Vec<SearchResult>, AppError>> + Send;
fn list_stream<'a>(
&'a self,
portal_filter: Option<&'a str>,
limit: Option<usize>,
) -> BoxStream<'a, Result<Dataset, AppError>>;
fn get_last_sync_time(
&self,
portal_url: &str,
) -> impl Future<Output = Result<Option<DateTime<Utc>>, AppError>> + Send;
fn record_sync_status(
&self,
portal_url: &str,
sync_time: DateTime<Utc>,
sync_mode: &str,
sync_status: &str,
datasets_synced: i32,
) -> impl Future<Output = Result<(), AppError>> + Send;
fn get_duplicate_titles(
&self,
) -> impl Future<Output = Result<std::collections::HashSet<String>, AppError>> + Send;
fn list_pending_embeddings(
&self,
portal_filter: Option<&str>,
limit: Option<usize>,
) -> impl Future<Output = Result<Vec<Dataset>, AppError>> + Send;
fn count_pending_embeddings(
&self,
portal_filter: Option<&str>,
) -> impl Future<Output = Result<i64, AppError>> + Send;
fn health_check(&self) -> impl Future<Output = Result<(), AppError>> + Send;
}