pub struct DatasetRepository { /* private fields */ }
Expand description
Repository for dataset persistence in PostgreSQL with pgvector.
§Examples
use sqlx::postgres::PgPoolOptions;
use ceres_db::DatasetRepository;
let pool = PgPoolOptions::new()
.max_connections(5)
.connect("postgresql://localhost/ceres")
.await?;
let repo = DatasetRepository::new(pool);
Implementations§
Source§impl DatasetRepository
impl DatasetRepository
pub fn new(pool: PgPool) -> Self
Source pub async fn upsert(&self, new_data: &NewDataset) -> Result<Uuid, AppError>
pub async fn upsert(&self, new_data: &NewDataset) -> Result<Uuid, AppError>
Inserts or updates a dataset. Returns the UUID of the affected row.
TODO(robustness): Return UpsertOutcome to distinguish insert vs update
Currently returns only UUID without indicating operation type.
Consider: pub enum UpsertOutcome { Created(Uuid), Updated(Uuid) }
This enables accurate progress reporting in sync statistics.
Source pub async fn get_hashes_for_portal(
&self,
portal_url: &str,
) -> Result<HashMap<String, Option<String>>, AppError>
pub async fn get_hashes_for_portal( &self, portal_url: &str, ) -> Result<HashMap<String, Option<String>>, AppError>
Returns a map of original_id → content_hash for all datasets from a portal.
TODO(performance): Optimize for large portals (100k+ datasets). Currently loads the entire HashMap into memory. Consider: (1) streaming hash comparison during sync, (2) a database-side hash check with a WHERE clause, or (3) a Bloom filter for approximate membership testing.
Source pub async fn update_timestamp_only(
&self,
portal_url: &str,
original_id: &str,
) -> Result<bool, AppError>
pub async fn update_timestamp_only( &self, portal_url: &str, original_id: &str, ) -> Result<bool, AppError>
Updates only the timestamp for unchanged datasets. Returns true if a row was updated.
Source pub async fn get(&self, id: Uuid) -> Result<Option<Dataset>, AppError>
pub async fn get(&self, id: Uuid) -> Result<Option<Dataset>, AppError>
Retrieves a dataset by UUID.
Source pub async fn search(
&self,
query_vector: Vector,
limit: usize,
) -> Result<Vec<SearchResult>, AppError>
pub async fn search( &self, query_vector: Vector, limit: usize, ) -> Result<Vec<SearchResult>, AppError>
Semantic search using cosine similarity. Returns results ordered by similarity.
Source pub async fn list_all(
&self,
portal_filter: Option<&str>,
limit: Option<usize>,
) -> Result<Vec<Dataset>, AppError>
pub async fn list_all( &self, portal_filter: Option<&str>, limit: Option<usize>, ) -> Result<Vec<Dataset>, AppError>
Lists datasets with optional portal filter and limit.
TODO(config): Make the default limit configurable via the DEFAULT_EXPORT_LIMIT env var. Currently hardcoded to 10000. For large exports, consider streaming instead.
TODO(performance): Implement streaming/pagination for memory efficiency
Loading all datasets into memory doesn’t scale. Consider returning
impl Stream<Item = Result<Dataset, AppError>> or cursor-based pagination.
Source pub async fn get_stats(&self) -> Result<DatabaseStats, AppError>
pub async fn get_stats(&self) -> Result<DatabaseStats, AppError>
Returns aggregated database statistics.
Trait Implementations§
Source§impl Clone for DatasetRepository
impl Clone for DatasetRepository
Source§fn clone(&self) -> DatasetRepository
fn clone(&self) -> DatasetRepository
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more
Auto Trait Implementations§
impl Freeze for DatasetRepository
impl !RefUnwindSafe for DatasetRepository
impl Send for DatasetRepository
impl Sync for DatasetRepository
impl Unpin for DatasetRepository
impl !UnwindSafe for DatasetRepository
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> CloneToUninit for T where
T: Clone,
impl<T> CloneToUninit for T where
T: Clone,
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more