Skip to main content

ceres_core/
lib.rs

1//! Ceres Core - Domain types, business logic, and services.
2//!
3//! This crate provides the core functionality for Ceres, including:
4//!
5//! - **Domain models**: [`Dataset`], [`SearchResult`], etc.
6//! - **Business logic**: Delta detection, statistics tracking
7//! - **Services**: [`HarvestService`] for metadata harvesting, [`EmbeddingService`] for
8//!   standalone embedding, [`HarvestPipeline`] for combined harvest+embed, [`SearchService`]
9//!   for semantic search, [`ExportService`] for streaming exports
10//! - **Traits**: [`EmbeddingProvider`], [`DatasetStore`], [`PortalClient`], [`PortalClientFactory`] for dependency injection
11//! - **Progress reporting**: [`ProgressReporter`] trait for decoupled logging/UI
12//!
13//! # Architecture
14//!
15//! Harvesting and embedding are decoupled: `HarvestService` handles metadata fetching
16//! (no embedding provider needed), `EmbeddingService` handles vector generation
17//! (no portal access needed), and `HarvestPipeline` composes both for the common workflow.
18//!
19//! # Example
20//!
21//! ```ignore
22//! use ceres_core::{HarvestService, HarvestPipeline, PortalType, SearchService};
23//! use ceres_core::progress::TracingReporter;
24//!
25//! // Metadata-only harvesting (no API key needed)
26//! let harvest = HarvestService::new(store.clone(), portal_factory.clone());
27//! let stats = harvest.sync_portal("https://data.gov/api/3").await?;
28//!
29//! // Combined harvest + embed
30//! let pipeline = HarvestPipeline::new(store.clone(), embedding.clone(), portal_factory);
31//! let (sync_result, embed_stats) = pipeline
32//!     .sync_portal_with_progress("https://data.gov/api/3", None, "en", &TracingReporter, PortalType::Ckan)
33//!     .await?;
34//!
35//! // Semantic search
36//! let search = SearchService::new(store, embedding);
37//! let results = search.search("climate data", 10).await?;
38//! ```
39
40pub mod circuit_breaker;
41pub mod config;
42pub mod embedding;
43pub mod error;
44pub mod export;
45pub mod harvest;
46pub mod i18n;
47pub mod job;
48pub mod job_queue;
49pub mod models;
50pub mod parquet_export;
51pub mod pipeline;
52pub mod progress;
53pub mod search;
54pub mod sync;
55pub mod traits;
56pub mod worker;
57
58// Circuit breaker
59pub use circuit_breaker::{
60    CircuitBreaker, CircuitBreakerConfig, CircuitBreakerError, CircuitBreakerStats, CircuitState,
61};
62
63// Configuration types and helper functions
64pub use config::{
65    DbConfig, EmbeddingServiceConfig, HarvestConfig, HttpConfig, PortalEntry, PortalType,
66    PortalsConfig, SyncConfig, default_config_path, load_portals_config,
67};
68
69// Error handling
70pub use error::AppError;
71
72// Internationalization
73pub use i18n::LocalizedField;
74
75// Domain models
76pub use models::{DatabaseStats, Dataset, NewDataset, SearchResult};
77
78// Sync types and business logic
79pub use sync::{
80    AlwaysReprocessDetector, AtomicSyncStats, BatchHarvestSummary, ContentHashDetector,
81    DeltaDetector, PortalHarvestResult, ReprocessingDecision, SyncOutcome, SyncResult, SyncStats,
82    SyncStatus, needs_reprocessing,
83};
84
85// Progress reporting
86pub use progress::{HarvestEvent, ProgressReporter, SilentReporter, TracingReporter};
87
88// Traits for dependency injection
89pub use traits::{DatasetStore, EmbeddingProvider, PortalClient, PortalClientFactory};
90
91// Services (generic over trait implementations)
92pub use embedding::{EmbeddingService, EmbeddingStats};
93pub use export::{ExportFormat, ExportService};
94pub use harvest::HarvestService;
95pub use parquet_export::{ParquetExportConfig, ParquetExportResult, ParquetExportService};
96pub use pipeline::HarvestPipeline;
97pub use search::SearchService;
98
99// Job queue types
100pub use job::{CreateJobRequest, HarvestJob, JobStatus, RetryConfig, WorkerConfig};
101pub use job_queue::JobQueue;
102
103// Worker service, its events, and its reporters
104pub use worker::{
105    SilentWorkerReporter, TracingWorkerReporter, WorkerEvent, WorkerReporter, WorkerService,
106};