#[cfg(feature = "alloc")]
use alloc::{string::String, vec::Vec};
use hashbrown::HashSet;
use serde::{Deserialize, Serialize};
use smallvec::SmallVec;
/// The kind of backend a data source represents.
///
/// [`SourceKind::Sparql`] is the `Default` variant. The enum is
/// `#[non_exhaustive]`, so matches written outside the defining crate need a
/// wildcard arm.
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Default)]
#[non_exhaustive]
pub enum SourceKind {
/// SPARQL source; this is the `Default` variant.
#[default]
Sparql,
/// Cache source; the only variant for which `is_cache` returns `true`.
Cache,
/// IPFS-flavoured peer-to-peer source; `is_p2p` returns `true` for it.
P2pIpfs {
/// Address of the peer. Presumably a multiaddr string; it is stored
/// as-is and not validated here.
multiaddr: String,
},
/// libp2p-flavoured peer-to-peer source; `is_p2p` returns `true` for it.
P2pLibp2p {
/// Identifier of the peer, stored as an opaque string.
peer_id: String,
},
/// Any other kind, identified by a caller-chosen string.
Custom(String),
}
impl SourceKind {
    /// Reports whether this kind is one of the peer-to-peer variants
    /// (`P2pIpfs` or `P2pLibp2p`).
    #[must_use]
    pub fn is_p2p(&self) -> bool {
        // Arms are spelled out so that adding a variant forces a decision here.
        match self {
            Self::P2pIpfs { .. } | Self::P2pLibp2p { .. } => true,
            Self::Sparql | Self::Cache | Self::Custom(_) => false,
        }
    }

    /// Reports whether this kind is the `Cache` variant.
    #[must_use]
    pub fn is_cache(&self) -> bool {
        match self {
            Self::Cache => true,
            Self::Sparql | Self::P2pIpfs { .. } | Self::P2pLibp2p { .. } | Self::Custom(_) => {
                false
            }
        }
    }
}
/// A data source together with its capabilities, running statistics and
/// routing metadata (regions, vocabularies, availability, priority, kind).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DataSource {
/// Identifier of the source.
pub id: String,
/// Endpoint string of the source.
/// NOTE(review): the format (URL, multiaddr, ...) is not constrained here.
pub endpoint: String,
/// Feature flags recorded for this source.
pub capabilities: SourceCapabilities,
/// Running query statistics; `update_stats` writes to them.
pub stats: SourceStats,
/// Regions associated with the source, compared by exact string equality in
/// `in_region`. The `SmallVec` keeps up to four entries inline.
pub regions: SmallVec<[String; 4]>,
/// Vocabularies associated with the source, looked up by exact string in
/// `supports_vocabulary`.
pub vocabularies: HashSet<String>,
/// Availability flag; `new` initialises it to `true`.
pub available: bool,
/// Priority value; `new` initialises it to `1.0`.
/// NOTE(review): how consumers weigh it is not visible in this file section.
pub priority: f32,
/// Backend kind; falls back to `SourceKind::default()` when the field is
/// missing from deserialized input.
#[serde(default)]
pub kind: SourceKind,
}
impl DataSource {
    /// Creates a source with the given `id` and `endpoint`.
    ///
    /// The new source is marked available, has priority `1.0`, default
    /// capabilities and statistics, no regions or vocabularies, and the
    /// default [`SourceKind`].
    #[must_use]
    pub fn new(id: impl Into<String>, endpoint: impl Into<String>) -> Self {
        Self {
            id: id.into(),
            endpoint: endpoint.into(),
            capabilities: SourceCapabilities::default(),
            stats: SourceStats::default(),
            regions: SmallVec::new(),
            vocabularies: HashSet::new(),
            available: true,
            priority: 1.0,
            kind: SourceKind::default(),
        }
    }

    /// Replaces the backend kind and returns the updated source.
    #[must_use]
    pub fn with_kind(mut self, kind: SourceKind) -> Self {
        self.kind = kind;
        self
    }

    /// Appends `region` to the region list and returns the updated source.
    ///
    /// Regions are appended as given; duplicates are not filtered.
    #[must_use]
    pub fn with_region(mut self, region: impl Into<String>) -> Self {
        self.regions.push(region.into());
        self
    }

    /// Adds `vocab` to the vocabulary set and returns the updated source.
    #[must_use]
    pub fn with_vocabulary(mut self, vocab: impl Into<String>) -> Self {
        self.vocabularies.insert(vocab.into());
        self
    }

    /// Replaces the capability flags and returns the updated source.
    #[must_use]
    pub const fn with_capabilities(mut self, capabilities: SourceCapabilities) -> Self {
        self.capabilities = capabilities;
        self
    }

    /// Replaces the priority and returns the updated source.
    #[must_use]
    pub const fn with_priority(mut self, priority: f32) -> Self {
        self.priority = priority;
        self
    }

    /// Records the outcome of one query and refreshes the derived statistics.
    ///
    /// * `latency_ms` - latency of the query, folded into the running mean.
    /// * `success` - whether the query succeeded.
    /// * `result_count` - number of results, added to `total_results`.
    ///
    /// Counters saturate at their maximum instead of overflowing, so the
    /// query count is always at least 1 when the averages are computed and
    /// the derived values stay finite. Only `total_queries`,
    /// `successful_queries`, `total_results`, `avg_latency_ms` and
    /// `success_rate` are touched.
    pub fn update_stats(&mut self, latency_ms: u32, success: bool, result_count: u32) {
        let stats = &mut self.stats;

        // Saturating arithmetic: a plain `+= 1` panics in debug builds and
        // wraps to 0 in release builds, which would turn the divisions below
        // into NaN/inf.
        stats.total_queries = stats.total_queries.saturating_add(1);
        if success {
            stats.successful_queries = stats.successful_queries.saturating_add(1);
        }
        stats.total_results = stats.total_results.saturating_add(u64::from(result_count));

        // Do the arithmetic in f64, where every u32 is exactly representable,
        // and narrow to the stored f32 only at the end.
        let n = f64::from(stats.total_queries);
        let previous_mean = f64::from(stats.avg_latency_ms);
        stats.avg_latency_ms = (((n - 1.0) * previous_mean + f64::from(latency_ms)) / n) as f32;
        stats.success_rate = (f64::from(stats.successful_queries) / n) as f32;
    }

    /// Returns `true` when `vocab` is in this source's vocabulary set
    /// (exact string match).
    #[must_use]
    pub fn supports_vocabulary(&self, vocab: &str) -> bool {
        self.vocabularies.contains(vocab)
    }

    /// Returns `true` when `region` equals one of this source's regions
    /// (exact string match).
    #[must_use]
    pub fn in_region(&self, region: &str) -> bool {
        self.regions.iter().any(|r| r == region)
    }
}
/// Capability flags and limits recorded for a data source.
///
/// The derived `Default` sets every flag to `false` and `max_results` to `0`.
/// NOTE(review): the per-field descriptions below follow the field names; this
/// type only stores the values and does not enforce them.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, Default)]
pub struct SourceCapabilities {
/// SPARQL 1.1 flag.
pub sparql_1_1: bool,
/// Federation flag.
pub federation: bool,
/// `CONSTRUCT` query flag.
pub construct: bool,
/// `ASK` query flag.
pub ask: bool,
/// `DESCRIBE` query flag.
pub describe: bool,
/// Property-path flag.
pub property_paths: bool,
/// Aggregation flag.
pub aggregation: bool,
/// Subquery flag.
pub subqueries: bool,
/// `BIND` flag.
pub bind: bool,
/// `VALUES` flag.
pub values: bool,
/// Result limit; `basic()` uses `10000` and `full()` uses `0`.
/// NOTE(review): `0` presumably means "no limit" - confirm with consumers.
pub max_results: u32,
/// Full-text-search flag; `false` in both `basic()` and `full()`.
pub full_text_search: bool,
/// Geospatial flag; `false` in both `basic()` and `full()`.
pub geospatial: bool,
}
impl SourceCapabilities {
    /// Shared constructor behind the public presets.
    ///
    /// `construct`, `ask` and `describe` are always on, `full_text_search`
    /// and `geospatial` are always off, and every other flag takes the value
    /// of `advanced`.
    const fn preset(advanced: bool, max_results: u32) -> Self {
        Self {
            sparql_1_1: advanced,
            federation: advanced,
            construct: true,
            ask: true,
            describe: true,
            property_paths: advanced,
            aggregation: advanced,
            subqueries: advanced,
            bind: advanced,
            values: advanced,
            max_results,
            full_text_search: false,
            geospatial: false,
        }
    }

    /// Preset with only `construct`, `ask` and `describe` enabled and
    /// `max_results` set to `10000`.
    #[must_use]
    pub const fn basic() -> Self {
        Self::preset(false, 10_000)
    }

    /// Preset with every flag enabled except `full_text_search` and
    /// `geospatial`, and `max_results` set to `0`.
    #[must_use]
    pub const fn full() -> Self {
        Self::preset(true, 0)
    }
}
/// Running query statistics for a data source.
///
/// The derived `Default` zeroes every counter and sets `tripped_until_ms` to
/// `None`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, Default)]
pub struct SourceStats {
/// Number of queries recorded so far.
pub total_queries: u32,
/// Number of recorded queries that were reported as successful.
pub successful_queries: u32,
/// Sum of the result counts of all recorded queries.
pub total_results: u64,
/// Running mean of the recorded query latencies, in milliseconds.
pub avg_latency_ms: f32,
/// Ratio of `successful_queries` to `total_queries` as last computed.
pub success_rate: f32,
/// Time of the last query.
/// NOTE(review): unit and epoch are not established in this file section,
/// and `DataSource::update_stats` does not write this field.
pub last_query_time: u64,
/// Consecutive-failure counter; `0` when absent from deserialized input.
/// NOTE(review): presumably maintained by circuit-breaker logic elsewhere.
#[serde(default)]
pub consecutive_failures: u32,
/// `None` when absent from deserialized input.
/// NOTE(review): presumably a millisecond timestamp until which the source
/// is treated as tripped - confirm against the code that sets it.
#[serde(default)]
pub tripped_until_ms: Option<u64>,
}
impl SourceStats {
    /// Returns `true` once at least one query has been recorded.
    #[must_use]
    pub const fn has_history(&self) -> bool {
        self.total_queries != 0
    }

    /// Mean number of results per recorded query, or `0.0` when no query has
    /// been recorded yet.
    #[must_use]
    pub fn avg_results_per_query(&self) -> f64 {
        match self.total_queries {
            0 => 0.0,
            queries => self.total_results as f64 / f64::from(queries),
        }
    }
}
/// One candidate source in a ranking, with the score and reason attached to it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SourceSelection {
/// Identifier of the selected source.
pub source_id: String,
/// Confidence score; `SourceRanking::sort_by_confidence` orders entries by
/// it, highest first.
/// NOTE(review): the expected range (e.g. 0.0..=1.0) is not enforced here.
pub confidence: f32,
/// Estimated latency for this source, in milliseconds.
pub estimated_latency_ms: u32,
/// Why this source was selected.
pub reason: SelectionReason,
}
/// Reason recorded for a [`SourceSelection`].
///
/// NOTE(review): the variant descriptions follow the variant names; the logic
/// that assigns them is not part of this file section.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
pub enum SelectionReason {
/// Chosen by a model prediction.
ModelPrediction,
/// Chosen because of a vocabulary match.
VocabularyMatch,
/// Chosen because of geographic proximity.
GeographicProximity,
/// Chosen because of historical performance.
HistoricalPerformance,
/// Chosen as a fallback.
Fallback,
/// Chosen because of a user preference.
UserPreference,
/// Chosen because compliance required it.
ComplianceRequired,
}
/// A list of source selections plus metadata about how the ranking was made.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct SourceRanking {
/// Selections in their current order; `best` returns the first entry and
/// `top` returns a prefix of this list.
pub sources: Vec<SourceSelection>,
/// Time spent producing the ranking, in microseconds.
pub processing_time_us: u64,
/// Flag recording ML use; `new` initialises it to `false`.
/// NOTE(review): set by the ranking code, which is outside this section.
pub ml_used: bool,
/// Flag recording context use; `new` initialises it to `false`.
/// NOTE(review): set by the ranking code, which is outside this section.
pub context_used: bool,
}
impl SourceRanking {
    /// Creates an empty ranking with zero processing time and both usage
    /// flags cleared.
    #[must_use]
    pub const fn new() -> Self {
        Self {
            sources: Vec::new(),
            processing_time_us: 0,
            ml_used: false,
            context_used: false,
        }
    }

    /// Appends `selection` to the end of the ranking without re-sorting.
    pub fn add(&mut self, selection: SourceSelection) {
        self.sources.push(selection);
    }

    /// Sorts the selections by confidence, highest first.
    ///
    /// The sort is stable: entries with equal confidence keep their relative
    /// order. Entries whose confidence is NaN are placed after every entry
    /// with a real confidence, so `best` and `top` never prefer them over a
    /// comparable candidate.
    pub fn sort_by_confidence(&mut self) {
        use core::cmp::Ordering;

        // Treating "NaN vs anything" as `Equal` is not a total order: the
        // result then depends on the input order, and `sort_by` is allowed to
        // panic on inconsistent comparators. Rank NaN explicitly instead.
        self.sources.sort_by(|a, b| {
            match (a.confidence.is_nan(), b.confidence.is_nan()) {
                (true, true) => Ordering::Equal,
                (true, false) => Ordering::Greater,
                (false, true) => Ordering::Less,
                // Neither side is NaN, so `partial_cmp` always yields `Some`.
                (false, false) => b
                    .confidence
                    .partial_cmp(&a.confidence)
                    .unwrap_or(Ordering::Equal),
            }
        });
    }

    /// Returns the first `n` selections, or all of them when fewer than `n`
    /// are present.
    #[must_use]
    pub fn top(&self, n: usize) -> &[SourceSelection] {
        let end = n.min(self.sources.len());
        &self.sources[..end]
    }

    /// Returns the first selection, or `None` when the ranking is empty.
    #[must_use]
    pub fn best(&self) -> Option<&SourceSelection> {
        self.sources.first()
    }

    /// Returns `true` when the ranking holds no selections.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.sources.is_empty()
    }

    /// Returns the number of selections in the ranking.
    #[must_use]
    pub fn len(&self) -> usize {
        self.sources.len()
    }
}