use std::collections::HashMap;
use std::sync::Arc;
#[cfg(not(target_arch = "wasm32"))]
use tracing::instrument;
use crate::error::{MemoryError, Result};
use crate::hyperdim::HVec10240;
use crate::metadata_filter::MetadataFilter;
use crate::singularity::{Singularity, unix_now_secs};
use crate::singularity_retrieval::{CandidateSource, FilterStrategy};
impl Singularity {
/// Bundles the vectors of the listed concepts into one hypervector.
///
/// Vectors are gathered in the order of `ids` and handed to
/// `HVec10240::bundle`. The lookup is strict: it stops at the first id that
/// has no entry in `self.concepts`.
///
/// # Errors
///
/// Returns [`MemoryError::NotFound`] carrying the first missing id, or
/// whatever error `HVec10240::bundle` produces for the gathered vectors.
#[cfg_attr(
    not(target_arch = "wasm32"),
    instrument(skip(self), fields(ids_count = ids.len()))
)]
pub fn bundle_concepts_strict(&self, ids: &[String]) -> Result<HVec10240> {
    let mut gathered = Vec::with_capacity(ids.len());
    for concept_id in ids {
        // `?` aborts on the first unknown id, so no partial bundle is produced.
        let concept = self
            .concepts
            .get(concept_id)
            .ok_or_else(|| MemoryError::NotFound {
                entity: "Concept".to_string(),
                id: concept_id.clone(),
            })?;
        gathered.push(concept.vector);
    }
    HVec10240::bundle(&gathered)
}
/// Removes the link from `from` to `to`, if one is recorded.
///
/// Only the association entry stored under `from` is modified; `to` is not
/// checked against the concept map. `invalidate_cache` is called whenever
/// `from` exists, even if no link was actually removed.
///
/// # Errors
///
/// Returns [`MemoryError::NotFound`] when `from` is not a known concept.
#[cfg_attr(
    not(target_arch = "wasm32"),
    instrument(skip(self), fields(from_id = %from, to_id = %to))
)]
pub fn disassociate(&mut self, from: &str, to: &str) -> Result<()> {
    if self.concepts.contains_key(from) {
        // A concept without any recorded associations is not an error.
        if let Some(outgoing) = self.associations.get_mut(from) {
            outgoing.remove(to);
        }
        self.invalidate_cache();
        Ok(())
    } else {
        Err(MemoryError::NotFound {
            entity: "Concept".to_string(),
            id: from.to_string(),
        })
    }
}
/// Drops the whole association entry stored under `id`.
///
/// Only the entry keyed by `id` is removed from `self.associations`; entries
/// keyed by other concepts are left untouched. `invalidate_cache` is called
/// afterwards regardless of whether an entry existed.
///
/// # Errors
///
/// Returns [`MemoryError::NotFound`] when `id` is not a known concept.
#[cfg_attr(
    not(target_arch = "wasm32"),
    instrument(skip(self), fields(concept_id = %id))
)]
pub fn clear_associations(&mut self, id: &str) -> Result<()> {
    let concept_exists = self.concepts.contains_key(id);
    if concept_exists {
        self.associations.remove(id);
        self.invalidate_cache();
        Ok(())
    } else {
        Err(MemoryError::NotFound {
            entity: "Concept".to_string(),
            id: id.to_string(),
        })
    }
}
/// Public entry point that forwards to `invalidate_cache`.
///
/// Takes `&self`, so callers do not need exclusive access to the store.
// NOTE(review): `invalidate_cache` is defined outside this view; presumably it
// discards cached similarity query results — confirm against its definition.
pub fn clear_similarity_cache(&self) {
self.invalidate_cache();
}
/// Replaces the metadata of concept `id` and refreshes its modification time.
///
/// The previous metadata map is discarded in full (no merging), and
/// `modified_at` is set to `unix_now_secs()`. This method does not call
/// `invalidate_cache`.
///
/// # Errors
///
/// Returns [`MemoryError::NotFound`] when `id` is not a known concept; in
/// that case nothing is modified.
#[cfg_attr(
    not(target_arch = "wasm32"),
    instrument(skip(self), fields(concept_id = %id))
)]
pub fn update_metadata(
    &mut self,
    id: &str,
    metadata: HashMap<String, serde_json::Value>,
) -> Result<()> {
    match self.concepts.get_mut(id) {
        Some(entry) => {
            entry.metadata = metadata;
            entry.modified_at = unix_now_secs();
            Ok(())
        }
        None => Err(MemoryError::NotFound {
            entity: "Concept".to_string(),
            id: id.to_string(),
        }),
    }
}
/// Finds up to `top_k` concepts similar to `query` whose metadata satisfies
/// `filter`.
///
/// The fraction of stored concepts that match `filter` (the selectivity)
/// selects one of three strategies:
///
/// * `FilterStrategy::Pre` — fewer than 20 concepts, or selectivity below
///   0.3: only the matching concepts are passed on as candidates.
/// * `FilterStrategy::BucketPost` — selectivity below 0.8: bucket candidates
///   are retrieved with a doubled `top_k` and the results are filtered
///   afterwards.
/// * `FilterStrategy::ScanPost` — otherwise: an exact scan with a doubled
///   `top_k` is filtered afterwards, and `last_retrieval_stats` is
///   overwritten here.
///
/// The two post-filter strategies over-fetch by a fixed factor of two, so
/// they can return fewer than `top_k` entries even when more matching
/// concepts exist.
///
/// An empty result is returned when `top_k` is 0, when no concepts are
/// stored, or when nothing matches `filter`. On those paths this function
/// returns before any retrieval call and does not write
/// `last_retrieval_stats` itself.
#[cfg_attr(
    not(target_arch = "wasm32"),
    instrument(skip(self, query), fields(top_k = top_k))
)]
pub fn find_similar_filtered(
    &self,
    query: &HVec10240,
    top_k: usize,
    filter: &MetadataFilter,
) -> Arc<[(String, f32)]> {
    // Below this many stored concepts, pre-filtering is always used.
    const SMALL_STORE_LIMIT: usize = 20;
    // Selectivity thresholds separating the three strategies.
    const PRE_FILTER_MAX_SELECTIVITY: f32 = 0.3;
    const BUCKET_POST_MAX_SELECTIVITY: f32 = 0.8;
    // How many times `top_k` the post-filter strategies request up front.
    const OVERFETCH_FACTOR: usize = 2;

    let start_ns = crate::singularity::unix_now_ns();
    if top_k == 0 || self.concepts.is_empty() {
        return Arc::from(Vec::new());
    }

    let total_count = self.concepts.len();
    let matching_count = self
        .concepts
        .values()
        .filter(|c| filter.matches(&c.metadata))
        .count();
    if matching_count == 0 {
        return Arc::from(Vec::new());
    }
    // `total_count` is non-zero here because the empty store returned above.
    let selectivity = matching_count as f32 / total_count as f32;

    let strategy = if total_count < SMALL_STORE_LIMIT || selectivity < PRE_FILTER_MAX_SELECTIVITY
    {
        FilterStrategy::Pre
    } else if selectivity < BUCKET_POST_MAX_SELECTIVITY {
        FilterStrategy::BucketPost
    } else {
        FilterStrategy::ScanPost
    };

    // Saturate instead of `top_k * 2`: a huge `top_k` (e.g. `usize::MAX` used
    // as "no limit") would otherwise panic in debug builds and wrap around in
    // release builds, possibly to a tiny or zero request size.
    let overfetch_k = top_k.saturating_mul(OVERFETCH_FACTOR);

    // Shared post-filter predicate: ids that are no longer stored are dropped.
    let passes_filter = |id: &str| {
        self.concepts
            .get(id)
            .map(|c| filter.matches(&c.metadata))
            .unwrap_or(false)
    };

    match strategy {
        FilterStrategy::Pre => {
            let cand_start = crate::singularity::unix_now_ns();
            // Concepts without an `id_to_index` entry are silently skipped.
            let candidates: Vec<usize> = self
                .concepts
                .iter()
                .filter(|(_, concept)| filter.matches(&concept.metadata))
                .filter_map(|(id, _)| self.id_to_index.get(id).copied())
                .collect();
            let cand_ns = crate::singularity::unix_now_ns().saturating_sub(cand_start);
            self.scored_candidate_retrieval_with_stats(
                crate::singularity_retrieval::ScoredCandidateParams {
                    query,
                    top_k,
                    candidates,
                    start_ns,
                    cand_ns,
                    source: CandidateSource::Metadata,
                    // NOTE(review): presumably cached results are keyed without
                    // the metadata filter, hence the bypass — confirm.
                    bypass_cache: true,
                },
                selectivity,
                Some(strategy),
            )
        }
        FilterStrategy::BucketPost => {
            let cand_start = crate::singularity::unix_now_ns();
            let candidates = self.generate_bucket_candidates(query);
            let cand_ns = crate::singularity::unix_now_ns().saturating_sub(cand_start);
            let all_results = self.scored_candidate_retrieval_with_stats(
                crate::singularity_retrieval::ScoredCandidateParams {
                    query,
                    top_k: overfetch_k,
                    candidates,
                    start_ns,
                    cand_ns,
                    source: CandidateSource::Bucket,
                    bypass_cache: true,
                },
                selectivity,
                Some(strategy),
            );
            let filtered: Vec<(String, f32)> = all_results
                .iter()
                .filter(|(id, _)| passes_filter(id.as_str()))
                .take(top_k)
                .cloned()
                .collect();
            Arc::from(filtered)
        }
        FilterStrategy::ScanPost => {
            let all_results = self.exact_similarity_scan(query, overfetch_k, start_ns, true);
            let filtered: Vec<(String, f32)> = all_results
                .iter()
                .filter(|(id, _)| passes_filter(id.as_str()))
                .take(top_k)
                .cloned()
                .collect();
            // Best effort: if the stats lock cannot be acquired the stats are
            // simply left as they were. Timings are recorded as 0 on this path.
            if let Ok(mut s) = self.last_retrieval_stats.write() {
                *s = crate::singularity_retrieval::RetrievalStats {
                    candidate_count: matching_count,
                    scored_count: filtered.len(),
                    fell_back_to_exact_scan: true,
                    candidate_ns: 0,
                    scoring_ns: 0,
                    selectivity_ratio: selectivity,
                    filter_strategy: Some(strategy),
                };
            }
            Arc::from(filtered)
        }
    }
}
}