#![deny(clippy::all)]
#[cfg(feature = "node-api")]
use napi::bindgen_prelude::*;
#[cfg(feature = "node-api")]
use napi_derive::napi;
#[cfg(feature = "node-api")]
use std::sync::Arc;
#[cfg(feature = "node-api")]
use tracing::info_span;
pub mod core;
pub use crate::core::entangled::EntangledHVec;
pub use crate::core::error::HmsError;
pub use crate::core::types::{ConceptCandidate, MemorizeBatchItem, RetrievalResult, TextMetrics};
pub use crate::core::HmsCore;
#[cfg(feature = "node-api")]
#[napi]
pub struct HolographicMemorySystem {
core: Arc<HmsCore>,
}
#[cfg(feature = "node-api")]
fn napi_err(e: anyhow::Error) -> napi::Error {
napi::Error::from_reason(e.to_string())
}
#[cfg(feature = "node-api")]
async fn run_async<T: Send + 'static, F: FnOnce() -> anyhow::Result<T> + Send + 'static>(
f: F,
) -> Result<T> {
tokio::task::spawn_blocking(f)
.await
.map_err(|e| napi::Error::from_reason(e.to_string()))?
.map_err(napi_err)
}
#[cfg(feature = "node-api")]
#[napi(object)]
pub struct HmsConfigJs {
pub nsg_max_degree: Option<u32>,
pub nsg_ef_construction: Option<u32>,
pub nsg_auto_threshold: Option<u32>,
pub ivf_enabled: Option<bool>,
pub ivf_n_clusters: Option<u32>,
pub ivf_n_landmarks: Option<u32>,
pub ivf_d_reduced: Option<u32>,
pub ivf_n_probe: Option<u32>,
pub ivf_auto_threshold: Option<u32>,
pub shard_enabled: Option<bool>,
pub shard_count: Option<u32>,
pub shard_auto_threshold: Option<u32>,
pub shard_target_size: Option<u32>,
pub component_similarity_threshold: Option<f64>,
pub component_max_neighbors: Option<u32>,
pub concept_similarity_threshold: Option<f64>,
pub concept_min_cluster_size: Option<u32>,
pub diffusion_steps: Option<u32>,
pub diffusion_sigma_max: Option<f64>,
pub diffusion_sigma_min: Option<f64>,
pub diffusion_step_size: Option<f64>,
pub diffusion_n_langevin: Option<u32>,
pub signing_enabled: Option<bool>,
pub signing_key_path: Option<String>,
pub encryption_enabled: Option<bool>,
pub encryption_passphrase: Option<String>,
pub audit_enabled: Option<bool>,
pub dp_enabled: Option<bool>,
pub dp_epsilon: Option<f64>,
pub meaning_enabled: Option<bool>,
pub meaning_beta: Option<f64>,
pub meaning_max_fanout: Option<u32>,
pub meaning_auto_decompose: Option<bool>,
pub meaning_max_hop_depth: Option<u32>,
pub cognition_enabled: Option<bool>,
pub cognition_interval_secs: Option<u32>,
pub cognition_min_pattern_freq: Option<u32>,
pub cognition_min_abstraction_members: Option<u32>,
pub cognition_min_hypothesis_confidence: Option<f64>,
}
#[cfg(feature = "node-api")]
impl HmsConfigJs {
fn into_config(self) -> crate::core::config::HmsConfig {
use crate::core::config::*;
let mut cfg = HmsConfig::default();
if let Some(v) = self.nsg_max_degree {
cfg.nsg.max_degree = v as usize;
}
if let Some(v) = self.nsg_ef_construction {
cfg.nsg.ef_construction = v as usize;
}
if let Some(v) = self.nsg_auto_threshold {
cfg.nsg.auto_threshold = v as usize;
}
if let Some(v) = self.ivf_enabled {
cfg.ivf.enabled = v;
}
if let Some(v) = self.ivf_n_clusters {
cfg.ivf.n_clusters = v as usize;
}
if let Some(v) = self.ivf_n_landmarks {
cfg.ivf.n_landmarks = v as usize;
}
if let Some(v) = self.ivf_d_reduced {
cfg.ivf.d_reduced = v as usize;
}
if let Some(v) = self.ivf_n_probe {
cfg.ivf.n_probe = v as usize;
}
if let Some(v) = self.ivf_auto_threshold {
cfg.ivf.auto_threshold = v as usize;
}
if let Some(v) = self.shard_enabled {
cfg.shard.enabled = v;
}
if let Some(v) = self.shard_count {
cfg.shard.shard_count = v as usize;
}
if let Some(v) = self.shard_auto_threshold {
cfg.shard.auto_threshold = v as usize;
}
if let Some(v) = self.shard_target_size {
cfg.shard.target_shard_size = v as usize;
}
if let Some(v) = self.component_similarity_threshold {
cfg.query.component_similarity_threshold = v;
}
if let Some(v) = self.component_max_neighbors {
cfg.query.component_max_neighbors = v;
}
if let Some(v) = self.concept_similarity_threshold {
cfg.concepts.similarity_threshold = v;
}
if let Some(v) = self.concept_min_cluster_size {
cfg.concepts.min_cluster_size = v as usize;
}
if let Some(v) = self.diffusion_steps {
cfg.diffusion.steps = v as usize;
}
if let Some(v) = self.diffusion_sigma_max {
cfg.diffusion.sigma_max = v;
}
if let Some(v) = self.diffusion_sigma_min {
cfg.diffusion.sigma_min = v;
}
if let Some(v) = self.diffusion_step_size {
cfg.diffusion.step_size = v;
}
if let Some(v) = self.diffusion_n_langevin {
cfg.diffusion.n_langevin = v as usize;
}
if let Some(v) = self.signing_enabled {
cfg.security.signing_enabled = v;
}
if let Some(v) = self.signing_key_path {
cfg.security.key_path = Some(v);
}
if let Some(v) = self.encryption_enabled {
cfg.security.encryption_enabled = v;
}
if let Some(v) = self.encryption_passphrase {
cfg.security.encryption_passphrase = Some(v);
}
if let Some(v) = self.audit_enabled {
cfg.security.audit_enabled = v;
}
if let Some(v) = self.dp_enabled {
cfg.privacy.dp_enabled = v;
}
if let Some(v) = self.dp_epsilon {
cfg.privacy.epsilon = v;
}
if let Some(v) = self.meaning_enabled {
cfg.meaning.enabled = v;
}
if let Some(v) = self.meaning_beta {
cfg.meaning.beta = v;
}
if let Some(v) = self.meaning_max_fanout {
cfg.meaning.algebraic_max_fanout = v as usize;
}
if let Some(v) = self.meaning_auto_decompose {
cfg.meaning.auto_decompose = v;
}
if let Some(v) = self.meaning_max_hop_depth {
cfg.meaning.max_hop_depth = v as usize;
}
if let Some(v) = self.cognition_enabled {
cfg.cognition.enabled = v;
}
if let Some(v) = self.cognition_interval_secs {
cfg.cognition.interval_secs = v as u64;
}
if let Some(v) = self.cognition_min_pattern_freq {
cfg.cognition.min_pattern_freq = v as usize;
}
if let Some(v) = self.cognition_min_abstraction_members {
cfg.cognition.min_abstraction_members = v as usize;
}
if let Some(v) = self.cognition_min_hypothesis_confidence {
cfg.cognition.min_hypothesis_confidence = v;
}
cfg
}
}
#[cfg(feature = "node-api")]
#[napi]
impl HolographicMemorySystem {
#[napi(constructor)]
pub fn new(
dimensions: u32,
storage_path: Option<String>,
config: Option<HmsConfigJs>,
) -> Result<Self> {
let cfg = config.map(|c| c.into_config());
let core = HmsCore::new(dimensions, storage_path, cfg).map_err(napi_err)?;
Ok(Self {
core: Arc::new(core),
})
}
#[napi(getter)]
pub fn vector_count(&self) -> u32 {
self.core.vector_count() as u32
}
#[napi(getter)]
pub fn nsg_trained(&self) -> bool {
self.core.nsg_trained()
}
#[napi(getter)]
pub fn ivf_trained(&self) -> bool {
self.core.ivf_trained()
}
#[napi(getter)]
pub fn dimensions(&self) -> u32 {
self.core.dimensions() as u32
}
#[napi]
pub async fn analyze_text(&self, text: String) -> Result<TextMetrics> {
let core = self.core.clone();
run_async(move || Ok(core.analyze_text(&text))).await
}
#[napi]
pub async fn calculate_readability(&self, metrics: TextMetrics) -> Result<f64> {
let core = self.core.clone();
run_async(move || Ok(core.calculate_readability(&metrics))).await
}
#[napi]
pub async fn memorize_text(
&self,
id: String,
text: String,
trace_id: Option<String>,
) -> Result<()> {
let core = self.core.clone();
run_async(move || {
let _span =
info_span!("memorize_text", id = %id, trace_id = trace_id.as_deref().unwrap_or(""))
.entered();
let vec = core.encode_text(&text);
core.memorize(id, vec)
})
.await
}
#[napi]
pub async fn memorize_text_buffer(
&self,
id: String,
text: Buffer,
trace_id: Option<String>,
) -> Result<()> {
let core = self.core.clone();
let text_str = std::str::from_utf8(&text)
.map_err(|e| napi::Error::from_reason(format!("Invalid UTF-8: {}", e)))?
.to_owned();
run_async(move || {
let _span = info_span!("memorize_text_buffer", id = %id, trace_id = trace_id.as_deref().unwrap_or("")).entered();
let vec = core.encode_text(&text_str);
core.memorize(id, vec)
})
.await
}
#[napi]
pub async fn memorize_batch(
&self,
items: Vec<MemorizeBatchItem>,
trace_id: Option<String>,
) -> Result<()> {
let core = self.core.clone();
run_async(move || {
let _span = info_span!(
"memorize_batch",
count = items.len(),
trace_id = trace_id.as_deref().unwrap_or("")
)
.entered();
use rayon::prelude::*;
let encoded: Vec<(String, EntangledHVec)> = items
.into_par_iter()
.map(|item| {
let vec = core.encode_text(&item.text);
(item.id, vec)
})
.collect();
for (id, vec) in encoded {
core.memorize(id, vec)?;
}
Ok(())
})
.await
}
#[napi]
pub async fn memorize_file(&self, id: String, file_path: String) -> Result<()> {
let core = self.core.clone();
run_async(move || {
let file = std::fs::File::open(&file_path)
.map_err(|e| anyhow::anyhow!("Failed to open {}: {}", file_path, e))?;
let mmap = unsafe { memmap2::Mmap::map(&file) }
.map_err(|e| anyhow::anyhow!("Failed to mmap {}: {}", file_path, e))?;
let text = std::str::from_utf8(&mmap)
.map_err(|e| anyhow::anyhow!("Invalid UTF-8 in {}: {}", file_path, e))?;
let vec = core.encode_text(text);
core.memorize(id, vec)
})
.await
}
#[napi]
pub async fn memorize_vector(&self, id: String, vector: Float32Array) -> Result<()> {
let core = self.core.clone();
let dense: Vec<f32> = vector.to_vec();
run_async(move || core.memorize_vector(id, &dense)).await
}
#[napi]
pub async fn memorize_scalar(&self, id: String, value: f64, min: f64, max: f64) -> Result<()> {
let core = self.core.clone();
run_async(move || core.memorize_scalar(id, value, min, max)).await
}
#[napi]
pub async fn query(
&self,
text: String,
k: u32,
trace_id: Option<String>,
) -> Result<Vec<RetrievalResult>> {
let core = self.core.clone();
run_async(move || {
let _span =
info_span!("query", k = k, trace_id = trace_id.as_deref().unwrap_or("")).entered();
let q_vec = core.encode_text(&text);
let results = core.query(&q_vec, k);
Ok(results)
})
.await
}
#[napi]
pub async fn query_vector(&self, vector: Float32Array, k: u32) -> Result<Vec<RetrievalResult>> {
let core = self.core.clone();
let q_vec = EntangledHVec::from_dense(&vector, core.dimensions());
run_async(move || {
let results = core.query(&q_vec, k);
Ok(results)
})
.await
}
#[napi]
pub async fn query_scalar(
&self,
value: f64,
min: f64,
max: f64,
k: u32,
) -> Result<Vec<RetrievalResult>> {
let core = self.core.clone();
run_async(move || {
let q_vec = EntangledHVec::from_scalar(value, min, max, core.dimensions());
let results = core.query(&q_vec, k);
Ok(results)
})
.await
}
#[napi]
pub async fn query_batch(
&self,
texts: Vec<String>,
k: u32,
) -> Result<Vec<Vec<RetrievalResult>>> {
let core = self.core.clone();
run_async(move || {
let queries: Vec<EntangledHVec> = texts.iter().map(|t| core.encode_text(t)).collect();
Ok(core.query_batch(&queries, k))
})
.await
}
#[napi]
pub async fn query_vector_batch(
&self,
vectors: Vec<Float32Array>,
k: u32,
) -> Result<Vec<Vec<RetrievalResult>>> {
let core = self.core.clone();
let queries: Vec<EntangledHVec> = vectors
.iter()
.map(|v| EntangledHVec::from_dense(v, core.dimensions()))
.collect();
run_async(move || Ok(core.query_batch(&queries, k))).await
}
#[napi]
pub async fn analyze_components(&self, text: String) -> Result<Vec<RetrievalResult>> {
let core = self.core.clone();
run_async(move || {
let vec = core.encode_text(&text);
let results = core.analyze_components(&vec);
Ok(results)
})
.await
}
#[napi]
pub async fn factorize_diffusion(
&self,
product_text: String,
domains: Vec<Vec<String>>,
max_iter: u32,
) -> Result<Vec<Option<String>>> {
let core = self.core.clone();
run_async(move || {
let vec = core.encode_text(&product_text);
let domain_vecs: Vec<Vec<EntangledHVec>> = domains
.iter()
.map(|d| d.iter().map(|s| core.encode_text(s)).collect())
.collect();
let results = core.factorize_diffusion(&vec, &domain_vecs, max_iter as usize);
let mapped = results
.into_iter()
.enumerate()
.map(|(i, opt_vec)| {
opt_vec.and_then(|evec| {
domains[i]
.iter()
.zip(domain_vecs[i].iter())
.min_by_key(|(_, enc)| evec.hamming(enc))
.map(|(s, _)| s.clone())
})
})
.collect();
Ok(mapped)
})
.await
}
#[napi]
pub async fn memorize_triplet(
&self,
id: String,
head: String,
relation: String,
tail: String,
) -> Result<()> {
let core = self.core.clone();
run_async(move || core.memorize_triplet(id, head, relation, tail)).await
}
#[napi]
pub async fn query_triplet(
&self,
head: String,
relation: String,
k: u32,
) -> Result<Vec<RetrievalResult>> {
let core = self.core.clone();
run_async(move || core.query_triplet(head, relation, k)).await
}
#[napi]
pub async fn find_analogy(
&self,
a: String,
b: String,
c: String,
k: Option<u32>,
trace_id: Option<String>,
) -> Result<Vec<RetrievalResult>> {
let core = self.core.clone();
run_async(move || {
let _span =
info_span!("find_analogy", trace_id = trace_id.as_deref().unwrap_or("")).entered();
let results = core.find_analogy(&a, &b, &c, k.unwrap_or(5));
Ok(results)
})
.await
}
#[napi]
pub async fn synthesize_concepts(&self) -> Result<Vec<ConceptCandidate>> {
let core = self.core.clone();
run_async(move || {
let results = core.synthesize_concepts();
Ok(results)
})
.await
}
#[napi]
pub async fn memorize_sequence(&self, id: String, sequence: Vec<String>) -> Result<()> {
let core = self.core.clone();
run_async(move || core.memorize_sequence(id, &sequence)).await
}
#[napi]
pub async fn train_nsg(&self) -> Result<()> {
let core = self.core.clone();
run_async(move || core.train_nsg()).await
}
#[napi]
pub async fn train_ivf(&self) -> Result<()> {
let core = self.core.clone();
run_async(move || core.train_ivf()).await
}
#[napi]
pub async fn query_sequence(
&self,
partial: Vec<String>,
k: u32,
) -> Result<Vec<RetrievalResult>> {
let core = self.core.clone();
run_async(move || core.query_sequence(&partial, k)).await
}
#[napi]
pub async fn delete(&self, id: String) -> Result<bool> {
let core = self.core.clone();
run_async(move || core.delete(&id)).await
}
#[napi]
pub async fn compact(&self) -> Result<()> {
let core = self.core.clone();
run_async(move || core.compact()).await
}
#[napi]
pub async fn audit_since(&self, timestamp_ms: f64) -> Result<Vec<AuditEntryJs>> {
let core = self.core.clone();
let ts = timestamp_ms as u64;
run_async(move || {
let entries = core.audit_since(ts)?;
Ok(entries
.into_iter()
.map(|e| AuditEntryJs {
timestamp_ms: e.timestamp_ms as f64,
op: match e.op {
crate::core::audit::AuditOp::Memorize => "memorize".to_string(),
crate::core::audit::AuditOp::Delete => "delete".to_string(),
crate::core::audit::AuditOp::Compact => "compact".to_string(),
},
id_hash: e.id_hash.iter().map(|b| format!("{:02x}", b)).collect(),
signed: e.signature != [0u8; 64],
})
.collect())
})
.await
}
#[napi]
pub async fn bundle_texts(&self, texts: Vec<String>) -> Result<Vec<u32>> {
let core = self.core.clone();
run_async(move || {
let vecs: Vec<EntangledHVec> = texts.iter().map(|t| core.encode_text(t)).collect();
let bundled = core.bundle(&vecs);
Ok(bundled.indices().to_vec())
})
.await
}
#[napi]
pub async fn memorize_meaning(&self, id: String, text: String) -> Result<()> {
let core = self.core.clone();
run_async(move || core.memorize_meaning(&id, &text)).await
}
#[napi]
pub async fn structural_query(
&self,
known_subjects: Vec<String>,
known_relations: Vec<String>,
target_role: String,
) -> Result<Vec<StructuralResultJs>> {
let core = self.core.clone();
run_async(move || {
let known_vecs: Vec<EntangledHVec> = known_subjects
.iter()
.chain(known_relations.iter())
.map(|t| core.encode_text(t))
.collect();
let mut bindings: Vec<(&str, &EntangledHVec)> = Vec::new();
for (i, v) in known_vecs.iter().enumerate() {
if i < known_subjects.len() {
bindings.push(("subject", v));
} else {
bindings.push(("relation", v));
}
}
let results = core.structural_query(&bindings, &target_role);
Ok(results
.into_iter()
.map(|r| StructuralResultJs {
entity_id: r.entity_id,
confidence: r.confidence,
path: format!("{:?}", r.path),
})
.collect())
})
.await
}
#[napi]
pub async fn multi_hop_query(
&self,
start_entity: String,
relations: Vec<String>,
) -> Result<Vec<MultiHopResultJs>> {
let core = self.core.clone();
run_async(move || {
let rel_refs: Vec<&str> = relations.iter().map(|s| s.as_str()).collect();
let results = core.multi_hop(&start_entity, &rel_refs);
Ok(results
.into_iter()
.map(|r| MultiHopResultJs {
entity_id: r.entity_id,
confidence: r.confidence,
method: format!("{:?}", r.method),
})
.collect())
})
.await
}
#[napi]
pub async fn meaning_cleanup(&self, text: String) -> Result<Option<CleanupResultJs>> {
let core = self.core.clone();
run_async(move || {
let vec = core.encode_text(&text);
Ok(core
.meaning_cleanup(&vec)
.map(|(id, confidence)| CleanupResultJs { id, confidence }))
})
.await
}
#[napi]
pub fn declare_composition_rule(
&self,
name: String,
input_relations: Vec<String>,
output_relation: String,
) {
self.core
.declare_rule(&name, input_relations, output_relation);
}
#[napi(getter)]
pub fn meaning_enabled(&self) -> bool {
self.core.meaning_enabled()
}
#[napi]
pub fn start_cognition(&self) -> Result<()> {
self.core.start_cognition().map_err(napi_err)
}
#[napi]
pub fn stop_cognition(&self) {
self.core.stop_cognition();
}
#[napi(getter)]
pub fn cognition_running(&self) -> bool {
self.core.cognition_running()
}
#[napi(getter)]
pub fn cognition_cycle_count(&self) -> u32 {
self.core.cognition_cycle_count() as u32
}
#[napi(getter)]
pub fn cognition_insight_count(&self) -> u32 {
self.core.cognition_insight_count() as u32
}
#[napi(getter)]
pub fn cognition_enabled(&self) -> bool {
self.core.cognition_enabled()
}
#[napi]
pub fn run_cognition_once(&self) -> u32 {
self.core.run_cognition_once().len() as u32
}
#[napi]
pub fn govern_memory(&self) -> GovernanceReportJs {
let report = self.core.govern_memory();
GovernanceReportJs {
composites_merged: report.composites_merged as u32,
composites_forgotten: report.composites_forgotten as u32,
atoms_forgotten: report.atoms_forgotten as u32,
idf_refreshed: report.idf_refreshed,
atoms_refined: report.refinement.atoms_refined as u32,
}
}
#[napi]
pub async fn add_relation(
&self,
source_id: String,
relation_type: String,
target_id: String,
properties: Option<String>,
valid_from: Option<f64>,
valid_to: Option<f64>,
) -> Result<()> {
let core = self.core.clone();
run_async(move || {
let rel = crate::core::types::Relation {
source_id,
relation_type,
target_id,
properties,
valid_from: valid_from.unwrap_or(0.0),
valid_to: valid_to.unwrap_or(0.0),
};
core.add_relation(&rel)
})
.await
}
#[napi]
pub async fn remove_relation(
&self,
source_id: String,
relation_type: String,
target_id: String,
) -> Result<bool> {
let core = self.core.clone();
run_async(move || Ok(core.remove_relation(&source_id, &relation_type, &target_id))).await
}
#[napi]
pub fn declare_relation_type(
&self,
name: String,
transitive: Option<bool>,
symmetric: Option<bool>,
) {
self.core
.declare_relation_type(crate::core::types::RelationType {
name,
transitive: transitive.unwrap_or(false),
symmetric: symmetric.unwrap_or(false),
});
}
#[napi]
pub async fn traverse(
&self,
start_id: String,
relation_type: Option<String>,
max_depth: Option<u32>,
at_time: Option<f64>,
) -> Result<Vec<crate::core::types::GraphPath>> {
let core = self.core.clone();
run_async(move || {
Ok(core.traverse(
&start_id,
relation_type.as_deref(),
max_depth.unwrap_or(3),
at_time.unwrap_or(0.0),
))
})
.await
}
#[napi]
pub async fn outgoing_relations(
&self,
source_id: String,
relation_type: Option<String>,
at_time: Option<f64>,
) -> Result<Vec<crate::core::types::Relation>> {
let core = self.core.clone();
run_async(move || {
Ok(core.outgoing_relations(
&source_id,
relation_type.as_deref(),
at_time.unwrap_or(0.0),
))
})
.await
}
#[napi]
pub async fn incoming_relations(
&self,
target_id: String,
relation_type: Option<String>,
at_time: Option<f64>,
) -> Result<Vec<crate::core::types::Relation>> {
let core = self.core.clone();
run_async(move || {
Ok(core.incoming_relations(
&target_id,
relation_type.as_deref(),
at_time.unwrap_or(0.0),
))
})
.await
}
#[napi(getter)]
pub fn relation_count(&self) -> u32 {
self.core.relation_count() as u32
}
#[napi]
pub async fn federated_query(
&self,
peer_paths: Vec<String>,
text: String,
k: u32,
) -> Result<Vec<RetrievalResult>> {
let core = self.core.clone();
run_async(move || {
let q_vec = core.encode_text(&text);
core.federated_query(&peer_paths, &q_vec, k)
})
.await
}
}
#[cfg(feature = "node-api")]
#[napi(object)]
pub struct AuditEntryJs {
pub timestamp_ms: f64,
pub op: String,
pub id_hash: String,
pub signed: bool,
}
#[cfg(feature = "node-api")]
#[napi(object)]
pub struct StructuralResultJs {
pub entity_id: String,
pub confidence: f64,
pub path: String,
}
#[cfg(feature = "node-api")]
#[napi(object)]
pub struct MultiHopResultJs {
pub entity_id: String,
pub confidence: f64,
pub method: String,
}
#[cfg(feature = "node-api")]
#[napi(object)]
pub struct CleanupResultJs {
pub id: String,
pub confidence: f64,
}
#[cfg(feature = "node-api")]
#[napi(object)]
pub struct GovernanceReportJs {
pub composites_merged: u32,
pub composites_forgotten: u32,
pub atoms_forgotten: u32,
pub idf_refreshed: bool,
pub atoms_refined: u32,
}
#[cfg(test)]
mod tests {
use crate::core::engine::HmsCore;
use crate::core::entangled::EntangledHVec;
#[test]
fn test_determinism() {
let hms = HmsCore::new(1000, None, None).unwrap();
let v1 = hms.encode_text("hello world");
let v2 = hms.encode_text("hello world");
assert!((v1.similarity(&v2) - 1.0).abs() < 0.0001);
}
#[test]
fn test_from_dense_produces_sparse() {
let dense: Vec<f32> = (0..128).map(|i| (i as f32 - 64.0) / 64.0).collect();
let e = EntangledHVec::from_dense(&dense, 1000);
assert_eq!(e.dim, 1000);
let expected = 1000 / 256;
assert_eq!(e.indices.len(), expected);
}
#[test]
fn test_batch_memorize() {
let dir = tempfile::tempdir().unwrap();
let hms =
HmsCore::new(10_000, Some(dir.path().to_string_lossy().to_string()), None).unwrap();
let items = vec![
("b1", "first batch item"),
("b2", "second batch item"),
("b3", "third batch item"),
];
for (id, text) in &items {
let vec = hms.encode_text(text);
hms.memorize(id.to_string(), vec).unwrap();
}
assert_eq!(hms.vector_count(), 3);
let q = hms.encode_text("first batch item");
let results = hms.query(&q, 3);
assert!(!results.is_empty());
}
#[test]
fn test_custom_concept_config() {
let dir = tempfile::tempdir().unwrap();
let mut config = crate::core::config::HmsConfig::default();
config.concepts.similarity_threshold = 0.5;
config.concepts.min_cluster_size = 5;
let hms = HmsCore::new(
10_000,
Some(dir.path().to_string_lossy().to_string()),
Some(config),
)
.unwrap();
for i in 0..10 {
let vec = hms.encode_text(&format!("config test document {}", i));
hms.memorize(format!("cfg_{}", i), vec).unwrap();
}
let concepts = hms.synthesize_concepts();
for c in &concepts {
assert!(
c.member_count >= 5,
"Min cluster size should be 5, got {}",
c.member_count
);
}
}
#[test]
fn test_factorize_returns_factors() {
let dir = tempfile::tempdir().unwrap();
let hms =
HmsCore::new(10_000, Some(dir.path().to_string_lossy().to_string()), None).unwrap();
let colors = vec!["red", "blue", "green"];
let shapes = vec!["circle", "square", "triangle"];
for c in &colors {
let v = hms.encode_text(c);
hms.memorize(c.to_string(), v).unwrap();
}
for s in &shapes {
let v = hms.encode_text(s);
hms.memorize(s.to_string(), v).unwrap();
}
let red_vec = hms.encode_text("red");
let circle_vec = hms.encode_text("circle");
let product = red_vec.bind(&circle_vec);
let domains = vec![
colors
.iter()
.map(|s| hms.encode_text(s))
.collect::<Vec<EntangledHVec>>(),
shapes
.iter()
.map(|s| hms.encode_text(s))
.collect::<Vec<EntangledHVec>>(),
];
let results = hms.factorize_diffusion(&product, &domains, 20);
assert_eq!(results.len(), 2);
}
#[test]
fn test_concept_synthesis() {
let dir = tempfile::tempdir().unwrap();
let hms = HmsCore::new(1000, Some(dir.path().to_string_lossy().to_string()), None).unwrap();
let base = hms.encode_text("base concept");
for i in 0..20 {
let variant = if i == 0 {
base.clone()
} else {
let perturb = EntangledHVec::new_deterministic(1000, 10000 + i);
EntangledHVec::bundle(&[
base.clone(),
base.clone(),
base.clone(),
base.clone(),
perturb,
])
};
hms.memorize(format!("var_{}", i), variant).unwrap();
}
let concepts = hms.synthesize_concepts();
assert!(
!concepts.is_empty(),
"Should synthesize at least one concept"
);
assert!(
concepts[0].coherence > 0.7,
"Synthesized concept should have high coherence, got {}",
concepts[0].coherence
);
}
#[test]
fn test_nsg_train_and_query() {
let dir = tempfile::tempdir().unwrap();
let mut config = crate::core::config::HmsConfig::default();
config.nsg.max_degree = 8;
config.nsg.ef_construction = 16;
let hms = HmsCore::new(
1000,
Some(dir.path().to_string_lossy().to_string()),
Some(config),
)
.unwrap();
for i in 0..50 {
let vec = hms.encode_text(&format!("nsg document {}", i));
hms.memorize(format!("nsg_{}", i), vec).unwrap();
}
assert!(!hms.nsg_trained());
hms.train_nsg().unwrap();
assert!(hms.nsg_trained());
let q = hms.encode_text("nsg document 0");
let results = hms.query(&q, 5);
assert!(!results.is_empty(), "NSG query should return results");
}
#[test]
fn test_nsg_auto_train_at_threshold() {
let dir = tempfile::tempdir().unwrap();
let mut config = crate::core::config::HmsConfig::default();
config.nsg.auto_threshold = 30;
config.nsg.max_degree = 8;
config.nsg.ef_construction = 16;
let hms = HmsCore::new(
1000,
Some(dir.path().to_string_lossy().to_string()),
Some(config),
)
.unwrap();
for i in 0..29 {
let vec = hms.encode_text(&format!("auto nsg {}", i));
hms.memorize(format!("ansg_{}", i), vec).unwrap();
}
assert!(!hms.nsg_trained());
let vec = hms.encode_text("auto nsg 29");
hms.memorize("ansg_29".to_string(), vec).unwrap();
assert!(hms.nsg_trained(), "NSG should auto-train at threshold");
}
#[test]
fn test_adaptive_routing_e2e() {
let dir = tempfile::tempdir().unwrap();
let mut config = crate::core::config::HmsConfig::default();
config.nsg.max_degree = 8;
config.nsg.ef_construction = 16;
let hms = HmsCore::new(
1000,
Some(dir.path().to_string_lossy().to_string()),
Some(config),
)
.unwrap();
for i in 0..50 {
let vec = hms.encode_text(&format!("routing item {}", i));
hms.memorize(format!("rt_{}", i), vec).unwrap();
}
hms.train_nsg().unwrap();
assert!(hms.nsg_trained());
let q = hms.encode_text("routing item 0");
let results = hms.query(&q, 5);
assert!(
!results.is_empty(),
"Adaptive routing should return results via NSG"
);
}
#[test]
fn test_triplet_memorize_and_query() {
let dir = tempfile::tempdir().unwrap();
let hms =
HmsCore::new(10_000, Some(dir.path().to_string_lossy().to_string()), None).unwrap();
hms.memorize_triplet(
"paris_capital".to_string(),
"Paris".to_string(),
"is_capital_of".to_string(),
"France".to_string(),
)
.unwrap();
hms.memorize_triplet(
"berlin_capital".to_string(),
"Berlin".to_string(),
"is_capital_of".to_string(),
"Germany".to_string(),
)
.unwrap();
hms.memorize_triplet(
"tokyo_capital".to_string(),
"Tokyo".to_string(),
"is_capital_of".to_string(),
"Japan".to_string(),
)
.unwrap();
let results = hms
.query_triplet("Paris".to_string(), "is_capital_of".to_string(), 3)
.unwrap();
assert!(!results.is_empty(), "Triplet query should return results");
assert!(
results.iter().any(|r| r.id == "paris_capital"),
"Paris triplet should appear in top-3 results"
);
}
#[test]
fn test_sequence_memorize_and_query() {
let dir = tempfile::tempdir().unwrap();
let hms =
HmsCore::new(10_000, Some(dir.path().to_string_lossy().to_string()), None).unwrap();
hms.memorize_sequence(
"recipe_1".to_string(),
&[
"preheat oven".to_string(),
"mix ingredients".to_string(),
"pour into pan".to_string(),
"bake for thirty minutes".to_string(),
],
)
.unwrap();
hms.memorize_sequence(
"recipe_2".to_string(),
&[
"boil water".to_string(),
"add pasta".to_string(),
"drain and serve".to_string(),
],
)
.unwrap();
let q = hms.encode_text("preheat oven").permute(0);
let results = hms.query(&q, 2);
assert!(!results.is_empty(), "Sequence query should return results");
}
#[test]
fn test_scalar_query_ordering() {
let dir = tempfile::tempdir().unwrap();
let hms =
HmsCore::new(10_000, Some(dir.path().to_string_lossy().to_string()), None).unwrap();
for i in 0..20 {
let val = i as f64 * 5.0;
hms.memorize_scalar(format!("temp_{}", i), val, 0.0, 100.0)
.unwrap();
}
let q = EntangledHVec::from_scalar(50.0, 0.0, 100.0, 10_000);
let results = hms.query(&q, 5);
assert!(!results.is_empty(), "Scalar query should return results");
let top_idx: usize = results[0]
.id
.strip_prefix("temp_")
.unwrap()
.parse()
.unwrap();
assert!(
(5..=15).contains(&top_idx),
"Top scalar result should be near value 50 (idx 10), got idx {}",
top_idx
);
}
#[test]
fn test_analyze_components() {
let dir = tempfile::tempdir().unwrap();
let hms =
HmsCore::new(10_000, Some(dir.path().to_string_lossy().to_string()), None).unwrap();
for i in 0..30 {
let vec = hms.encode_text(&format!("component analysis document {}", i));
hms.memorize(format!("ca_{}", i), vec).unwrap();
}
let vec = hms.encode_text("component analysis document 0");
let results = hms.analyze_components(&vec);
assert!(
!results.is_empty(),
"analyze_components should return results"
);
assert!(
results.iter().all(|r| r.similarity > 0.05),
"All results should exceed similarity threshold"
);
}
#[test]
fn test_delete_existing() {
let dir = tempfile::tempdir().unwrap();
let hms =
HmsCore::new(10_000, Some(dir.path().to_string_lossy().to_string()), None).unwrap();
let vec = hms.encode_text("hello");
hms.memorize("hello".to_string(), vec).unwrap();
assert_eq!(hms.vector_count(), 1);
assert!(hms.delete("hello").unwrap());
assert_eq!(hms.vector_count(), 0);
let q = hms.encode_text("hello");
let results = hms.query(&q, 5);
assert!(
results.is_empty(),
"Deleted vector should not appear in results"
);
}
#[test]
fn test_delete_nonexistent() {
let hms = HmsCore::new(10_000, None, None).unwrap();
assert!(!hms.delete("no_such_id").unwrap());
}
#[test]
fn test_basic_persistence() {
let dir = tempfile::tempdir().unwrap();
let path = dir.path().to_string_lossy().to_string();
{
let hms = HmsCore::new(10_000, Some(path.clone()), None).unwrap();
let v = hms.encode_text("persist me");
hms.memorize("p1".to_string(), v).unwrap();
assert_eq!(hms.vector_count(), 1);
}
let hms = HmsCore::new(10_000, Some(path), None).unwrap();
assert_eq!(hms.vector_count(), 1);
}
#[test]
fn test_delete_persistence() {
let dir = tempfile::tempdir().unwrap();
let path = dir.path().to_string_lossy().to_string();
{
let hms = HmsCore::new(10_000, Some(path.clone()), None).unwrap();
let v1 = hms.encode_text("keep me");
let v2 = hms.encode_text("delete me");
hms.memorize("keep".to_string(), v1).unwrap();
hms.memorize("del".to_string(), v2).unwrap();
assert_eq!(hms.vector_count(), 2);
hms.delete("del").unwrap();
assert_eq!(hms.vector_count(), 1);
}
let hms = HmsCore::new(10_000, Some(path), None).unwrap();
assert_eq!(hms.vector_count(), 1);
let q = hms.encode_text("keep me");
let results = hms.query(&q, 5);
assert!(!results.is_empty(), "Kept vector should survive restart");
}
#[test]
fn test_delete_and_rememorize() {
let dir = tempfile::tempdir().unwrap();
let hms =
HmsCore::new(10_000, Some(dir.path().to_string_lossy().to_string()), None).unwrap();
let v1 = hms.encode_text("version 1");
hms.memorize("doc".to_string(), v1).unwrap();
hms.delete("doc").unwrap();
let v2 = hms.encode_text("version 2");
hms.memorize("doc".to_string(), v2.clone()).unwrap();
assert_eq!(hms.vector_count(), 1);
let q = hms.encode_text("version 2");
let results = hms.query(&q, 1);
assert_eq!(results[0].id, "doc");
}
#[test]
fn test_compact_basic() {
let dir = tempfile::tempdir().unwrap();
let path = dir.path().to_string_lossy().to_string();
let hms = HmsCore::new(10_000, Some(path.clone()), None).unwrap();
for i in 0..50 {
let vec = hms.encode_text(&format!("item {}", i));
hms.memorize(format!("id_{}", i), vec).unwrap();
}
for i in 0..25 {
hms.delete(&format!("id_{}", i)).unwrap();
}
assert_eq!(hms.vector_count(), 25);
hms.compact().unwrap();
let q = hms.encode_text("item 30");
let results = hms.query(&q, 5);
assert!(!results.is_empty(), "Should find results after compaction");
assert_eq!(hms.vector_count(), 25);
}
#[test]
fn test_compact_persistence() {
let dir = tempfile::tempdir().unwrap();
let path = dir.path().to_string_lossy().to_string();
{
let hms = HmsCore::new(10_000, Some(path.clone()), None).unwrap();
for i in 0..20 {
let vec = hms.encode_text(&format!("doc {}", i));
hms.memorize(format!("d_{}", i), vec).unwrap();
}
for i in 0..10 {
hms.delete(&format!("d_{}", i)).unwrap();
}
hms.compact().unwrap();
}
let hms = HmsCore::new(10_000, Some(path), None).unwrap();
assert_eq!(hms.vector_count(), 10);
}
#[test]
fn test_multi_shard_insert_query() {
let dir = tempfile::tempdir().unwrap();
let mut config = crate::core::config::HmsConfig::default();
config.shard.enabled = true;
config.shard.shard_count = 4;
let hms = HmsCore::new(
10_000,
Some(dir.path().to_string_lossy().to_string()),
Some(config),
)
.unwrap();
for i in 0..100 {
let vec = hms.encode_text(&format!("shard document {}", i));
hms.memorize(format!("sd_{}", i), vec).unwrap();
}
assert_eq!(hms.vector_count(), 100);
let q = hms.encode_text("shard document 0");
let results = hms.query(&q, 5);
assert!(
!results.is_empty(),
"Multi-shard query should return results"
);
}
#[test]
fn test_multi_shard_delete() {
let dir = tempfile::tempdir().unwrap();
let mut config = crate::core::config::HmsConfig::default();
config.shard.enabled = true;
config.shard.shard_count = 4;
let hms = HmsCore::new(
10_000,
Some(dir.path().to_string_lossy().to_string()),
Some(config),
)
.unwrap();
for i in 0..20 {
let vec = hms.encode_text(&format!("item {}", i));
hms.memorize(format!("m_{}", i), vec).unwrap();
}
assert_eq!(hms.vector_count(), 20);
for i in 0..10 {
assert!(hms.delete(&format!("m_{}", i)).unwrap());
}
assert_eq!(hms.vector_count(), 10);
}
#[test]
fn test_auto_shard_trigger() {
let dir = tempfile::tempdir().unwrap();
let mut config = crate::core::config::HmsConfig::default();
config.shard.enabled = true;
config.shard.shard_count = 0; config.shard.auto_threshold = 50;
config.shard.target_shard_size = 25;
let hms = HmsCore::new(
10_000,
Some(dir.path().to_string_lossy().to_string()),
Some(config),
)
.unwrap();
for i in 0..50 {
let vec = hms.encode_text(&format!("auto shard {}", i));
hms.memorize(format!("as_{}", i), vec).unwrap();
}
assert_eq!(hms.vector_count(), 50);
let q = hms.encode_text("auto shard 0");
let results = hms.query(&q, 5);
assert!(!results.is_empty(), "Should find results after auto-shard");
}
#[test]
fn test_audit_records_operations() {
let dir = tempfile::tempdir().unwrap();
let mut config = crate::core::config::HmsConfig::default();
config.security.audit_enabled = true;
let hms = HmsCore::new(
10_000,
Some(dir.path().to_string_lossy().to_string()),
Some(config),
)
.unwrap();
let v = hms.encode_text("audit test");
hms.memorize("aud_1".to_string(), v).unwrap();
hms.delete("aud_1").unwrap();
let entries = hms.audit_since(0).unwrap();
assert_eq!(entries.len(), 2);
assert_eq!(entries[0].op, crate::core::audit::AuditOp::Memorize);
assert_eq!(entries[1].op, crate::core::audit::AuditOp::Delete);
}
#[test]
fn test_audit_disabled_returns_empty() {
let hms = HmsCore::new(10_000, None, None).unwrap();
let entries = hms.audit_since(0).unwrap();
assert!(entries.is_empty());
}
#[test]
fn test_audit_compact_recorded() {
let dir = tempfile::tempdir().unwrap();
let mut config = crate::core::config::HmsConfig::default();
config.security.audit_enabled = true;
let hms = HmsCore::new(
10_000,
Some(dir.path().to_string_lossy().to_string()),
Some(config),
)
.unwrap();
let v = hms.encode_text("compact audit");
hms.memorize("ca_1".to_string(), v).unwrap();
hms.compact().unwrap();
let entries = hms.audit_since(0).unwrap();
assert!(entries
.iter()
.any(|e| e.op == crate::core::audit::AuditOp::Compact));
}
#[test]
fn test_graph_add_and_traverse() {
let dir = tempfile::tempdir().unwrap();
let hms =
HmsCore::new(10_000, Some(dir.path().to_string_lossy().to_string()), None).unwrap();
for city in &["paris", "france", "europe"] {
let v = hms.encode_text(city);
hms.memorize(city.to_string(), v).unwrap();
}
hms.add_relation(&crate::core::types::Relation {
source_id: "paris".into(),
relation_type: "is_in".into(),
target_id: "france".into(),
properties: None,
valid_from: 0.0,
valid_to: 0.0,
})
.unwrap();
hms.add_relation(&crate::core::types::Relation {
source_id: "france".into(),
relation_type: "is_in".into(),
target_id: "europe".into(),
properties: None,
valid_from: 0.0,
valid_to: 0.0,
})
.unwrap();
assert_eq!(hms.relation_count(), 2);
let paths = hms.traverse("paris", Some("is_in"), 3, 0.0);
assert!(!paths.is_empty());
let targets: Vec<&str> = paths
.iter()
.flat_map(|p| p.hops.iter().map(|h| h.node_id.as_str()))
.collect();
assert!(targets.contains(&"france"));
assert!(targets.contains(&"europe"));
}
#[test]
fn test_graph_transitive_inference() {
let dir = tempfile::tempdir().unwrap();
let hms =
HmsCore::new(10_000, Some(dir.path().to_string_lossy().to_string()), None).unwrap();
for name in &["a", "b", "c"] {
let v = hms.encode_text(name);
hms.memorize(name.to_string(), v).unwrap();
}
hms.declare_relation_type(crate::core::types::RelationType {
name: "contains".into(),
transitive: true,
symmetric: false,
});
hms.add_relation(&crate::core::types::Relation {
source_id: "a".into(),
relation_type: "contains".into(),
target_id: "b".into(),
properties: None,
valid_from: 0.0,
valid_to: 0.0,
})
.unwrap();
hms.add_relation(&crate::core::types::Relation {
source_id: "b".into(),
relation_type: "contains".into(),
target_id: "c".into(),
properties: None,
valid_from: 0.0,
valid_to: 0.0,
})
.unwrap();
let paths = hms.traverse("a", Some("contains"), 3, 0.0);
let inferred = paths
.iter()
.find(|p| p.hops.len() == 1 && p.hops[0].node_id == "c");
assert!(inferred.is_some(), "Should infer transitive a->c");
}
#[test]
fn test_graph_persistence() {
let dir = tempfile::tempdir().unwrap();
let path = dir.path().to_string_lossy().to_string();
{
let hms = HmsCore::new(10_000, Some(path.clone()), None).unwrap();
let v = hms.encode_text("node_a");
hms.memorize("node_a".to_string(), v).unwrap();
hms.add_relation(&crate::core::types::Relation {
source_id: "node_a".into(),
relation_type: "links_to".into(),
target_id: "node_b".into(),
properties: None,
valid_from: 0.0,
valid_to: 0.0,
})
.unwrap();
}
let hms = HmsCore::new(10_000, Some(path), None).unwrap();
assert_eq!(hms.relation_count(), 1);
let out = hms.outgoing_relations("node_a", None, 0.0);
assert_eq!(out.len(), 1);
assert_eq!(out[0].target_id, "node_b");
}
#[test]
fn test_graph_temporal_query() {
let dir = tempfile::tempdir().unwrap();
let hms =
HmsCore::new(10_000, Some(dir.path().to_string_lossy().to_string()), None).unwrap();
hms.add_relation(&crate::core::types::Relation {
source_id: "alice".into(),
relation_type: "works_at".into(),
target_id: "acme".into(),
properties: None,
valid_from: 1000.0,
valid_to: 2000.0,
})
.unwrap();
hms.add_relation(&crate::core::types::Relation {
source_id: "alice".into(),
relation_type: "works_at".into(),
target_id: "globex".into(),
properties: None,
valid_from: 2001.0,
valid_to: 0.0,
})
.unwrap();
let at_1500 = hms.outgoing_relations("alice", Some("works_at"), 1500.0);
assert_eq!(at_1500.len(), 1);
assert_eq!(at_1500[0].target_id, "acme");
let at_3000 = hms.outgoing_relations("alice", Some("works_at"), 3000.0);
assert_eq!(at_3000.len(), 1);
assert_eq!(at_3000[0].target_id, "globex");
}
#[test]
fn test_federated_query() {
let dir1 = tempfile::tempdir().unwrap();
let dir2 = tempfile::tempdir().unwrap();
let path1 = dir1.path().to_string_lossy().to_string();
let path2 = dir2.path().to_string_lossy().to_string();
{
let hms1 = HmsCore::new(10_000, Some(path1.clone()), None).unwrap();
let v = hms1.encode_text("federated doc alpha");
hms1.memorize("alpha".to_string(), v).unwrap();
}
{
let hms2 = HmsCore::new(10_000, Some(path2.clone()), None).unwrap();
let v = hms2.encode_text("federated doc beta");
hms2.memorize("beta".to_string(), v).unwrap();
}
let hms1 = HmsCore::new(10_000, Some(path1), None).unwrap();
let q = hms1.encode_text("federated doc");
let results = hms1.federated_query(&[path2], &q, 5).unwrap();
let ids: Vec<&str> = results.iter().map(|r| r.id.as_str()).collect();
assert!(ids.contains(&"alpha"), "Should find local result");
assert!(ids.contains(&"beta"), "Should find federated result");
}
#[test]
fn test_graph_compact_preserves_relations() {
let dir = tempfile::tempdir().unwrap();
let hms =
HmsCore::new(10_000, Some(dir.path().to_string_lossy().to_string()), None).unwrap();
let v = hms.encode_text("compactable");
hms.memorize("node1".to_string(), v).unwrap();
hms.add_relation(&crate::core::types::Relation {
source_id: "node1".into(),
relation_type: "related".into(),
target_id: "node2".into(),
properties: None,
valid_from: 0.0,
valid_to: 0.0,
})
.unwrap();
hms.compact().unwrap();
assert_eq!(hms.relation_count(), 1);
assert_eq!(hms.vector_count(), 1);
}
#[test]
fn test_find_analogy() {
let dir = tempfile::tempdir().unwrap();
let hms =
HmsCore::new(10_000, Some(dir.path().to_string_lossy().to_string()), None).unwrap();
for word in &[
"walking", "talking", "running", "walked", "talked", "runner",
] {
let v = hms.encode_text(word);
hms.memorize(word.to_string(), v).unwrap();
}
let results = hms.find_analogy("walking", "walked", "talking", 5);
assert!(!results.is_empty(), "Analogy should return results");
}
#[test]
fn test_ivf_index_persistence() {
let dir = tempfile::tempdir().unwrap();
let path = dir.path().to_string_lossy().to_string();
{
let mut config = crate::core::config::HmsConfig::default();
config.ivf.n_clusters = 8;
config.ivf.n_landmarks = 64;
config.ivf.d_reduced = 16;
config.ivf.n_probe = 8;
let hms = HmsCore::new(10_000, Some(path.clone()), Some(config)).unwrap();
for i in 0..200 {
let v = hms.encode_text(&format!("ivf persist {}", i));
hms.memorize(format!("ip_{}", i), v).unwrap();
}
hms.train_ivf().unwrap();
assert!(hms.ivf_trained());
}
let mut config = crate::core::config::HmsConfig::default();
config.ivf.n_clusters = 8;
config.ivf.n_landmarks = 64;
config.ivf.d_reduced = 16;
config.ivf.n_probe = 8;
let hms = HmsCore::new(10_000, Some(path), Some(config)).unwrap();
assert!(
hms.ivf_trained(),
"IVF should be loaded from disk on re-open"
);
let q = hms.encode_text("ivf persist 0");
let results = hms.query(&q, 5);
assert!(!results.is_empty(), "IVF query should work after reload");
}
#[test]
fn test_query_batch() {
let dir = tempfile::tempdir().unwrap();
let hms =
HmsCore::new(10_000, Some(dir.path().to_string_lossy().to_string()), None).unwrap();
for i in 0..20 {
let v = hms.encode_text(&format!("batch query doc {}", i));
hms.memorize(format!("bq_{}", i), v).unwrap();
}
let queries: Vec<EntangledHVec> = (0..3)
.map(|i| hms.encode_text(&format!("batch query doc {}", i)))
.collect();
let batch_results = hms.query_batch(&queries, 3);
assert_eq!(
batch_results.len(),
3,
"Should return one result set per query"
);
for results in &batch_results {
assert!(!results.is_empty(), "Each query should return results");
}
}
#[test]
fn test_memorize_vector_through_core() {
let dir = tempfile::tempdir().unwrap();
let hms =
HmsCore::new(10_000, Some(dir.path().to_string_lossy().to_string()), None).unwrap();
let dense: Vec<f32> = (0..128).map(|i| (i as f32 - 64.0) / 64.0).collect();
hms.memorize_vector("dense_1".to_string(), &dense).unwrap();
assert_eq!(hms.vector_count(), 1);
let q = EntangledHVec::from_dense(&dense, 10_000);
let results = hms.query(&q, 1);
assert_eq!(results[0].id, "dense_1");
}
#[test]
fn test_readability_through_core() {
let hms = HmsCore::new(10_000, None, None).unwrap();
let metrics = hms.analyze_text("The cat sat on the mat.");
assert!(metrics.word_count > 0);
let score = hms.calculate_readability(&metrics);
assert!(
score > 50.0,
"Simple sentence should be highly readable (got {:.1})",
score
);
}
#[test]
fn test_nsg_index_persistence() {
let dir = tempfile::tempdir().unwrap();
let path = dir.path().to_string_lossy().to_string();
{
let mut config = crate::core::config::HmsConfig::default();
config.nsg.max_degree = 8;
config.nsg.ef_construction = 16;
let hms = HmsCore::new(10_000, Some(path.clone()), Some(config)).unwrap();
for i in 0..50 {
let v = hms.encode_text(&format!("persist nsg {}", i));
hms.memorize(format!("pn_{}", i), v).unwrap();
}
hms.train_nsg().unwrap();
assert!(hms.nsg_trained());
}
let mut config = crate::core::config::HmsConfig::default();
config.nsg.max_degree = 8;
config.nsg.ef_construction = 16;
let hms = HmsCore::new(10_000, Some(path), Some(config)).unwrap();
assert!(
hms.nsg_trained(),
"NSG should be loaded from disk on re-open"
);
let q = hms.encode_text("persist nsg 0");
let results = hms.query(&q, 5);
assert!(!results.is_empty(), "NSG query should work after reload");
}
#[test]
fn test_nsg_accuracy_matches_brute_force() {
let dir = tempfile::tempdir().unwrap();
let mut config = crate::core::config::HmsConfig::default();
config.nsg.max_degree = 8;
config.nsg.ef_construction = 16;
let hms = HmsCore::new(
10_000,
Some(dir.path().to_string_lossy().to_string()),
Some(config),
)
.unwrap();
for i in 0..50 {
let v = hms.encode_text(&format!("accuracy test {}", i));
hms.memorize(format!("acc_{}", i), v).unwrap();
}
let q = hms.encode_text("accuracy test 0");
let brute_results = hms.query(&q, 1);
hms.train_nsg().unwrap();
let nsg_results = hms.query(&q, 1);
assert_eq!(
brute_results[0].id, nsg_results[0].id,
"NSG top-1 should match brute-force top-1"
);
}
#[test]
fn test_multi_shard_results_from_multiple_shards() {
let dir = tempfile::tempdir().unwrap();
let mut config = crate::core::config::HmsConfig::default();
config.shard.enabled = true;
config.shard.shard_count = 4;
let hms = HmsCore::new(
10_000,
Some(dir.path().to_string_lossy().to_string()),
Some(config),
)
.unwrap();
for i in 0..100 {
let v = hms.encode_text(&format!("multi verify {}", i));
hms.memorize(format!("mv_{}", i), v).unwrap();
}
let q = hms.encode_text("multi verify");
let results = hms.query(&q, 20);
assert!(
results.len() >= 10,
"Should return many results from across shards"
);
use fxhash::FxHasher;
use std::hash::Hasher;
let mut shard_ids: std::collections::HashSet<usize> = std::collections::HashSet::new();
for r in &results {
let mut hasher = FxHasher::default();
hasher.write(r.id.as_bytes());
shard_ids.insert((hasher.finish() as usize) % 4);
}
assert!(
shard_ids.len() >= 2,
"Results should come from multiple shards (got {} distinct shards)",
shard_ids.len()
);
}
#[test]
fn test_compact_multi_shard() {
let dir = tempfile::tempdir().unwrap();
let mut config = crate::core::config::HmsConfig::default();
config.shard.enabled = true;
config.shard.shard_count = 2;
let hms = HmsCore::new(
10_000,
Some(dir.path().to_string_lossy().to_string()),
Some(config),
)
.unwrap();
for i in 0..30 {
let v = hms.encode_text(&format!("compact shard {}", i));
hms.memorize(format!("cs_{}", i), v).unwrap();
}
for i in 0..15 {
hms.delete(&format!("cs_{}", i)).unwrap();
}
assert_eq!(hms.vector_count(), 15);
hms.compact().unwrap();
assert_eq!(hms.vector_count(), 15);
let q = hms.encode_text("compact shard 20");
let results = hms.query(&q, 5);
assert!(
!results.is_empty(),
"Should find results after multi-shard compact"
);
}
#[test]
fn test_manual_ivf_train_and_query() {
let dir = tempfile::tempdir().unwrap();
let mut config = crate::core::config::HmsConfig::default();
config.ivf.n_clusters = 8;
config.ivf.n_landmarks = 64;
config.ivf.d_reduced = 16;
config.ivf.n_probe = 8;
let hms = HmsCore::new(
10000,
Some(dir.path().to_string_lossy().to_string()),
Some(config),
)
.unwrap();
for i in 0..200 {
let vec = hms.encode_text(&format!("document number {}", i));
hms.memorize(format!("doc_{}", i), vec).unwrap();
}
assert!(!hms.ivf_trained());
hms.train_ivf().unwrap();
assert!(hms.ivf_trained());
let q = hms.encode_text("document number 0");
let results = hms.query(&q, 10);
assert!(!results.is_empty(), "IVF query should return results");
}
#[test]
fn test_auto_train_ivf_at_threshold() {
let dir = tempfile::tempdir().unwrap();
let mut config = crate::core::config::HmsConfig::default();
config.ivf.enabled = true;
config.ivf.auto_threshold = 50;
config.ivf.n_clusters = 8;
config.ivf.n_landmarks = 64;
config.ivf.d_reduced = 16;
config.ivf.n_probe = 8;
let hms = HmsCore::new(
1000,
Some(dir.path().to_string_lossy().to_string()),
Some(config),
)
.unwrap();
for i in 0..49 {
let vec = hms.encode_text(&format!("auto item {}", i));
hms.memorize(format!("auto_{}", i), vec).unwrap();
}
assert!(!hms.ivf_trained(), "Should not be trained before threshold");
let vec = hms.encode_text("auto item 49");
hms.memorize("auto_49".to_string(), vec).unwrap();
assert!(
hms.ivf_trained(),
"Should be trained after reaching threshold"
);
}
}
#[cfg(all(test, feature = "security"))]
mod security_integration_tests {
use crate::core::audit::AuditOp;
use crate::core::engine::HmsCore;
use crate::core::security::SigningManager;
#[test]
fn test_signed_audit_entries() {
let dir = tempfile::tempdir().unwrap();
let mut config = crate::core::config::HmsConfig::default();
config.security.signing_enabled = true;
config.security.audit_enabled = true;
let hms = HmsCore::new(
10_000,
Some(dir.path().to_string_lossy().to_string()),
Some(config),
)
.unwrap();
let v = hms.encode_text("signed audit test");
hms.memorize("sig_1".to_string(), v).unwrap();
hms.delete("sig_1").unwrap();
let entries = hms.audit_since(0).unwrap();
assert_eq!(entries.len(), 2);
for entry in &entries {
assert_ne!(entry.signature, [0u8; 64], "Entry should be signed");
}
let key_path = dir.path().join("hms_signing.key");
let mgr = SigningManager::new(&key_path).unwrap();
for entry in &entries {
let mut signable = [0u8; 41];
signable[0..8].copy_from_slice(&entry.timestamp_ms.to_le_bytes());
signable[8] = entry.op as u8;
signable[9..41].copy_from_slice(&entry.id_hash);
mgr.verify(&signable, &entry.signature)
.expect("Signature verification failed");
}
}
#[test]
fn test_signed_compact_audit() {
let dir = tempfile::tempdir().unwrap();
let mut config = crate::core::config::HmsConfig::default();
config.security.signing_enabled = true;
config.security.audit_enabled = true;
let hms = HmsCore::new(
10_000,
Some(dir.path().to_string_lossy().to_string()),
Some(config),
)
.unwrap();
let v = hms.encode_text("compact signed");
hms.memorize("cs_1".to_string(), v).unwrap();
hms.compact().unwrap();
let entries = hms.audit_since(0).unwrap();
let compact_entry = entries.iter().find(|e| e.op == AuditOp::Compact).unwrap();
assert_ne!(compact_entry.signature, [0u8; 64]);
let key_path = dir.path().join("hms_signing.key");
let mgr = SigningManager::new(&key_path).unwrap();
let mut signable = [0u8; 41];
signable[0..8].copy_from_slice(&compact_entry.timestamp_ms.to_le_bytes());
signable[8] = compact_entry.op as u8;
signable[9..41].copy_from_slice(&compact_entry.id_hash);
mgr.verify(&signable, &compact_entry.signature)
.expect("Compact signature verification failed");
}
#[test]
fn test_encrypted_arena_roundtrip() {
let dir = tempfile::tempdir().unwrap();
let path = dir.path().to_string_lossy().to_string();
{
let mut config = crate::core::config::HmsConfig::default();
config.security.encryption_enabled = true;
config.security.encryption_passphrase = Some("test-passphrase-123".to_string());
let hms = HmsCore::new(10_000, Some(path.clone()), Some(config)).unwrap();
let v = hms.encode_text("encrypted data");
hms.memorize("enc_1".to_string(), v).unwrap();
let v2 = hms.encode_text("more encrypted data");
hms.memorize("enc_2".to_string(), v2).unwrap();
assert_eq!(hms.vector_count(), 2);
}
let mut config = crate::core::config::HmsConfig::default();
config.security.encryption_enabled = true;
config.security.encryption_passphrase = Some("test-passphrase-123".to_string());
let hms = HmsCore::new(10_000, Some(path), Some(config)).unwrap();
assert_eq!(hms.vector_count(), 2);
let q = hms.encode_text("encrypted data");
let results = hms.query(&q, 1);
assert_eq!(results[0].id, "enc_1");
}
#[test]
fn test_encrypted_compact_roundtrip() {
let dir = tempfile::tempdir().unwrap();
let path = dir.path().to_string_lossy().to_string();
let mut config = crate::core::config::HmsConfig::default();
config.security.encryption_enabled = true;
config.security.encryption_passphrase = Some("compact-test".to_string());
let hms = HmsCore::new(10_000, Some(path.clone()), Some(config.clone())).unwrap();
for i in 0..10 {
let v = hms.encode_text(&format!("encrypted compact {}", i));
hms.memorize(format!("ec_{}", i), v).unwrap();
}
for i in 0..5 {
hms.delete(&format!("ec_{}", i)).unwrap();
}
hms.compact().unwrap();
assert_eq!(hms.vector_count(), 5);
drop(hms);
let hms = HmsCore::new(10_000, Some(path), Some(config)).unwrap();
assert_eq!(hms.vector_count(), 5);
}
}
#[cfg(test)]
mod meaning_tests {
use crate::core::config::HmsConfig;
use crate::core::engine::HmsCore;
fn meaning_hms(dim: u32) -> HmsCore {
let dir = tempfile::tempdir().unwrap();
let mut config = HmsConfig::default();
config.meaning.enabled = true;
config.meaning.auto_decompose = true;
config.meaning.beta = 24.0;
HmsCore::new(
dim,
Some(dir.path().to_string_lossy().to_string()),
Some(config),
)
.unwrap()
}
#[test]
fn test_basin_certification() {
let hms = meaning_hms(16384);
for i in 0..1000u64 {
let v = crate::core::entangled::EntangledHVec::new_deterministic(16384, i);
hms.memorize(format!("atom_{}", i), v).unwrap();
}
let mut recovered = 0;
for probe_seed in [42u64, 100, 250, 500, 750, 999] {
let original =
crate::core::entangled::EntangledHVec::new_deterministic(16384, probe_seed);
let mut noisy_indices = original.indices().to_vec();
let mut rng = rand::thread_rng();
use rand::Rng;
for idx in noisy_indices.iter_mut().take(16) {
*idx = rng.gen_range(0..16384u32);
}
noisy_indices.sort_unstable();
noisy_indices.dedup();
let noisy = crate::core::entangled::EntangledHVec::from_indices(noisy_indices, 16384);
if let Some((id, _conf)) = hms.meaning_cleanup(&noisy) {
if id == format!("atom_{}", probe_seed) {
recovered += 1;
}
}
}
assert!(
recovered >= 5,
"Should recover at least 5/6 atoms from 25% noise, got {}",
recovered
);
}
#[test]
fn test_structural_query_e2e() {
let hms = meaning_hms(16384);
hms.memorize_meaning("doc1", "Paris is capital_of France")
.unwrap();
let s = hms.encode_text("Paris");
let r = hms.encode_text("is_a");
let _results = hms.structural_query(&[("subject", &s), ("relation", &r)], "object");
assert!(hms.meaning_enabled());
}
#[test]
fn test_multi_hop_chained() {
let hms = meaning_hms(16384);
hms.memorize_meaning("t1", "John has father Mark").unwrap();
hms.memorize_meaning("t2", "Mark has father Bob").unwrap();
let _results = hms.multi_hop("John", &["has_father", "has_father"]);
assert!(hms.meaning_enabled());
}
#[test]
fn test_meaning_decompose_e2e() {
let hms = meaning_hms(16384);
hms.memorize_meaning("doc1", "Paris is a city").unwrap();
assert!(hms.meaning_enabled());
}
#[test]
fn test_backward_compat_meaning_disabled() {
let dir = tempfile::tempdir().unwrap();
let hms =
HmsCore::new(10_000, Some(dir.path().to_string_lossy().to_string()), None).unwrap();
assert!(!hms.meaning_enabled());
let v = hms.encode_text("test");
hms.memorize("t1".to_string(), v).unwrap();
assert_eq!(hms.vector_count(), 1);
assert!(hms.structural_query(&[], "object").is_empty());
assert!(hms.multi_hop("t1", &["r"]).is_empty());
}
}
#[cfg(test)]
mod lib_proptest;
#[cfg(test)]
mod ts_export_tests {
use ts_rs::TS;
#[test]
fn export_ts_bindings() {
crate::RetrievalResult::export_all().unwrap();
crate::ConceptCandidate::export_all().unwrap();
crate::TextMetrics::export_all().unwrap();
crate::MemorizeBatchItem::export_all().unwrap();
crate::HmsError::export_all().unwrap();
}
#[test]
fn export_json_schemas() {
use schemars::schema_for;
let dir = std::path::Path::new("schemas");
std::fs::create_dir_all(dir).unwrap();
let schemas: Vec<(&str, schemars::schema::RootSchema)> = vec![
("RetrievalResult", schema_for!(crate::RetrievalResult)),
("ConceptCandidate", schema_for!(crate::ConceptCandidate)),
("TextMetrics", schema_for!(crate::TextMetrics)),
("MemorizeBatchItem", schema_for!(crate::MemorizeBatchItem)),
("HmsError", schema_for!(crate::HmsError)),
];
for (name, schema) in schemas {
let json = serde_json::to_string_pretty(&schema).unwrap();
std::fs::write(dir.join(format!("{}.json", name)), json).unwrap();
}
}
}