use std::collections::HashMap;
use std::time::SystemTime;
use anyhow::Result;
use serde::{Deserialize, Serialize};
use crate::{Vector, VectorData};
/// A vector whose dimensions can be addressed by unique string names.
///
/// `dimension_names` maps every name to the index of its value inside `vector`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct NamedDimensionVector {
    /// Lookup table from dimension name to its index in `vector`.
    pub dimension_names: HashMap<String, usize>,
    /// The underlying values; built via `Vector::new` by [`NamedDimensionVector::new`].
    pub vector: Vector,
}

impl NamedDimensionVector {
    /// Builds a named vector where `dimension_names[i]` labels `values[i]`.
    ///
    /// # Errors
    ///
    /// Returns an error when the two inputs differ in length or when a name
    /// appears more than once.
    pub fn new(dimension_names: Vec<String>, values: Vec<f32>) -> Result<Self> {
        use std::collections::hash_map::Entry;

        if dimension_names.len() != values.len() {
            return Err(anyhow::anyhow!("Dimension names must match values length"));
        }

        // The names are owned, so move them into the map instead of cloning,
        // and use the entry API so each name is hashed only once.
        let mut name_map: HashMap<String, usize> = HashMap::with_capacity(dimension_names.len());
        for (idx, name) in dimension_names.into_iter().enumerate() {
            match name_map.entry(name) {
                Entry::Occupied(taken) => {
                    return Err(anyhow::anyhow!(
                        "Duplicate dimension name: {}",
                        taken.key()
                    ));
                }
                Entry::Vacant(free) => {
                    free.insert(idx);
                }
            }
        }

        Ok(Self {
            dimension_names: name_map,
            vector: Vector::new(values),
        })
    }

    /// Returns the value stored under `name`.
    ///
    /// Yields `None` when the name is unknown or its index is not present in
    /// the stored values. `F32` storage is read in place; any other storage
    /// variant is read through `Vector::as_f32`.
    pub fn get_by_name(&self, name: &str) -> Option<f32> {
        let idx = *self.dimension_names.get(name)?;
        match &self.vector.values {
            VectorData::F32(values) => values.get(idx).copied(),
            _ => {
                let converted = self.vector.as_f32();
                converted.get(idx).copied()
            }
        }
    }

    /// Overwrites the value stored under `name`.
    ///
    /// # Errors
    ///
    /// Returns an error when the name is unknown, when the mapped index lies
    /// outside the stored values, or when the vector is not stored as `F32`
    /// (other storage variants are not modified in place).
    pub fn set_by_name(&mut self, name: &str, value: f32) -> Result<()> {
        let idx = *self
            .dimension_names
            .get(name)
            .ok_or_else(|| anyhow::anyhow!("Unknown dimension name: {}", name))?;

        match &mut self.vector.values {
            VectorData::F32(values) => {
                if idx < values.len() {
                    values[idx] = value;
                    Ok(())
                } else {
                    Err(anyhow::anyhow!("Index out of bounds"))
                }
            }
            _ => Err(anyhow::anyhow!(
                "Vector type must be F32 for direct modification"
            )),
        }
    }

    /// Returns all dimension names sorted by the index they map to.
    pub fn dimension_names_ordered(&self) -> Vec<String> {
        // Sort borrowed names first so each name is cloned exactly once.
        let mut entries: Vec<(&String, usize)> = self
            .dimension_names
            .iter()
            .map(|(name, &idx)| (name, idx))
            .collect();
        entries.sort_by_key(|&(_, idx)| idx);
        entries.into_iter().map(|(name, _)| name.clone()).collect()
    }
}
/// A stack of vectors, one per named level, with free-form metadata per level.
///
/// The three lists are parallel: entry `i` of each describes level `i`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct HierarchicalVector {
    /// The vector stored at each level.
    pub levels: Vec<Vector>,
    /// The name of each level.
    pub level_names: Vec<String>,
    /// Key/value annotations attached to each level.
    pub level_metadata: Vec<HashMap<String, String>>,
}

impl HierarchicalVector {
    /// Creates a hierarchy from parallel lists of vectors and names, with
    /// empty metadata for every level.
    ///
    /// # Errors
    ///
    /// Returns an error when the lists differ in length or are empty.
    pub fn new(levels: Vec<Vector>, level_names: Vec<String>) -> Result<Self> {
        if levels.len() != level_names.len() {
            return Err(anyhow::anyhow!("Levels and names must have same length"));
        }
        if levels.is_empty() {
            return Err(anyhow::anyhow!("Must have at least one level"));
        }
        let level_metadata = vec![HashMap::new(); levels.len()];
        Ok(Self {
            levels,
            level_names,
            level_metadata,
        })
    }

    /// Returns the vector at index `level`, or `None` when out of range.
    pub fn get_level(&self, level: usize) -> Option<&Vector> {
        self.levels.get(level)
    }

    /// Returns the vector of the first level whose name equals `name`.
    pub fn get_level_by_name(&self, name: &str) -> Option<&Vector> {
        let idx = self.level_names.iter().position(|n| n == name)?;
        self.levels.get(idx)
    }

    /// Attaches a `key`/`value` pair to the metadata of `level`, replacing any
    /// previous value stored under the same key.
    ///
    /// # Errors
    ///
    /// Returns an error when `level` does not index an existing level.
    pub fn add_level_metadata(&mut self, level: usize, key: String, value: String) -> Result<()> {
        if level >= self.levels.len() {
            return Err(anyhow::anyhow!("Level index out of bounds"));
        }
        // The fields are public and deserializable, so the metadata list may be
        // shorter than `levels`. Grow it on demand instead of panicking on the
        // index below.
        if self.level_metadata.len() <= level {
            self.level_metadata.resize_with(level + 1, HashMap::new);
        }
        self.level_metadata[level].insert(key, value);
        Ok(())
    }

    /// Computes the cosine similarity between the vectors both hierarchies
    /// hold at index `level`.
    ///
    /// # Errors
    ///
    /// Returns an error when either hierarchy lacks that level, or when
    /// `Vector::cosine_similarity` itself fails.
    pub fn cosine_similarity_at_level(
        &self,
        other: &HierarchicalVector,
        level: usize,
    ) -> Result<f32> {
        let self_vec = self
            .get_level(level)
            .ok_or_else(|| anyhow::anyhow!("Level {} not found in self", level))?;
        let other_vec = other
            .get_level(level)
            .ok_or_else(|| anyhow::anyhow!("Level {} not found in other", level))?;
        self_vec.cosine_similarity(other_vec)
    }

    /// Computes the weighted average of the per-level cosine similarities.
    ///
    /// Levels whose weight is not strictly positive are skipped entirely (no
    /// similarity is computed for them). When no weight is positive the result
    /// is `0.0`.
    ///
    /// # Errors
    ///
    /// Returns an error when the hierarchies have different level counts, when
    /// `weights` does not have one entry per level, or when a per-level
    /// similarity fails.
    pub fn weighted_similarity(&self, other: &HierarchicalVector, weights: &[f32]) -> Result<f32> {
        if self.levels.len() != other.levels.len() {
            return Err(anyhow::anyhow!(
                "Hierarchical vectors must have same number of levels"
            ));
        }
        if weights.len() != self.levels.len() {
            return Err(anyhow::anyhow!("Weights must match number of levels"));
        }

        let mut total_similarity = 0.0;
        let mut total_weight = 0.0;
        for (level, &weight) in weights.iter().enumerate() {
            if weight > 0.0 {
                let sim = self.cosine_similarity_at_level(other, level)?;
                total_similarity += sim * weight;
                total_weight += weight;
            }
        }

        if total_weight > 0.0 {
            Ok(total_similarity / total_weight)
        } else {
            Ok(0.0)
        }
    }
}
/// A vector tagged with a creation time, an optional validity window and a
/// decay factor used to discount similarity across time gaps.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct TemporalVector {
    /// The wrapped vector.
    pub vector: Vector,
    /// The instant this vector is considered to originate from.
    pub timestamp: SystemTime,
    /// How long after `timestamp` the vector stays valid, in seconds;
    /// `None` means it never expires.
    pub validity_duration: Option<u64>,
    /// Decay control; the builder clamps it to `[0.0, 1.0]`. `1.0` disables decay.
    pub decay_factor: f32,
}

impl TemporalVector {
    /// Wraps `vector`, stamped with the current system time, with no expiry
    /// and no decay.
    pub fn new(vector: Vector) -> Self {
        Self::with_timestamp(vector, SystemTime::now())
    }

    /// Wraps `vector` with an explicit `timestamp`, no expiry and no decay.
    pub fn with_timestamp(vector: Vector, timestamp: SystemTime) -> Self {
        Self {
            vector,
            timestamp,
            validity_duration: None,
            decay_factor: 1.0,
        }
    }

    /// Sets the validity window to `duration_secs` seconds after the timestamp.
    pub fn with_validity(mut self, duration_secs: u64) -> Self {
        self.validity_duration = Some(duration_secs);
        self
    }

    /// Sets the decay factor, clamped to `[0.0, 1.0]`.
    ///
    /// Note that `f32::clamp` passes NaN through unchanged.
    pub fn with_decay(mut self, decay_factor: f32) -> Self {
        self.decay_factor = decay_factor.clamp(0.0, 1.0);
        self
    }

    /// Reports whether the vector is still inside its validity window.
    ///
    /// A vector without a validity window is always valid. A timestamp that
    /// lies in the future (so its elapsed time cannot be computed) is also
    /// treated as valid.
    pub fn is_valid(&self) -> bool {
        match (self.validity_duration, self.timestamp.elapsed()) {
            (Some(duration), Ok(elapsed)) => elapsed.as_secs() < duration,
            _ => true,
        }
    }

    /// Cosine similarity between the two vectors, discounted by the time gap
    /// between their timestamps.
    ///
    /// The discount is `exp(-gap_seconds * (1 - self.decay_factor) / 3600)`,
    /// so only `self`'s decay factor is used and a factor of `1.0` applies no
    /// discount.
    ///
    /// # Errors
    ///
    /// Propagates any error from `Vector::cosine_similarity`.
    pub fn decayed_similarity(&self, other: &TemporalVector) -> Result<f32> {
        let base_similarity = self.vector.cosine_similarity(&other.vector)?;

        // Measure the gap directly between the two timestamps. This stays
        // correct for timestamps in the future, does not depend on when "now"
        // is sampled, and avoids subtracting two large `f32` ages.
        let gap = self
            .timestamp
            .duration_since(other.timestamp)
            .unwrap_or_else(|earlier| earlier.duration());
        let age_diff = gap.as_secs_f32();

        let decay = (-age_diff * (1.0 - self.decay_factor) / 3600.0).exp();
        Ok(base_similarity * decay)
    }
}
/// A vector paired with one non-negative importance weight per dimension.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct WeightedDimensionVector {
    /// The wrapped vector.
    pub vector: Vector,
    /// Importance weight of each dimension, parallel to the vector's values.
    pub weights: Vec<f32>,
}

impl WeightedDimensionVector {
    /// Builds a weighted vector where `weights[i]` applies to `values[i]`.
    ///
    /// # Errors
    ///
    /// Returns an error when the inputs differ in length or when any weight is
    /// negative or NaN.
    pub fn new(values: Vec<f32>, weights: Vec<f32>) -> Result<Self> {
        if values.len() != weights.len() {
            return Err(anyhow::anyhow!("Values and weights must have same length"));
        }
        // NaN fails every ordered comparison, so it must be rejected explicitly;
        // otherwise it would slip past the `< 0.0` test and poison later sums.
        if weights.iter().any(|&w| w.is_nan() || w < 0.0) {
            return Err(anyhow::anyhow!("Weights must be non-negative"));
        }
        Ok(Self {
            vector: Vector::new(values),
            weights,
        })
    }

    /// Builds a weighted vector in which every dimension has weight
    /// `1 / values.len()`. Empty input yields an empty weight list.
    pub fn uniform(values: Vec<f32>) -> Self {
        let weight = 1.0 / values.len() as f32;
        let weights = vec![weight; values.len()];
        Self {
            vector: Vector::new(values),
            weights,
        }
    }

    /// Rescales the weights so they sum to one.
    ///
    /// Does nothing when the current sum is not strictly positive.
    pub fn normalize_weights(&mut self) {
        let sum: f32 = self.weights.iter().sum();
        if sum > 0.0 {
            for weight in &mut self.weights {
                *weight /= sum;
            }
        }
    }

    /// Cosine-style similarity in which each dimension is scaled by weights.
    ///
    /// The dot product uses the average of both vectors' weights per
    /// dimension; each magnitude uses that vector's own weights. Returns `0.0`
    /// when either weighted magnitude is zero.
    ///
    /// # Errors
    ///
    /// Returns an error when the two vectors report different `dimensions`.
    pub fn weighted_cosine_similarity(&self, other: &WeightedDimensionVector) -> Result<f32> {
        if self.vector.dimensions != other.vector.dimensions {
            return Err(anyhow::anyhow!("Vector dimensions must match"));
        }
        let self_values = self.vector.as_f32();
        let other_values = other.vector.as_f32();

        // Average the two weight lists on the fly rather than materialising an
        // intermediate vector.
        let weighted_dot: f32 = self_values
            .iter()
            .zip(&other_values)
            .zip(self.weights.iter().zip(&other.weights))
            .map(|((a, b), (w1, w2))| a * b * ((w1 + w2) / 2.0))
            .sum();

        let self_magnitude: f32 = self_values
            .iter()
            .zip(&self.weights)
            .map(|(v, w)| v * v * w)
            .sum::<f32>()
            .sqrt();
        let other_magnitude: f32 = other_values
            .iter()
            .zip(&other.weights)
            .map(|(v, w)| v * v * w)
            .sum::<f32>()
            .sqrt();

        if self_magnitude == 0.0 || other_magnitude == 0.0 {
            return Ok(0.0);
        }
        Ok(weighted_dot / (self_magnitude * other_magnitude))
    }

    /// Returns up to `k` entries of `(index, value, weight)`, ordered by
    /// weight from highest to lowest. Equal weights keep their index order.
    pub fn top_dimensions(&self, k: usize) -> Vec<(usize, f32, f32)> {
        let mut indexed: Vec<(usize, f32, f32)> = self
            .vector
            .as_f32()
            .iter()
            .zip(&self.weights)
            .enumerate()
            .map(|(idx, (&value, &weight))| (idx, value, weight))
            .collect();
        // `total_cmp` is a true total order even when a weight is NaN (the
        // field is public, so NaN can still appear); a comparator that is not
        // a total order may make the sort panic or give unspecified order.
        indexed.sort_by(|a, b| b.2.total_cmp(&a.2));
        indexed.truncate(k);
        indexed
    }
}
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ConfidenceScoredVector {
pub mean: Vector,
pub confidence: Vec<f32>,
pub overall_confidence: f32,
}
impl ConfidenceScoredVector {
pub fn new(mean_values: Vec<f32>, confidence_scores: Vec<f32>) -> Result<Self> {
if mean_values.len() != confidence_scores.len() {
return Err(anyhow::anyhow!(
"Mean values and confidence scores must have same length"
));
}
if confidence_scores.iter().any(|&c| !(0.0..=1.0).contains(&c)) {
return Err(anyhow::anyhow!(
"Confidence scores must be between 0.0 and 1.0"
));
}
let overall_confidence =
confidence_scores.iter().sum::<f32>() / confidence_scores.len() as f32;
Ok(Self {
mean: Vector::new(mean_values),
confidence: confidence_scores,
overall_confidence,
})
}
pub fn high_confidence(values: Vec<f32>) -> Self {
let confidence = vec![0.95; values.len()];
Self {
mean: Vector::new(values),
overall_confidence: 0.95,
confidence,
}
}
pub fn confidence_weighted_similarity(&self, other: &ConfidenceScoredVector) -> Result<f32> {
if self.mean.dimensions != other.mean.dimensions {
return Err(anyhow::anyhow!("Vector dimensions must match"));
}
let self_values = self.mean.as_f32();
let other_values = other.mean.as_f32();
let weighted_dot: f32 = self_values
.iter()
.zip(&other_values)
.zip(self.confidence.iter().zip(&other.confidence))
.map(|((a, b), (c1, c2))| a * b * c1 * c2)
.sum();
let self_magnitude: f32 = self_values
.iter()
.zip(&self.confidence)
.map(|(v, c)| v * v * c)
.sum::<f32>()
.sqrt();
let other_magnitude: f32 = other_values
.iter()
.zip(&other.confidence)
.map(|(v, c)| v * v * c)
.sum::<f32>()
.sqrt();
if self_magnitude == 0.0 || other_magnitude == 0.0 {
return Ok(0.0);
}
let similarity = weighted_dot / (self_magnitude * other_magnitude);
Ok(similarity * self.overall_confidence * other.overall_confidence)
}
pub fn sample(&self) -> Vector {
use crate::random_utils::NormalSampler as Normal;
use scirs2_core::random::Random;
let mut rng = Random::seed(42);
let values = self.mean.as_f32();
let mut sampled = Vec::new();
for (i, &mean_val) in values.iter().enumerate() {
let std_dev = (1.0 - self.confidence[i]) * mean_val.abs() * 0.1; if std_dev > 0.0 {
let normal =
Normal::new(mean_val, std_dev).expect("std_dev validated to be positive");
sampled.push(normal.sample(&mut rng));
} else {
sampled.push(mean_val);
}
}
Vector::new(sampled)
}
pub fn low_confidence_dimensions(&self, threshold: f32) -> Vec<(usize, f32, f32)> {
self.mean
.as_f32()
.iter()
.zip(&self.confidence)
.enumerate()
.filter(|&(_, (_, &conf))| conf < threshold)
.map(|(idx, (&value, &conf))| (idx, value, conf))
.collect()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Values can be read and overwritten through their dimension names.
    #[test]
    fn test_named_dimension_vector() -> Result<()> {
        let mut named = NamedDimensionVector::new(
            vec!["age".to_string(), "income".to_string(), "score".to_string()],
            vec![25.0, 50000.0, 0.85],
        )?;

        assert_eq!(named.get_by_name("age"), Some(25.0));
        assert_eq!(named.get_by_name("income"), Some(50000.0));
        assert_eq!(named.get_by_name("unknown"), None);

        named.set_by_name("score", 0.95)?;
        assert_eq!(named.get_by_name("score"), Some(0.95));
        Ok(())
    }

    /// Levels can be looked up by name and keep their own dimensionality.
    #[test]
    fn test_hierarchical_vector() -> Result<()> {
        let levels = vec![
            Vector::new(vec![1.0, 2.0]),
            Vector::new(vec![1.0, 2.0, 3.0, 4.0]),
            Vector::new(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]),
        ];
        let names = vec![
            "coarse".to_string(),
            "medium".to_string(),
            "fine".to_string(),
        ];

        let hierarchy = HierarchicalVector::new(levels, names)?;
        assert_eq!(hierarchy.levels.len(), 3);

        let medium = hierarchy.get_level_by_name("medium");
        assert!(medium.is_some());
        assert_eq!(medium.expect("test value").dimensions, 4);
        Ok(())
    }

    /// A freshly created vector with a one-hour window is valid and keeps the
    /// requested decay factor.
    #[test]
    fn test_temporal_vector() {
        let base = Vector::new(vec![1.0, 2.0, 3.0]);
        let stamped = TemporalVector::new(base)
            .with_validity(3600)
            .with_decay(0.9);

        assert!(stamped.is_valid());
        assert_eq!(stamped.decay_factor, 0.9);
    }

    /// Normalised weights sum to one and the heaviest dimension ranks first.
    #[test]
    fn test_weighted_dimension_vector() -> Result<()> {
        let mut weighted =
            WeightedDimensionVector::new(vec![1.0, 2.0, 3.0], vec![0.1, 0.3, 0.6])?;
        weighted.normalize_weights();

        let total: f32 = weighted.weights.iter().sum();
        assert!((total - 1.0).abs() < 1e-6);

        let ranked = weighted.top_dimensions(2);
        assert_eq!(ranked.len(), 2);
        assert_eq!(ranked[0].0, 2);
        Ok(())
    }

    /// The aggregate confidence is the average score and only the weakest
    /// dimension falls under the threshold.
    #[test]
    fn test_confidence_scored_vector() -> Result<()> {
        let scored = ConfidenceScoredVector::new(vec![1.0, 2.0, 3.0], vec![0.9, 0.8, 0.95])?;
        assert!(scored.overall_confidence > 0.8);

        let weak = scored.low_confidence_dimensions(0.85);
        assert_eq!(weak.len(), 1);
        assert_eq!(weak[0].0, 1);
        Ok(())
    }
}