1use std::collections::HashMap;
11use std::time::SystemTime;
12
13use anyhow::Result;
14use serde::{Deserialize, Serialize};
15
16use crate::{Vector, VectorData};
17
/// A vector whose components can be addressed by name.
///
/// `dimension_names` associates each name with the index of the matching
/// component inside `vector`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct NamedDimensionVector {
    /// Maps a dimension name to the index of its component in `vector`.
    pub dimension_names: HashMap<String, usize>,
    /// The underlying vector that stores the component values.
    pub vector: Vector,
}
26
27impl NamedDimensionVector {
28 pub fn new(dimension_names: Vec<String>, values: Vec<f32>) -> Result<Self> {
30 if dimension_names.len() != values.len() {
31 return Err(anyhow::anyhow!("Dimension names must match values length"));
32 }
33
34 let mut name_map = HashMap::new();
35 for (idx, name) in dimension_names.iter().enumerate() {
36 if name_map.contains_key(name) {
37 return Err(anyhow::anyhow!("Duplicate dimension name: {}", name));
38 }
39 name_map.insert(name.clone(), idx);
40 }
41
42 Ok(Self {
43 dimension_names: name_map,
44 vector: Vector::new(values),
45 })
46 }
47
48 pub fn get_by_name(&self, name: &str) -> Option<f32> {
50 self.dimension_names
51 .get(name)
52 .and_then(|&idx| match &self.vector.values {
53 VectorData::F32(values) => values.get(idx).copied(),
54 _ => {
55 let f32_values = self.vector.as_f32();
56 f32_values.get(idx).copied()
57 }
58 })
59 }
60
61 pub fn set_by_name(&mut self, name: &str, value: f32) -> Result<()> {
63 if let Some(&idx) = self.dimension_names.get(name) {
64 match &mut self.vector.values {
65 VectorData::F32(values) => {
66 if idx < values.len() {
67 values[idx] = value;
68 Ok(())
69 } else {
70 Err(anyhow::anyhow!("Index out of bounds"))
71 }
72 }
73 _ => Err(anyhow::anyhow!(
74 "Vector type must be F32 for direct modification"
75 )),
76 }
77 } else {
78 Err(anyhow::anyhow!("Unknown dimension name: {}", name))
79 }
80 }
81
82 pub fn dimension_names_ordered(&self) -> Vec<String> {
84 let mut names: Vec<(String, usize)> = self
85 .dimension_names
86 .iter()
87 .map(|(name, &idx)| (name.clone(), idx))
88 .collect();
89 names.sort_by_key(|(_, idx)| *idx);
90 names.into_iter().map(|(name, _)| name).collect()
91 }
92}
93
/// A collection of vectors organised as named levels.
///
/// `levels`, `level_names` and `level_metadata` are parallel lists: entry
/// `i` of each one describes level `i`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct HierarchicalVector {
    /// The vector stored at each level.
    pub levels: Vec<Vector>,
    /// The name of each level, in the same order as `levels`.
    pub level_names: Vec<String>,
    /// Free-form key/value metadata attached to each level.
    pub level_metadata: Vec<HashMap<String, String>>,
}
104
105impl HierarchicalVector {
106 pub fn new(levels: Vec<Vector>, level_names: Vec<String>) -> Result<Self> {
108 if levels.len() != level_names.len() {
109 return Err(anyhow::anyhow!("Levels and names must have same length"));
110 }
111
112 if levels.is_empty() {
113 return Err(anyhow::anyhow!("Must have at least one level"));
114 }
115
116 let level_metadata = vec![HashMap::new(); levels.len()];
117
118 Ok(Self {
119 levels,
120 level_names,
121 level_metadata,
122 })
123 }
124
125 pub fn get_level(&self, level: usize) -> Option<&Vector> {
127 self.levels.get(level)
128 }
129
130 pub fn get_level_by_name(&self, name: &str) -> Option<&Vector> {
132 self.level_names
133 .iter()
134 .position(|n| n == name)
135 .and_then(|idx| self.levels.get(idx))
136 }
137
138 pub fn add_level_metadata(&mut self, level: usize, key: String, value: String) -> Result<()> {
140 if level >= self.levels.len() {
141 return Err(anyhow::anyhow!("Level index out of bounds"));
142 }
143 self.level_metadata[level].insert(key, value);
144 Ok(())
145 }
146
147 pub fn cosine_similarity_at_level(
149 &self,
150 other: &HierarchicalVector,
151 level: usize,
152 ) -> Result<f32> {
153 let self_vec = self
154 .get_level(level)
155 .ok_or_else(|| anyhow::anyhow!("Level {} not found in self", level))?;
156 let other_vec = other
157 .get_level(level)
158 .ok_or_else(|| anyhow::anyhow!("Level {} not found in other", level))?;
159
160 self_vec.cosine_similarity(other_vec)
161 }
162
163 pub fn weighted_similarity(&self, other: &HierarchicalVector, weights: &[f32]) -> Result<f32> {
165 if self.levels.len() != other.levels.len() {
166 return Err(anyhow::anyhow!(
167 "Hierarchical vectors must have same number of levels"
168 ));
169 }
170
171 if weights.len() != self.levels.len() {
172 return Err(anyhow::anyhow!("Weights must match number of levels"));
173 }
174
175 let mut total_similarity = 0.0;
176 let mut total_weight = 0.0;
177
178 for (i, weight) in weights.iter().enumerate() {
179 if *weight > 0.0 {
180 let sim = self.cosine_similarity_at_level(other, i)?;
181 total_similarity += sim * weight;
182 total_weight += weight;
183 }
184 }
185
186 if total_weight > 0.0 {
187 Ok(total_similarity / total_weight)
188 } else {
189 Ok(0.0)
190 }
191 }
192}
193
/// A vector tagged with a timestamp, an optional validity window and a
/// decay factor.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct TemporalVector {
    /// The wrapped vector.
    pub vector: Vector,
    /// The point in time associated with the vector.
    pub timestamp: SystemTime,
    /// Validity window in seconds, measured from `timestamp`; `None` means
    /// no window is set.
    pub validity_duration: Option<u64>,
    /// Decay factor; `with_decay` clamps it to `0.0..=1.0`, and constructors
    /// default it to `1.0`.
    pub decay_factor: f32,
}
206
207impl TemporalVector {
208 pub fn new(vector: Vector) -> Self {
210 Self {
211 vector,
212 timestamp: SystemTime::now(),
213 validity_duration: None,
214 decay_factor: 1.0,
215 }
216 }
217
218 pub fn with_timestamp(vector: Vector, timestamp: SystemTime) -> Self {
220 Self {
221 vector,
222 timestamp,
223 validity_duration: None,
224 decay_factor: 1.0,
225 }
226 }
227
228 pub fn with_validity(mut self, duration_secs: u64) -> Self {
230 self.validity_duration = Some(duration_secs);
231 self
232 }
233
234 pub fn with_decay(mut self, decay_factor: f32) -> Self {
236 self.decay_factor = decay_factor.clamp(0.0, 1.0);
237 self
238 }
239
240 pub fn is_valid(&self) -> bool {
242 if let Some(duration) = self.validity_duration {
243 if let Ok(elapsed) = self.timestamp.elapsed() {
244 return elapsed.as_secs() < duration;
245 }
246 }
247 true
248 }
249
250 pub fn decayed_similarity(&self, other: &TemporalVector) -> Result<f32> {
252 let base_similarity = self.vector.cosine_similarity(&other.vector)?;
253
254 let self_age = self.timestamp.elapsed().unwrap_or_default().as_secs_f32();
256 let other_age = other.timestamp.elapsed().unwrap_or_default().as_secs_f32();
257 let age_diff = (self_age - other_age).abs();
258
259 let decay = (-age_diff * (1.0 - self.decay_factor) / 3600.0).exp(); Ok(base_similarity * decay)
263 }
264}
265
/// A vector paired with one weight per dimension.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct WeightedDimensionVector {
    /// The wrapped vector.
    pub vector: Vector,
    /// Per-dimension weights, in the same order as the vector components.
    pub weights: Vec<f32>,
}
274
275impl WeightedDimensionVector {
276 pub fn new(values: Vec<f32>, weights: Vec<f32>) -> Result<Self> {
278 if values.len() != weights.len() {
279 return Err(anyhow::anyhow!("Values and weights must have same length"));
280 }
281
282 if weights.iter().any(|&w| w < 0.0) {
284 return Err(anyhow::anyhow!("Weights must be non-negative"));
285 }
286
287 Ok(Self {
288 vector: Vector::new(values),
289 weights,
290 })
291 }
292
293 pub fn uniform(values: Vec<f32>) -> Self {
295 let weight = 1.0 / values.len() as f32;
296 let weights = vec![weight; values.len()];
297 Self {
298 vector: Vector::new(values),
299 weights,
300 }
301 }
302
303 pub fn normalize_weights(&mut self) {
305 let sum: f32 = self.weights.iter().sum();
306 if sum > 0.0 {
307 for weight in &mut self.weights {
308 *weight /= sum;
309 }
310 }
311 }
312
313 pub fn weighted_cosine_similarity(&self, other: &WeightedDimensionVector) -> Result<f32> {
315 if self.vector.dimensions != other.vector.dimensions {
316 return Err(anyhow::anyhow!("Vector dimensions must match"));
317 }
318
319 let self_values = self.vector.as_f32();
320 let other_values = other.vector.as_f32();
321
322 let combined_weights: Vec<f32> = self
324 .weights
325 .iter()
326 .zip(&other.weights)
327 .map(|(w1, w2)| (w1 + w2) / 2.0)
328 .collect();
329
330 let weighted_dot: f32 = self_values
331 .iter()
332 .zip(&other_values)
333 .zip(&combined_weights)
334 .map(|((a, b), w)| a * b * w)
335 .sum();
336
337 let self_magnitude: f32 = self_values
338 .iter()
339 .zip(&self.weights)
340 .map(|(v, w)| v * v * w)
341 .sum::<f32>()
342 .sqrt();
343
344 let other_magnitude: f32 = other_values
345 .iter()
346 .zip(&other.weights)
347 .map(|(v, w)| v * v * w)
348 .sum::<f32>()
349 .sqrt();
350
351 if self_magnitude == 0.0 || other_magnitude == 0.0 {
352 return Ok(0.0);
353 }
354
355 Ok(weighted_dot / (self_magnitude * other_magnitude))
356 }
357
358 pub fn top_dimensions(&self, k: usize) -> Vec<(usize, f32, f32)> {
360 let mut indexed: Vec<(usize, f32, f32)> = self
361 .vector
362 .as_f32()
363 .iter()
364 .zip(&self.weights)
365 .enumerate()
366 .map(|(idx, (&value, &weight))| (idx, value, weight))
367 .collect();
368
369 indexed.sort_by(|a, b| b.2.partial_cmp(&a.2).unwrap_or(std::cmp::Ordering::Equal));
370 indexed.truncate(k);
371 indexed
372 }
373}
374
/// A vector of mean values with a confidence score per dimension.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ConfidenceScoredVector {
    /// The mean value of each dimension.
    pub mean: Vector,
    /// Per-dimension confidence scores, in the same order as `mean`.
    pub confidence: Vec<f32>,
    /// A single confidence figure for the whole vector, set by the constructors.
    pub overall_confidence: f32,
}
385
386impl ConfidenceScoredVector {
387 pub fn new(mean_values: Vec<f32>, confidence_scores: Vec<f32>) -> Result<Self> {
389 if mean_values.len() != confidence_scores.len() {
390 return Err(anyhow::anyhow!(
391 "Mean values and confidence scores must have same length"
392 ));
393 }
394
395 if confidence_scores.iter().any(|&c| !(0.0..=1.0).contains(&c)) {
397 return Err(anyhow::anyhow!(
398 "Confidence scores must be between 0.0 and 1.0"
399 ));
400 }
401
402 let overall_confidence =
403 confidence_scores.iter().sum::<f32>() / confidence_scores.len() as f32;
404
405 Ok(Self {
406 mean: Vector::new(mean_values),
407 confidence: confidence_scores,
408 overall_confidence,
409 })
410 }
411
412 pub fn high_confidence(values: Vec<f32>) -> Self {
414 let confidence = vec![0.95; values.len()];
415 Self {
416 mean: Vector::new(values),
417 overall_confidence: 0.95,
418 confidence,
419 }
420 }
421
422 pub fn confidence_weighted_similarity(&self, other: &ConfidenceScoredVector) -> Result<f32> {
424 if self.mean.dimensions != other.mean.dimensions {
425 return Err(anyhow::anyhow!("Vector dimensions must match"));
426 }
427
428 let self_values = self.mean.as_f32();
429 let other_values = other.mean.as_f32();
430
431 let weighted_dot: f32 = self_values
433 .iter()
434 .zip(&other_values)
435 .zip(self.confidence.iter().zip(&other.confidence))
436 .map(|((a, b), (c1, c2))| a * b * c1 * c2)
437 .sum();
438
439 let self_magnitude: f32 = self_values
440 .iter()
441 .zip(&self.confidence)
442 .map(|(v, c)| v * v * c)
443 .sum::<f32>()
444 .sqrt();
445
446 let other_magnitude: f32 = other_values
447 .iter()
448 .zip(&other.confidence)
449 .map(|(v, c)| v * v * c)
450 .sum::<f32>()
451 .sqrt();
452
453 if self_magnitude == 0.0 || other_magnitude == 0.0 {
454 return Ok(0.0);
455 }
456
457 let similarity = weighted_dot / (self_magnitude * other_magnitude);
458
459 Ok(similarity * self.overall_confidence * other.overall_confidence)
461 }
462
463 pub fn sample(&self) -> Vector {
465 use crate::random_utils::NormalSampler as Normal;
466 use scirs2_core::random::Random;
467
468 let mut rng = Random::seed(42);
469 let values = self.mean.as_f32();
470 let mut sampled = Vec::new();
471
472 for (i, &mean_val) in values.iter().enumerate() {
473 let std_dev = (1.0 - self.confidence[i]) * mean_val.abs() * 0.1; if std_dev > 0.0 {
475 let normal = Normal::new(mean_val, std_dev).unwrap();
476 sampled.push(normal.sample(&mut rng));
477 } else {
478 sampled.push(mean_val);
479 }
480 }
481
482 Vector::new(sampled)
483 }
484
485 pub fn low_confidence_dimensions(&self, threshold: f32) -> Vec<(usize, f32, f32)> {
487 self.mean
488 .as_f32()
489 .iter()
490 .zip(&self.confidence)
491 .enumerate()
492 .filter(|&(_, (_, &conf))| conf < threshold)
493 .map(|(idx, (&value, &conf))| (idx, value, conf))
494 .collect()
495 }
496}
497
#[cfg(test)]
mod tests {
    use super::*;

    /// Values can be read and overwritten through their dimension names.
    #[test]
    fn test_named_dimension_vector() {
        let labels: Vec<String> = ["age", "income", "score"]
            .iter()
            .map(|label| label.to_string())
            .collect();
        let mut person = NamedDimensionVector::new(labels, vec![25.0, 50000.0, 0.85]).unwrap();

        // Known names resolve to their values; unknown names resolve to nothing.
        assert_eq!(person.get_by_name("age"), Some(25.0));
        assert_eq!(person.get_by_name("income"), Some(50000.0));
        assert_eq!(person.get_by_name("unknown"), None);

        // A write is visible to the next read.
        person.set_by_name("score", 0.95).unwrap();
        assert_eq!(person.get_by_name("score"), Some(0.95));
    }

    /// Levels can be looked up by name and keep their own dimensionality.
    #[test]
    fn test_hierarchical_vector() {
        let level_vectors = vec![
            Vector::new(vec![1.0, 2.0]),
            Vector::new(vec![1.0, 2.0, 3.0, 4.0]),
            Vector::new(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]),
        ];
        let level_labels = vec![
            String::from("coarse"),
            String::from("medium"),
            String::from("fine"),
        ];

        let hierarchy = HierarchicalVector::new(level_vectors, level_labels).unwrap();

        assert_eq!(hierarchy.levels.len(), 3);
        let medium = hierarchy.get_level_by_name("medium");
        assert!(medium.is_some());
        assert_eq!(medium.unwrap().dimensions, 4);
    }

    /// A freshly created vector is inside its validity window and keeps the
    /// requested decay factor.
    #[test]
    fn test_temporal_vector() {
        let base = Vector::new(vec![1.0, 2.0, 3.0]);
        let stamped = TemporalVector::new(base).with_validity(3600).with_decay(0.9);

        assert!(stamped.is_valid());
        assert_eq!(stamped.decay_factor, 0.9);
    }

    /// Normalised weights sum to one and the heaviest dimension ranks first.
    #[test]
    fn test_weighted_dimension_vector() {
        let mut subject =
            WeightedDimensionVector::new(vec![1.0, 2.0, 3.0], vec![0.1, 0.3, 0.6]).unwrap();
        subject.normalize_weights();

        let weight_total: f32 = subject.weights.iter().sum();
        assert!((weight_total - 1.0).abs() < 1e-6);

        let ranked = subject.top_dimensions(2);
        assert_eq!(ranked.len(), 2);
        // Index 2 carries the largest weight.
        assert_eq!(ranked[0].0, 2);
    }

    /// The overall confidence reflects the scores, and only dimensions below
    /// the threshold are reported as low-confidence.
    #[test]
    fn test_confidence_scored_vector() {
        let scored =
            ConfidenceScoredVector::new(vec![1.0, 2.0, 3.0], vec![0.9, 0.8, 0.95]).unwrap();

        assert!(scored.overall_confidence > 0.8);

        let uncertain = scored.low_confidence_dimensions(0.85);
        assert_eq!(uncertain.len(), 1);
        // Only index 1 (confidence 0.8) is below 0.85.
        assert_eq!(uncertain[0].0, 1);
    }
}