oxirs_vec/
structured_vectors.rs

1//! Structured vector types for enhanced vector representations.
2//!
3//! This module provides advanced vector types including:
4//! - Named dimension vectors for interpretable embeddings
5//! - Hierarchical vectors for multi-level representations
6//! - Temporal vectors with timestamp support
7//! - Weighted dimension vectors for importance scoring
8//! - Confidence-scored vectors for uncertainty modeling
9
10use std::collections::HashMap;
11use std::time::SystemTime;
12
13use anyhow::Result;
14use serde::{Deserialize, Serialize};
15
16use crate::{Vector, VectorData};
17
18/// Named dimension vector where each dimension has a semantic name
19#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
20pub struct NamedDimensionVector {
21    /// Mapping from dimension names to indices
22    pub dimension_names: HashMap<String, usize>,
23    /// Underlying vector data
24    pub vector: Vector,
25}
26
27impl NamedDimensionVector {
28    /// Create a new named dimension vector
29    pub fn new(dimension_names: Vec<String>, values: Vec<f32>) -> Result<Self> {
30        if dimension_names.len() != values.len() {
31            return Err(anyhow::anyhow!("Dimension names must match values length"));
32        }
33
34        let mut name_map = HashMap::new();
35        for (idx, name) in dimension_names.iter().enumerate() {
36            if name_map.contains_key(name) {
37                return Err(anyhow::anyhow!("Duplicate dimension name: {}", name));
38            }
39            name_map.insert(name.clone(), idx);
40        }
41
42        Ok(Self {
43            dimension_names: name_map,
44            vector: Vector::new(values),
45        })
46    }
47
48    /// Get value by dimension name
49    pub fn get_by_name(&self, name: &str) -> Option<f32> {
50        self.dimension_names
51            .get(name)
52            .and_then(|&idx| match &self.vector.values {
53                VectorData::F32(values) => values.get(idx).copied(),
54                _ => {
55                    let f32_values = self.vector.as_f32();
56                    f32_values.get(idx).copied()
57                }
58            })
59    }
60
61    /// Set value by dimension name
62    pub fn set_by_name(&mut self, name: &str, value: f32) -> Result<()> {
63        if let Some(&idx) = self.dimension_names.get(name) {
64            match &mut self.vector.values {
65                VectorData::F32(values) => {
66                    if idx < values.len() {
67                        values[idx] = value;
68                        Ok(())
69                    } else {
70                        Err(anyhow::anyhow!("Index out of bounds"))
71                    }
72                }
73                _ => Err(anyhow::anyhow!(
74                    "Vector type must be F32 for direct modification"
75                )),
76            }
77        } else {
78            Err(anyhow::anyhow!("Unknown dimension name: {}", name))
79        }
80    }
81
82    /// Get dimension names in order
83    pub fn dimension_names_ordered(&self) -> Vec<String> {
84        let mut names: Vec<(String, usize)> = self
85            .dimension_names
86            .iter()
87            .map(|(name, &idx)| (name.clone(), idx))
88            .collect();
89        names.sort_by_key(|(_, idx)| *idx);
90        names.into_iter().map(|(name, _)| name).collect()
91    }
92}
93
94/// Hierarchical vector with multiple levels of embeddings
95#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
96pub struct HierarchicalVector {
97    /// Hierarchy levels from coarse to fine
98    pub levels: Vec<Vector>,
99    /// Level names/descriptions
100    pub level_names: Vec<String>,
101    /// Metadata for each level
102    pub level_metadata: Vec<HashMap<String, String>>,
103}
104
105impl HierarchicalVector {
106    /// Create a new hierarchical vector
107    pub fn new(levels: Vec<Vector>, level_names: Vec<String>) -> Result<Self> {
108        if levels.len() != level_names.len() {
109            return Err(anyhow::anyhow!("Levels and names must have same length"));
110        }
111
112        if levels.is_empty() {
113            return Err(anyhow::anyhow!("Must have at least one level"));
114        }
115
116        let level_metadata = vec![HashMap::new(); levels.len()];
117
118        Ok(Self {
119            levels,
120            level_names,
121            level_metadata,
122        })
123    }
124
125    /// Get vector at specific level
126    pub fn get_level(&self, level: usize) -> Option<&Vector> {
127        self.levels.get(level)
128    }
129
130    /// Get vector by level name
131    pub fn get_level_by_name(&self, name: &str) -> Option<&Vector> {
132        self.level_names
133            .iter()
134            .position(|n| n == name)
135            .and_then(|idx| self.levels.get(idx))
136    }
137
138    /// Add metadata to a level
139    pub fn add_level_metadata(&mut self, level: usize, key: String, value: String) -> Result<()> {
140        if level >= self.levels.len() {
141            return Err(anyhow::anyhow!("Level index out of bounds"));
142        }
143        self.level_metadata[level].insert(key, value);
144        Ok(())
145    }
146
147    /// Compute similarity at specific level
148    pub fn cosine_similarity_at_level(
149        &self,
150        other: &HierarchicalVector,
151        level: usize,
152    ) -> Result<f32> {
153        let self_vec = self
154            .get_level(level)
155            .ok_or_else(|| anyhow::anyhow!("Level {} not found in self", level))?;
156        let other_vec = other
157            .get_level(level)
158            .ok_or_else(|| anyhow::anyhow!("Level {} not found in other", level))?;
159
160        self_vec.cosine_similarity(other_vec)
161    }
162
163    /// Compute weighted similarity across all levels
164    pub fn weighted_similarity(&self, other: &HierarchicalVector, weights: &[f32]) -> Result<f32> {
165        if self.levels.len() != other.levels.len() {
166            return Err(anyhow::anyhow!(
167                "Hierarchical vectors must have same number of levels"
168            ));
169        }
170
171        if weights.len() != self.levels.len() {
172            return Err(anyhow::anyhow!("Weights must match number of levels"));
173        }
174
175        let mut total_similarity = 0.0;
176        let mut total_weight = 0.0;
177
178        for (i, weight) in weights.iter().enumerate() {
179            if *weight > 0.0 {
180                let sim = self.cosine_similarity_at_level(other, i)?;
181                total_similarity += sim * weight;
182                total_weight += weight;
183            }
184        }
185
186        if total_weight > 0.0 {
187            Ok(total_similarity / total_weight)
188        } else {
189            Ok(0.0)
190        }
191    }
192}
193
194/// Temporal vector with timestamp information
195#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
196pub struct TemporalVector {
197    /// The vector value
198    pub vector: Vector,
199    /// Timestamp when the vector was created/computed
200    pub timestamp: SystemTime,
201    /// Optional validity duration in seconds
202    pub validity_duration: Option<u64>,
203    /// Time-based decay factor (0.0 to 1.0)
204    pub decay_factor: f32,
205}
206
207impl TemporalVector {
208    /// Create a new temporal vector
209    pub fn new(vector: Vector) -> Self {
210        Self {
211            vector,
212            timestamp: SystemTime::now(),
213            validity_duration: None,
214            decay_factor: 1.0,
215        }
216    }
217
218    /// Create with specific timestamp
219    pub fn with_timestamp(vector: Vector, timestamp: SystemTime) -> Self {
220        Self {
221            vector,
222            timestamp,
223            validity_duration: None,
224            decay_factor: 1.0,
225        }
226    }
227
228    /// Set validity duration
229    pub fn with_validity(mut self, duration_secs: u64) -> Self {
230        self.validity_duration = Some(duration_secs);
231        self
232    }
233
234    /// Set decay factor
235    pub fn with_decay(mut self, decay_factor: f32) -> Self {
236        self.decay_factor = decay_factor.clamp(0.0, 1.0);
237        self
238    }
239
240    /// Check if vector is still valid
241    pub fn is_valid(&self) -> bool {
242        if let Some(duration) = self.validity_duration {
243            if let Ok(elapsed) = self.timestamp.elapsed() {
244                return elapsed.as_secs() < duration;
245            }
246        }
247        true
248    }
249
250    /// Get time-decayed similarity
251    pub fn decayed_similarity(&self, other: &TemporalVector) -> Result<f32> {
252        let base_similarity = self.vector.cosine_similarity(&other.vector)?;
253
254        // Apply time decay based on age difference
255        let self_age = self.timestamp.elapsed().unwrap_or_default().as_secs_f32();
256        let other_age = other.timestamp.elapsed().unwrap_or_default().as_secs_f32();
257        let age_diff = (self_age - other_age).abs();
258
259        // Exponential decay based on age difference
260        let decay = (-age_diff * (1.0 - self.decay_factor) / 3600.0).exp(); // Hourly decay
261
262        Ok(base_similarity * decay)
263    }
264}
265
266/// Weighted dimension vector where each dimension has an importance weight
267#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
268pub struct WeightedDimensionVector {
269    /// The vector values
270    pub vector: Vector,
271    /// Importance weights for each dimension
272    pub weights: Vec<f32>,
273}
274
275impl WeightedDimensionVector {
276    /// Create a new weighted dimension vector
277    pub fn new(values: Vec<f32>, weights: Vec<f32>) -> Result<Self> {
278        if values.len() != weights.len() {
279            return Err(anyhow::anyhow!("Values and weights must have same length"));
280        }
281
282        // Validate weights are non-negative
283        if weights.iter().any(|&w| w < 0.0) {
284            return Err(anyhow::anyhow!("Weights must be non-negative"));
285        }
286
287        Ok(Self {
288            vector: Vector::new(values),
289            weights,
290        })
291    }
292
293    /// Create with uniform weights
294    pub fn uniform(values: Vec<f32>) -> Self {
295        let weight = 1.0 / values.len() as f32;
296        let weights = vec![weight; values.len()];
297        Self {
298            vector: Vector::new(values),
299            weights,
300        }
301    }
302
303    /// Normalize weights to sum to 1.0
304    pub fn normalize_weights(&mut self) {
305        let sum: f32 = self.weights.iter().sum();
306        if sum > 0.0 {
307            for weight in &mut self.weights {
308                *weight /= sum;
309            }
310        }
311    }
312
313    /// Compute weighted cosine similarity
314    pub fn weighted_cosine_similarity(&self, other: &WeightedDimensionVector) -> Result<f32> {
315        if self.vector.dimensions != other.vector.dimensions {
316            return Err(anyhow::anyhow!("Vector dimensions must match"));
317        }
318
319        let self_values = self.vector.as_f32();
320        let other_values = other.vector.as_f32();
321
322        // Combine weights (e.g., by averaging)
323        let combined_weights: Vec<f32> = self
324            .weights
325            .iter()
326            .zip(&other.weights)
327            .map(|(w1, w2)| (w1 + w2) / 2.0)
328            .collect();
329
330        let weighted_dot: f32 = self_values
331            .iter()
332            .zip(&other_values)
333            .zip(&combined_weights)
334            .map(|((a, b), w)| a * b * w)
335            .sum();
336
337        let self_magnitude: f32 = self_values
338            .iter()
339            .zip(&self.weights)
340            .map(|(v, w)| v * v * w)
341            .sum::<f32>()
342            .sqrt();
343
344        let other_magnitude: f32 = other_values
345            .iter()
346            .zip(&other.weights)
347            .map(|(v, w)| v * v * w)
348            .sum::<f32>()
349            .sqrt();
350
351        if self_magnitude == 0.0 || other_magnitude == 0.0 {
352            return Ok(0.0);
353        }
354
355        Ok(weighted_dot / (self_magnitude * other_magnitude))
356    }
357
358    /// Get the most important dimensions
359    pub fn top_dimensions(&self, k: usize) -> Vec<(usize, f32, f32)> {
360        let mut indexed: Vec<(usize, f32, f32)> = self
361            .vector
362            .as_f32()
363            .iter()
364            .zip(&self.weights)
365            .enumerate()
366            .map(|(idx, (&value, &weight))| (idx, value, weight))
367            .collect();
368
369        indexed.sort_by(|a, b| b.2.partial_cmp(&a.2).unwrap_or(std::cmp::Ordering::Equal));
370        indexed.truncate(k);
371        indexed
372    }
373}
374
375/// Confidence-scored vector with uncertainty estimates
376#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
377pub struct ConfidenceScoredVector {
378    /// The mean vector values
379    pub mean: Vector,
380    /// Confidence scores or standard deviations for each dimension
381    pub confidence: Vec<f32>,
382    /// Overall confidence score (0.0 to 1.0)
383    pub overall_confidence: f32,
384}
385
386impl ConfidenceScoredVector {
387    /// Create a new confidence-scored vector
388    pub fn new(mean_values: Vec<f32>, confidence_scores: Vec<f32>) -> Result<Self> {
389        if mean_values.len() != confidence_scores.len() {
390            return Err(anyhow::anyhow!(
391                "Mean values and confidence scores must have same length"
392            ));
393        }
394
395        // Validate confidence scores
396        if confidence_scores.iter().any(|&c| !(0.0..=1.0).contains(&c)) {
397            return Err(anyhow::anyhow!(
398                "Confidence scores must be between 0.0 and 1.0"
399            ));
400        }
401
402        let overall_confidence =
403            confidence_scores.iter().sum::<f32>() / confidence_scores.len() as f32;
404
405        Ok(Self {
406            mean: Vector::new(mean_values),
407            confidence: confidence_scores,
408            overall_confidence,
409        })
410    }
411
412    /// Create with uniform high confidence
413    pub fn high_confidence(values: Vec<f32>) -> Self {
414        let confidence = vec![0.95; values.len()];
415        Self {
416            mean: Vector::new(values),
417            overall_confidence: 0.95,
418            confidence,
419        }
420    }
421
422    /// Compute similarity with confidence weighting
423    pub fn confidence_weighted_similarity(&self, other: &ConfidenceScoredVector) -> Result<f32> {
424        if self.mean.dimensions != other.mean.dimensions {
425            return Err(anyhow::anyhow!("Vector dimensions must match"));
426        }
427
428        let self_values = self.mean.as_f32();
429        let other_values = other.mean.as_f32();
430
431        // Use confidence scores as weights
432        let weighted_dot: f32 = self_values
433            .iter()
434            .zip(&other_values)
435            .zip(self.confidence.iter().zip(&other.confidence))
436            .map(|((a, b), (c1, c2))| a * b * c1 * c2)
437            .sum();
438
439        let self_magnitude: f32 = self_values
440            .iter()
441            .zip(&self.confidence)
442            .map(|(v, c)| v * v * c)
443            .sum::<f32>()
444            .sqrt();
445
446        let other_magnitude: f32 = other_values
447            .iter()
448            .zip(&other.confidence)
449            .map(|(v, c)| v * v * c)
450            .sum::<f32>()
451            .sqrt();
452
453        if self_magnitude == 0.0 || other_magnitude == 0.0 {
454            return Ok(0.0);
455        }
456
457        let similarity = weighted_dot / (self_magnitude * other_magnitude);
458
459        // Scale by overall confidence
460        Ok(similarity * self.overall_confidence * other.overall_confidence)
461    }
462
463    /// Sample vector from confidence distribution (assuming Gaussian)
464    pub fn sample(&self) -> Vector {
465        use crate::random_utils::NormalSampler as Normal;
466        use scirs2_core::random::Random;
467
468        let mut rng = Random::seed(42);
469        let values = self.mean.as_f32();
470        let mut sampled = Vec::new();
471
472        for (i, &mean_val) in values.iter().enumerate() {
473            let std_dev = (1.0 - self.confidence[i]) * mean_val.abs() * 0.1; // Convert confidence to std dev
474            if std_dev > 0.0 {
475                let normal = Normal::new(mean_val, std_dev).unwrap();
476                sampled.push(normal.sample(&mut rng));
477            } else {
478                sampled.push(mean_val);
479            }
480        }
481
482        Vector::new(sampled)
483    }
484
485    /// Get dimensions with low confidence
486    pub fn low_confidence_dimensions(&self, threshold: f32) -> Vec<(usize, f32, f32)> {
487        self.mean
488            .as_f32()
489            .iter()
490            .zip(&self.confidence)
491            .enumerate()
492            .filter(|&(_, (_, &conf))| conf < threshold)
493            .map(|(idx, (&value, &conf))| (idx, value, conf))
494            .collect()
495    }
496}
497
#[cfg(test)]
mod tests {
    use super::*;

    /// Values can be read and overwritten through their dimension names.
    #[test]
    fn test_named_dimension_vector() {
        let labels: Vec<String> = ["age", "income", "score"]
            .iter()
            .map(|label| label.to_string())
            .collect();

        let mut profile = NamedDimensionVector::new(labels, vec![25.0, 50000.0, 0.85]).unwrap();

        assert_eq!(profile.get_by_name("age"), Some(25.0));
        assert_eq!(profile.get_by_name("income"), Some(50000.0));
        assert_eq!(profile.get_by_name("unknown"), None);

        profile.set_by_name("score", 0.95).unwrap();
        assert_eq!(profile.get_by_name("score"), Some(0.95));
    }

    /// Levels are stored in order and can be fetched by name.
    #[test]
    fn test_hierarchical_vector() {
        let tiers = vec![
            Vector::new(vec![1.0, 2.0]),
            Vector::new(vec![1.0, 2.0, 3.0, 4.0]),
            Vector::new(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]),
        ];
        let tier_names: Vec<String> = ["coarse", "medium", "fine"]
            .iter()
            .map(|label| label.to_string())
            .collect();

        let pyramid = HierarchicalVector::new(tiers, tier_names).unwrap();

        assert_eq!(pyramid.levels.len(), 3);
        assert!(pyramid.get_level_by_name("medium").is_some());
        assert_eq!(pyramid.get_level_by_name("medium").unwrap().dimensions, 4);
    }

    /// A freshly created vector is valid and keeps its configured decay.
    #[test]
    fn test_temporal_vector() {
        let one_hour_secs = 3600;
        let stamped = TemporalVector::new(Vector::new(vec![1.0, 2.0, 3.0]))
            .with_validity(one_hour_secs)
            .with_decay(0.9);

        assert!(stamped.is_valid());
        assert_eq!(stamped.decay_factor, 0.9);
    }

    /// Normalized weights sum to one and the heaviest dimension ranks first.
    #[test]
    fn test_weighted_dimension_vector() {
        let mut scored =
            WeightedDimensionVector::new(vec![1.0, 2.0, 3.0], vec![0.1, 0.3, 0.6]).unwrap();
        scored.normalize_weights();

        let total: f32 = scored.weights.iter().sum();
        assert!((total - 1.0).abs() < 1e-6);

        let leaders = scored.top_dimensions(2);
        assert_eq!(leaders.len(), 2);
        assert_eq!(leaders[0].0, 2); // Index of highest weight
    }

    /// Overall confidence is the mean and low-confidence dimensions are found.
    #[test]
    fn test_confidence_scored_vector() {
        let estimate =
            ConfidenceScoredVector::new(vec![1.0, 2.0, 3.0], vec![0.9, 0.8, 0.95]).unwrap();

        assert!(estimate.overall_confidence > 0.8);

        let shaky = estimate.low_confidence_dimensions(0.85);
        assert_eq!(shaky.len(), 1);
        assert_eq!(shaky[0].0, 1); // Index with 0.8 confidence
    }
}
577}