// ipfrs_semantic/metadata.rs

1//! Metadata storage and filtering for hybrid search
2//!
3//! This module provides metadata management for vectors, enabling
4//! hybrid search that combines vector similarity with attribute filtering.
5
6use ipfrs_core::{Cid, Result};
7use serde::{Deserialize, Serialize};
8use std::collections::{BTreeMap, HashMap, HashSet};
9use std::sync::{Arc, RwLock};
10use std::time::{SystemTime, UNIX_EPOCH};
11
12// Type aliases for complex index structures
13type StringIndexMap = HashMap<String, HashMap<String, HashSet<Cid>>>;
14type NumericIndexMap = HashMap<String, BTreeMap<i64, HashSet<Cid>>>;
15
16/// Metadata value types
17#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
18pub enum MetadataValue {
19    /// String value
20    String(String),
21    /// Integer value
22    Integer(i64),
23    /// Float value
24    Float(f64),
25    /// Boolean value
26    Boolean(bool),
27    /// Timestamp (Unix epoch seconds)
28    Timestamp(u64),
29    /// Array of strings
30    StringArray(Vec<String>),
31    /// Null value
32    Null,
33}
34
35impl MetadataValue {
36    /// Create a timestamp for now
37    pub fn now() -> Self {
38        let timestamp = SystemTime::now()
39            .duration_since(UNIX_EPOCH)
40            .unwrap_or_default()
41            .as_secs();
42        MetadataValue::Timestamp(timestamp)
43    }
44
45    /// Get as string if possible
46    pub fn as_string(&self) -> Option<&str> {
47        match self {
48            MetadataValue::String(s) => Some(s),
49            _ => None,
50        }
51    }
52
53    /// Get as integer if possible
54    pub fn as_integer(&self) -> Option<i64> {
55        match self {
56            MetadataValue::Integer(i) => Some(*i),
57            _ => None,
58        }
59    }
60
61    /// Get as float if possible
62    pub fn as_float(&self) -> Option<f64> {
63        match self {
64            MetadataValue::Float(f) => Some(*f),
65            MetadataValue::Integer(i) => Some(*i as f64),
66            _ => None,
67        }
68    }
69
70    /// Get as timestamp if possible
71    pub fn as_timestamp(&self) -> Option<u64> {
72        match self {
73            MetadataValue::Timestamp(t) => Some(*t),
74            MetadataValue::Integer(i) if *i >= 0 => Some(*i as u64),
75            _ => None,
76        }
77    }
78
79    /// Get as boolean if possible
80    pub fn as_boolean(&self) -> Option<bool> {
81        match self {
82            MetadataValue::Boolean(b) => Some(*b),
83            _ => None,
84        }
85    }
86}
87
88/// Metadata record for a vector
89#[derive(Debug, Clone, Default, Serialize, Deserialize)]
90pub struct Metadata {
91    /// Key-value pairs
92    pub fields: HashMap<String, MetadataValue>,
93    /// Creation timestamp
94    pub created_at: u64,
95    /// Last updated timestamp
96    pub updated_at: u64,
97}
98
99impl Metadata {
100    /// Create new metadata with current timestamp
101    pub fn new() -> Self {
102        let now = SystemTime::now()
103            .duration_since(UNIX_EPOCH)
104            .unwrap_or_default()
105            .as_secs();
106
107        Self {
108            fields: HashMap::new(),
109            created_at: now,
110            updated_at: now,
111        }
112    }
113
114    /// Set a field value
115    pub fn set(&mut self, key: impl Into<String>, value: MetadataValue) -> &mut Self {
116        self.fields.insert(key.into(), value);
117        self.updated_at = SystemTime::now()
118            .duration_since(UNIX_EPOCH)
119            .unwrap_or_default()
120            .as_secs();
121        self
122    }
123
124    /// Get a field value
125    pub fn get(&self, key: &str) -> Option<&MetadataValue> {
126        self.fields.get(key)
127    }
128
129    /// Check if a field exists
130    pub fn has(&self, key: &str) -> bool {
131        self.fields.contains_key(key)
132    }
133
134    /// Remove a field
135    pub fn remove(&mut self, key: &str) -> Option<MetadataValue> {
136        self.fields.remove(key)
137    }
138
139    /// Builder method for string field
140    pub fn with_string(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
141        self.set(key, MetadataValue::String(value.into()));
142        self
143    }
144
145    /// Builder method for integer field
146    pub fn with_integer(mut self, key: impl Into<String>, value: i64) -> Self {
147        self.set(key, MetadataValue::Integer(value));
148        self
149    }
150
151    /// Builder method for timestamp field
152    pub fn with_timestamp(mut self, key: impl Into<String>, value: u64) -> Self {
153        self.set(key, MetadataValue::Timestamp(value));
154        self
155    }
156
157    /// Builder method for boolean field
158    pub fn with_boolean(mut self, key: impl Into<String>, value: bool) -> Self {
159        self.set(key, MetadataValue::Boolean(value));
160        self
161    }
162}
163
164/// Metadata filter expression
165#[derive(Debug, Clone, Serialize, Deserialize)]
166pub enum MetadataFilter {
167    /// Equality check: field == value
168    Equals(String, MetadataValue),
169    /// Not equal: field != value
170    NotEquals(String, MetadataValue),
171    /// Greater than: field > value
172    GreaterThan(String, MetadataValue),
173    /// Greater than or equal: field >= value
174    GreaterThanOrEqual(String, MetadataValue),
175    /// Less than: field < value
176    LessThan(String, MetadataValue),
177    /// Less than or equal: field <= value
178    LessThanOrEqual(String, MetadataValue),
179    /// String contains: field contains substring
180    Contains(String, String),
181    /// String starts with: field starts with prefix
182    StartsWith(String, String),
183    /// String ends with: field ends with suffix
184    EndsWith(String, String),
185    /// Value in set: field in \[values\]
186    In(String, Vec<MetadataValue>),
187    /// Value not in set: field not in \[values\]
188    NotIn(String, Vec<MetadataValue>),
189    /// Field exists
190    Exists(String),
191    /// Field does not exist
192    NotExists(String),
193    /// Timestamp range: created_at or updated_at within range
194    TimeRange {
195        field: String,
196        start: Option<u64>,
197        end: Option<u64>,
198    },
199    /// Logical AND of multiple filters
200    And(Vec<MetadataFilter>),
201    /// Logical OR of multiple filters
202    Or(Vec<MetadataFilter>),
203    /// Logical NOT
204    Not(Box<MetadataFilter>),
205}
206
207impl MetadataFilter {
208    /// Create an equals filter
209    pub fn eq(field: impl Into<String>, value: MetadataValue) -> Self {
210        MetadataFilter::Equals(field.into(), value)
211    }
212
213    /// Create a not equals filter
214    pub fn ne(field: impl Into<String>, value: MetadataValue) -> Self {
215        MetadataFilter::NotEquals(field.into(), value)
216    }
217
218    /// Create a greater than filter
219    pub fn gt(field: impl Into<String>, value: MetadataValue) -> Self {
220        MetadataFilter::GreaterThan(field.into(), value)
221    }
222
223    /// Create a greater than or equal filter
224    pub fn gte(field: impl Into<String>, value: MetadataValue) -> Self {
225        MetadataFilter::GreaterThanOrEqual(field.into(), value)
226    }
227
228    /// Create a less than filter
229    pub fn lt(field: impl Into<String>, value: MetadataValue) -> Self {
230        MetadataFilter::LessThan(field.into(), value)
231    }
232
233    /// Create a less than or equal filter
234    pub fn lte(field: impl Into<String>, value: MetadataValue) -> Self {
235        MetadataFilter::LessThanOrEqual(field.into(), value)
236    }
237
238    /// Create a time range filter
239    pub fn time_range(field: impl Into<String>, start: Option<u64>, end: Option<u64>) -> Self {
240        MetadataFilter::TimeRange {
241            field: field.into(),
242            start,
243            end,
244        }
245    }
246
247    /// Create an AND filter
248    pub fn and(filters: Vec<MetadataFilter>) -> Self {
249        MetadataFilter::And(filters)
250    }
251
252    /// Create an OR filter
253    pub fn or(filters: Vec<MetadataFilter>) -> Self {
254        MetadataFilter::Or(filters)
255    }
256
257    /// Create a NOT filter
258    pub fn negate(filter: MetadataFilter) -> Self {
259        MetadataFilter::Not(Box::new(filter))
260    }
261
262    /// Evaluate the filter against metadata
263    pub fn matches(&self, metadata: &Metadata) -> bool {
264        match self {
265            MetadataFilter::Equals(field, value) => metadata.get(field) == Some(value),
266            MetadataFilter::NotEquals(field, value) => metadata.get(field) != Some(value),
267            MetadataFilter::GreaterThan(field, value) => {
268                Self::compare_gt(metadata.get(field), value)
269            }
270            MetadataFilter::GreaterThanOrEqual(field, value) => {
271                Self::compare_gte(metadata.get(field), value)
272            }
273            MetadataFilter::LessThan(field, value) => Self::compare_lt(metadata.get(field), value),
274            MetadataFilter::LessThanOrEqual(field, value) => {
275                Self::compare_lte(metadata.get(field), value)
276            }
277            MetadataFilter::Contains(field, substring) => metadata
278                .get(field)
279                .and_then(|v| v.as_string())
280                .is_some_and(|s| s.contains(substring)),
281            MetadataFilter::StartsWith(field, prefix) => metadata
282                .get(field)
283                .and_then(|v| v.as_string())
284                .is_some_and(|s| s.starts_with(prefix)),
285            MetadataFilter::EndsWith(field, suffix) => metadata
286                .get(field)
287                .and_then(|v| v.as_string())
288                .is_some_and(|s| s.ends_with(suffix)),
289            MetadataFilter::In(field, values) => {
290                metadata.get(field).is_some_and(|v| values.contains(v))
291            }
292            MetadataFilter::NotIn(field, values) => {
293                metadata.get(field).is_none_or(|v| !values.contains(v))
294            }
295            MetadataFilter::Exists(field) => metadata.has(field),
296            MetadataFilter::NotExists(field) => !metadata.has(field),
297            MetadataFilter::TimeRange { field, start, end } => {
298                let timestamp = if field == "created_at" {
299                    Some(metadata.created_at)
300                } else if field == "updated_at" {
301                    Some(metadata.updated_at)
302                } else {
303                    metadata.get(field).and_then(|v| v.as_timestamp())
304                };
305
306                timestamp.is_some_and(|t| {
307                    let after_start = start.is_none_or(|s| t >= s);
308                    let before_end = end.is_none_or(|e| t <= e);
309                    after_start && before_end
310                })
311            }
312            MetadataFilter::And(filters) => filters.iter().all(|f| f.matches(metadata)),
313            MetadataFilter::Or(filters) => filters.iter().any(|f| f.matches(metadata)),
314            MetadataFilter::Not(filter) => !filter.matches(metadata),
315        }
316    }
317
318    fn compare_gt(field_value: Option<&MetadataValue>, compare_value: &MetadataValue) -> bool {
319        match (field_value, compare_value) {
320            (Some(MetadataValue::Integer(a)), MetadataValue::Integer(b)) => a > b,
321            (Some(MetadataValue::Float(a)), MetadataValue::Float(b)) => a > b,
322            (Some(MetadataValue::Integer(a)), MetadataValue::Float(b)) => (*a as f64) > *b,
323            (Some(MetadataValue::Float(a)), MetadataValue::Integer(b)) => *a > (*b as f64),
324            (Some(MetadataValue::Timestamp(a)), MetadataValue::Timestamp(b)) => a > b,
325            (Some(MetadataValue::String(a)), MetadataValue::String(b)) => a > b,
326            _ => false,
327        }
328    }
329
330    fn compare_gte(field_value: Option<&MetadataValue>, compare_value: &MetadataValue) -> bool {
331        match (field_value, compare_value) {
332            (Some(MetadataValue::Integer(a)), MetadataValue::Integer(b)) => a >= b,
333            (Some(MetadataValue::Float(a)), MetadataValue::Float(b)) => a >= b,
334            (Some(MetadataValue::Integer(a)), MetadataValue::Float(b)) => (*a as f64) >= *b,
335            (Some(MetadataValue::Float(a)), MetadataValue::Integer(b)) => *a >= (*b as f64),
336            (Some(MetadataValue::Timestamp(a)), MetadataValue::Timestamp(b)) => a >= b,
337            (Some(MetadataValue::String(a)), MetadataValue::String(b)) => a >= b,
338            _ => false,
339        }
340    }
341
342    fn compare_lt(field_value: Option<&MetadataValue>, compare_value: &MetadataValue) -> bool {
343        match (field_value, compare_value) {
344            (Some(MetadataValue::Integer(a)), MetadataValue::Integer(b)) => a < b,
345            (Some(MetadataValue::Float(a)), MetadataValue::Float(b)) => a < b,
346            (Some(MetadataValue::Integer(a)), MetadataValue::Float(b)) => (*a as f64) < *b,
347            (Some(MetadataValue::Float(a)), MetadataValue::Integer(b)) => *a < (*b as f64),
348            (Some(MetadataValue::Timestamp(a)), MetadataValue::Timestamp(b)) => a < b,
349            (Some(MetadataValue::String(a)), MetadataValue::String(b)) => a < b,
350            _ => false,
351        }
352    }
353
354    fn compare_lte(field_value: Option<&MetadataValue>, compare_value: &MetadataValue) -> bool {
355        match (field_value, compare_value) {
356            (Some(MetadataValue::Integer(a)), MetadataValue::Integer(b)) => a <= b,
357            (Some(MetadataValue::Float(a)), MetadataValue::Float(b)) => a <= b,
358            (Some(MetadataValue::Integer(a)), MetadataValue::Float(b)) => (*a as f64) <= *b,
359            (Some(MetadataValue::Float(a)), MetadataValue::Integer(b)) => *a <= (*b as f64),
360            (Some(MetadataValue::Timestamp(a)), MetadataValue::Timestamp(b)) => a <= b,
361            (Some(MetadataValue::String(a)), MetadataValue::String(b)) => a <= b,
362            _ => false,
363        }
364    }
365}
366
367/// Metadata store for CID-indexed metadata
368pub struct MetadataStore {
369    /// CID to metadata mapping
370    data: Arc<RwLock<HashMap<Cid, Metadata>>>,
371    /// Inverted index for string fields (field -> value -> CIDs)
372    string_index: Arc<RwLock<StringIndexMap>>,
373    /// Sorted index for numeric fields (field -> sorted (value, CID) pairs)
374    numeric_index: Arc<RwLock<NumericIndexMap>>,
375    /// Timestamp index for temporal queries
376    timestamp_index: Arc<RwLock<BTreeMap<u64, HashSet<Cid>>>>,
377}
378
379impl Default for MetadataStore {
380    fn default() -> Self {
381        Self::new()
382    }
383}
384
385impl MetadataStore {
386    /// Create a new metadata store
387    pub fn new() -> Self {
388        Self {
389            data: Arc::new(RwLock::new(HashMap::new())),
390            string_index: Arc::new(RwLock::new(HashMap::new())),
391            numeric_index: Arc::new(RwLock::new(HashMap::new())),
392            timestamp_index: Arc::new(RwLock::new(BTreeMap::new())),
393        }
394    }
395
396    /// Insert or update metadata for a CID
397    pub fn insert(&self, cid: Cid, metadata: Metadata) -> Result<()> {
398        // Remove old indexes if updating
399        if self.data.read().unwrap().contains_key(&cid) {
400            self.remove_from_indexes(&cid)?;
401        }
402
403        // Update indexes
404        self.add_to_indexes(&cid, &metadata)?;
405
406        // Store metadata
407        self.data.write().unwrap().insert(cid, metadata);
408
409        Ok(())
410    }
411
412    /// Get metadata for a CID
413    pub fn get(&self, cid: &Cid) -> Option<Metadata> {
414        self.data.read().unwrap().get(cid).cloned()
415    }
416
417    /// Remove metadata for a CID
418    pub fn remove(&self, cid: &Cid) -> Result<Option<Metadata>> {
419        self.remove_from_indexes(cid)?;
420        Ok(self.data.write().unwrap().remove(cid))
421    }
422
423    /// Check if metadata exists for a CID
424    pub fn contains(&self, cid: &Cid) -> bool {
425        self.data.read().unwrap().contains_key(cid)
426    }
427
428    /// Get all CIDs with metadata
429    pub fn cids(&self) -> Vec<Cid> {
430        self.data.read().unwrap().keys().copied().collect()
431    }
432
433    /// Get number of stored metadata records
434    pub fn len(&self) -> usize {
435        self.data.read().unwrap().len()
436    }
437
438    /// Check if store is empty
439    pub fn is_empty(&self) -> bool {
440        self.data.read().unwrap().is_empty()
441    }
442
443    /// Filter CIDs by metadata filter
444    pub fn filter(&self, filter: &MetadataFilter) -> Vec<Cid> {
445        // Try to use indexes for efficient filtering
446        if let Some(cids) = self.filter_with_index(filter) {
447            return cids;
448        }
449
450        // Fall back to linear scan
451        self.data
452            .read()
453            .unwrap()
454            .iter()
455            .filter(|(_, m)| filter.matches(m))
456            .map(|(cid, _)| *cid)
457            .collect()
458    }
459
460    /// Filter using indexes if possible
461    fn filter_with_index(&self, filter: &MetadataFilter) -> Option<Vec<Cid>> {
462        match filter {
463            MetadataFilter::Equals(field, MetadataValue::String(value)) => {
464                let index = self.string_index.read().unwrap();
465                index
466                    .get(field)
467                    .and_then(|field_index| field_index.get(value))
468                    .map(|cids| cids.iter().copied().collect())
469            }
470            MetadataFilter::TimeRange { field, start, end } if field == "created_at" => {
471                let index = self.timestamp_index.read().unwrap();
472                let range_start = start.unwrap_or(0);
473                let range_end = end.unwrap_or(u64::MAX);
474
475                let cids: HashSet<Cid> = index
476                    .range(range_start..=range_end)
477                    .flat_map(|(_, cids)| cids.iter().copied())
478                    .collect();
479
480                Some(cids.into_iter().collect())
481            }
482            MetadataFilter::And(filters) => {
483                // Intersect results from indexed filters
484                let mut result: Option<HashSet<Cid>> = None;
485
486                for f in filters {
487                    if let Some(cids) = self.filter_with_index(f) {
488                        let cid_set: HashSet<Cid> = cids.into_iter().collect();
489                        result = Some(match result {
490                            Some(existing) => existing.intersection(&cid_set).copied().collect(),
491                            None => cid_set,
492                        });
493                    }
494                }
495
496                result.map(|s| s.into_iter().collect())
497            }
498            _ => None,
499        }
500    }
501
502    /// Add metadata to indexes
503    fn add_to_indexes(&self, cid: &Cid, metadata: &Metadata) -> Result<()> {
504        // Index string fields
505        for (key, value) in &metadata.fields {
506            if let MetadataValue::String(s) = value {
507                self.string_index
508                    .write()
509                    .unwrap()
510                    .entry(key.clone())
511                    .or_default()
512                    .entry(s.clone())
513                    .or_default()
514                    .insert(*cid);
515            }
516
517            if let Some(i) = value.as_integer() {
518                self.numeric_index
519                    .write()
520                    .unwrap()
521                    .entry(key.clone())
522                    .or_default()
523                    .entry(i)
524                    .or_default()
525                    .insert(*cid);
526            }
527        }
528
529        // Index creation timestamp
530        self.timestamp_index
531            .write()
532            .unwrap()
533            .entry(metadata.created_at)
534            .or_default()
535            .insert(*cid);
536
537        Ok(())
538    }
539
540    /// Remove metadata from indexes
541    fn remove_from_indexes(&self, cid: &Cid) -> Result<()> {
542        let data = self.data.read().unwrap();
543        if let Some(metadata) = data.get(cid) {
544            // Remove from string index
545            for (key, value) in &metadata.fields {
546                if let MetadataValue::String(s) = value {
547                    if let Some(field_index) = self.string_index.write().unwrap().get_mut(key) {
548                        if let Some(cids) = field_index.get_mut(s) {
549                            cids.remove(cid);
550                        }
551                    }
552                }
553
554                if let Some(i) = value.as_integer() {
555                    if let Some(field_index) = self.numeric_index.write().unwrap().get_mut(key) {
556                        if let Some(cids) = field_index.get_mut(&i) {
557                            cids.remove(cid);
558                        }
559                    }
560                }
561            }
562
563            // Remove from timestamp index
564            if let Some(cids) = self
565                .timestamp_index
566                .write()
567                .unwrap()
568                .get_mut(&metadata.created_at)
569            {
570                cids.remove(cid);
571            }
572        }
573
574        Ok(())
575    }
576
577    /// Get CIDs created within a time range
578    pub fn get_by_time_range(&self, start: Option<u64>, end: Option<u64>) -> Vec<Cid> {
579        let index = self.timestamp_index.read().unwrap();
580        let range_start = start.unwrap_or(0);
581        let range_end = end.unwrap_or(u64::MAX);
582
583        index
584            .range(range_start..=range_end)
585            .flat_map(|(_, cids)| cids.iter().copied())
586            .collect()
587    }
588
589    /// Get unique values for a field
590    pub fn get_field_values(&self, field: &str) -> Vec<MetadataValue> {
591        self.data
592            .read()
593            .unwrap()
594            .values()
595            .filter_map(|m| m.get(field).cloned())
596            .collect::<HashSet<_>>()
597            .into_iter()
598            .collect()
599    }
600
601    /// Get facet counts for a string field
602    pub fn get_facet_counts(&self, field: &str) -> HashMap<String, usize> {
603        let index = self.string_index.read().unwrap();
604        index
605            .get(field)
606            .map(|field_index| {
607                field_index
608                    .iter()
609                    .map(|(value, cids)| (value.clone(), cids.len()))
610                    .collect()
611            })
612            .unwrap_or_default()
613    }
614
615    /// Clear all metadata
616    pub fn clear(&self) {
617        self.data.write().unwrap().clear();
618        self.string_index.write().unwrap().clear();
619        self.numeric_index.write().unwrap().clear();
620        self.timestamp_index.write().unwrap().clear();
621    }
622}
623
624/// Temporal query options
625#[derive(Debug, Clone, Serialize, Deserialize)]
626pub struct TemporalOptions {
627    /// Time range start (Unix timestamp)
628    pub start: Option<u64>,
629    /// Time range end (Unix timestamp)
630    pub end: Option<u64>,
631    /// Apply recency boost to scores
632    pub recency_boost: bool,
633    /// Recency decay factor (higher = faster decay)
634    pub decay_factor: f32,
635    /// Reference time for recency calculation (default: now)
636    pub reference_time: Option<u64>,
637}
638
639impl Default for TemporalOptions {
640    fn default() -> Self {
641        Self {
642            start: None,
643            end: None,
644            recency_boost: false,
645            decay_factor: 1.0,
646            reference_time: None,
647        }
648    }
649}
650
651impl TemporalOptions {
652    /// Create options for a specific time range
653    pub fn range(start: u64, end: u64) -> Self {
654        Self {
655            start: Some(start),
656            end: Some(end),
657            ..Default::default()
658        }
659    }
660
661    /// Create options with recency boosting
662    pub fn with_recency(decay_factor: f32) -> Self {
663        Self {
664            recency_boost: true,
665            decay_factor,
666            ..Default::default()
667        }
668    }
669
670    /// Calculate recency boost multiplier for a timestamp
671    pub fn recency_multiplier(&self, timestamp: u64) -> f32 {
672        if !self.recency_boost {
673            return 1.0;
674        }
675
676        let reference = self.reference_time.unwrap_or_else(|| {
677            SystemTime::now()
678                .duration_since(UNIX_EPOCH)
679                .unwrap_or_default()
680                .as_secs()
681        });
682
683        if timestamp >= reference {
684            return 1.0;
685        }
686
687        let age_seconds = reference - timestamp;
688        let age_days = age_seconds as f32 / 86400.0;
689
690        // Exponential decay: e^(-decay_factor * age_days)
691        (-self.decay_factor * age_days / 30.0).exp()
692    }
693}
694
695impl std::hash::Hash for MetadataValue {
696    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
697        match self {
698            MetadataValue::String(s) => {
699                0u8.hash(state);
700                s.hash(state);
701            }
702            MetadataValue::Integer(i) => {
703                1u8.hash(state);
704                i.hash(state);
705            }
706            MetadataValue::Float(f) => {
707                2u8.hash(state);
708                f.to_bits().hash(state);
709            }
710            MetadataValue::Boolean(b) => {
711                3u8.hash(state);
712                b.hash(state);
713            }
714            MetadataValue::Timestamp(t) => {
715                4u8.hash(state);
716                t.hash(state);
717            }
718            MetadataValue::StringArray(arr) => {
719                5u8.hash(state);
720                arr.hash(state);
721            }
722            MetadataValue::Null => {
723                6u8.hash(state);
724            }
725        }
726    }
727}
728
729impl Eq for MetadataValue {}
730
#[cfg(test)]
mod tests {
    use super::*;

    /// First of two fixed, well-formed CIDs used as record keys.
    fn test_cid() -> Cid {
        "bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi"
            .parse()
            .unwrap()
    }

    /// Second fixed CID, distinct from [`test_cid`].
    fn test_cid2() -> Cid {
        "bafybeiczsscdsbs7ffqz55asqdf3smv6klcw3gofszvwlyarci47bgf354"
            .parse()
            .unwrap()
    }

    #[test]
    fn test_metadata_creation() {
        let metadata = Metadata::new()
            .with_string("type", "document")
            .with_integer("size", 1024)
            .with_boolean("indexed", true);

        assert_eq!(
            metadata.get("type"),
            Some(&MetadataValue::String("document".to_string()))
        );
        assert_eq!(metadata.get("size"), Some(&MetadataValue::Integer(1024)));
        assert_eq!(metadata.get("indexed"), Some(&MetadataValue::Boolean(true)));
    }

    #[test]
    fn test_metadata_filter() {
        let metadata = Metadata::new()
            .with_string("category", "tech")
            .with_integer("views", 100)
            .with_timestamp("published", 1700000000);

        // Equals filter
        assert!(
            MetadataFilter::eq("category", MetadataValue::String("tech".to_string()))
                .matches(&metadata)
        );

        // Greater than filter
        assert!(MetadataFilter::gt("views", MetadataValue::Integer(50)).matches(&metadata));
        assert!(!MetadataFilter::gt("views", MetadataValue::Integer(200)).matches(&metadata));

        // Time range filter
        assert!(
            MetadataFilter::time_range("published", Some(1699999999), Some(1700000001))
                .matches(&metadata)
        );
    }

    #[test]
    fn test_metadata_store() {
        let store = MetadataStore::new();

        let cid1 = test_cid();
        let cid2 = test_cid2();

        let meta1 = Metadata::new()
            .with_string("type", "image")
            .with_integer("size", 1024);

        let meta2 = Metadata::new()
            .with_string("type", "document")
            .with_integer("size", 2048);

        store.insert(cid1, meta1).unwrap();
        store.insert(cid2, meta2).unwrap();

        assert_eq!(store.len(), 2);

        // Filter by type
        let filter = MetadataFilter::eq("type", MetadataValue::String("image".to_string()));
        let results = store.filter(&filter);
        assert_eq!(results.len(), 1);
        assert_eq!(results[0], cid1);
    }

    #[test]
    fn test_compound_filters() {
        let metadata = Metadata::new()
            .with_string("category", "tech")
            .with_integer("views", 100)
            .with_boolean("published", true);

        // AND filter
        let and_filter = MetadataFilter::and(vec![
            MetadataFilter::eq("category", MetadataValue::String("tech".to_string())),
            MetadataFilter::gt("views", MetadataValue::Integer(50)),
        ]);
        assert!(and_filter.matches(&metadata));

        // OR filter
        let or_filter = MetadataFilter::or(vec![
            MetadataFilter::eq("category", MetadataValue::String("science".to_string())),
            MetadataFilter::gt("views", MetadataValue::Integer(50)),
        ]);
        assert!(or_filter.matches(&metadata));

        // NOT filter
        let not_filter = MetadataFilter::negate(MetadataFilter::eq(
            "published",
            MetadataValue::Boolean(false),
        ));
        assert!(not_filter.matches(&metadata));
    }

    #[test]
    fn test_temporal_options() {
        let now = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap()
            .as_secs();

        let options = TemporalOptions {
            recency_boost: true,
            decay_factor: 1.0,
            reference_time: Some(now),
            ..Default::default()
        };

        // Recent timestamp should have high multiplier
        let recent_mult = options.recency_multiplier(now - 86400); // 1 day ago
        assert!(recent_mult > 0.9);

        // Old timestamp should have low multiplier
        let old_mult = options.recency_multiplier(now - 86400 * 90); // 90 days ago
        assert!(old_mult < 0.5);
    }

    #[test]
    fn test_facet_counts() {
        let store = MetadataStore::new();

        // Use known valid CIDs
        let valid_cids = [
            "bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi",
            "bafybeiczsscdsbs7ffqz55asqdf3smv6klcw3gofszvwlyarci47bgf354",
            "bafybeibvfkifsqbapirjrj7zbfwddz5qz5awvbftjgktpcqcxjkzstszlm",
        ];

        // The first two records are images, the third is a doc.
        for (i, cid_str) in valid_cids.iter().enumerate() {
            let cid: Cid = cid_str.parse().unwrap();
            let meta = Metadata::new().with_string("type", if i < 2 { "image" } else { "doc" });
            store.insert(cid, meta).unwrap();
        }

        let counts = store.get_facet_counts("type");
        assert_eq!(counts.get("image").copied().unwrap_or(0), 2);
        assert_eq!(counts.get("doc").copied().unwrap_or(0), 1);
    }
}