Skip to main content

converge_knowledge/ingest/
multimodal.rs

//! Multi-modal embedding descriptors.
//!
//! This module provides storage/indexing descriptors for modality-specific
//! vectors without requiring a concrete embedding backend at this stage.
5
6use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8
9/// Supported content modalities for embedding/indexing.
10#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
11#[serde(rename_all = "snake_case")]
12pub enum Modality {
13    /// Text content.
14    Text,
15    /// Image content.
16    Image,
17    /// Audio content.
18    Audio,
19    /// Video content.
20    Video,
21}
22
23impl Modality {
24    /// Stable string name used in IDs and metadata.
25    pub fn as_str(self) -> &'static str {
26        match self {
27            Self::Text => "text",
28            Self::Image => "image",
29            Self::Audio => "audio",
30            Self::Video => "video",
31        }
32    }
33}
34
35/// Optional temporal span describing a source interval for audio/video vectors.
36#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
37pub struct TemporalSpan {
38    /// Start timestamp in milliseconds.
39    pub start_ms: u64,
40    /// End timestamp in milliseconds.
41    pub end_ms: u64,
42}
43
44impl TemporalSpan {
45    /// Returns `true` when the span is non-empty.
46    pub fn is_valid(&self) -> bool {
47        self.end_ms > self.start_ms
48    }
49}
50
51/// Location of a vector in a storage/index backend.
52#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
53pub struct EmbeddingLocation {
54    /// Opaque vector identifier in the vector index/store.
55    pub vector_id: String,
56    /// Embedding model name or alias.
57    pub model: String,
58    /// Vector dimensionality.
59    pub dimensions: usize,
60}
61
62impl EmbeddingLocation {
63    /// Create a vector location descriptor.
64    pub fn new(vector_id: impl Into<String>, model: impl Into<String>, dimensions: usize) -> Self {
65        Self {
66            vector_id: vector_id.into(),
67            model: model.into(),
68            dimensions,
69        }
70    }
71}
72
73/// Descriptor connecting a modality-specific vector to source content.
74#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
75pub struct EmbeddingDescriptor {
76    /// Modality this vector represents.
77    pub modality: Modality,
78    /// Logical source content ID (entry/chunk/asset identifier).
79    pub source_content_id: String,
80    /// Source sub-part identifier (e.g., block/chunk/frame/segment).
81    pub part_id: Option<String>,
82    /// Optional time span for audio/video-derived vectors.
83    pub temporal_span: Option<TemporalSpan>,
84    /// Where the vector is stored.
85    pub location: EmbeddingLocation,
86    /// Additional modality-specific metadata.
87    pub metadata: HashMap<String, String>,
88}
89
90impl EmbeddingDescriptor {
91    /// Create a new embedding descriptor.
92    pub fn new(
93        modality: Modality,
94        source_content_id: impl Into<String>,
95        location: EmbeddingLocation,
96    ) -> Self {
97        Self {
98            modality,
99            source_content_id: source_content_id.into(),
100            part_id: None,
101            temporal_span: None,
102            location,
103            metadata: HashMap::new(),
104        }
105    }
106
107    /// Set the source part ID.
108    pub fn with_part_id(mut self, part_id: impl Into<String>) -> Self {
109        self.part_id = Some(part_id.into());
110        self
111    }
112
113    /// Set the temporal span.
114    pub fn with_temporal_span(mut self, temporal_span: TemporalSpan) -> Self {
115        self.temporal_span = Some(temporal_span);
116        self
117    }
118
119    /// Deterministic key for upsert/idempotent indexing.
120    pub fn descriptor_key(&self) -> String {
121        let part = self.part_id.as_deref().unwrap_or("root");
122        match self.temporal_span {
123            Some(span) => format!(
124                "{}:{}:{}:{}-{}",
125                self.modality.as_str(),
126                self.source_content_id,
127                part,
128                span.start_ms,
129                span.end_ms
130            ),
131            None => format!(
132                "{}:{}:{}",
133                self.modality.as_str(),
134                self.source_content_id,
135                part
136            ),
137        }
138    }
139}
140
#[cfg(test)]
mod tests {
    use super::*;

    /// A descriptor with both a part ID and a temporal span must append the
    /// `start-end` millisecond range after the part in its key.
    #[test]
    fn descriptor_key_includes_temporal_span_when_present() {
        let location = EmbeddingLocation::new("vec-1", "mock-audio", 128);
        let span = TemporalSpan {
            start_ms: 1_000,
            end_ms: 2_500,
        };
        let descriptor = EmbeddingDescriptor::new(Modality::Audio, "entry-1", location)
            .with_part_id("seg-3")
            .with_temporal_span(span);

        let key = descriptor.descriptor_key();

        assert_eq!(key, "audio:entry-1:seg-3:1000-2500");
    }

    /// A zero-length span is invalid; a span one millisecond long is valid.
    #[test]
    fn temporal_span_validation_rejects_empty_spans() {
        let empty = TemporalSpan {
            start_ms: 10,
            end_ms: 10,
        };
        let one_ms = TemporalSpan {
            start_ms: 10,
            end_ms: 11,
        };

        assert!(!empty.is_valid());
        assert!(one_ms.is_valid());
    }
}