converge_knowledge/ingest/
multimodal.rs1use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8
9#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
11#[serde(rename_all = "snake_case")]
12pub enum Modality {
13 Text,
15 Image,
17 Audio,
19 Video,
21}
22
23impl Modality {
24 pub fn as_str(self) -> &'static str {
26 match self {
27 Self::Text => "text",
28 Self::Image => "image",
29 Self::Audio => "audio",
30 Self::Video => "video",
31 }
32 }
33}
34
35#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
37pub struct TemporalSpan {
38 pub start_ms: u64,
40 pub end_ms: u64,
42}
43
44impl TemporalSpan {
45 pub fn is_valid(&self) -> bool {
47 self.end_ms > self.start_ms
48 }
49}
50
51#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
53pub struct EmbeddingLocation {
54 pub vector_id: String,
56 pub model: String,
58 pub dimensions: usize,
60}
61
62impl EmbeddingLocation {
63 pub fn new(vector_id: impl Into<String>, model: impl Into<String>, dimensions: usize) -> Self {
65 Self {
66 vector_id: vector_id.into(),
67 model: model.into(),
68 dimensions,
69 }
70 }
71}
72
73#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
75pub struct EmbeddingDescriptor {
76 pub modality: Modality,
78 pub source_content_id: String,
80 pub part_id: Option<String>,
82 pub temporal_span: Option<TemporalSpan>,
84 pub location: EmbeddingLocation,
86 pub metadata: HashMap<String, String>,
88}
89
90impl EmbeddingDescriptor {
91 pub fn new(
93 modality: Modality,
94 source_content_id: impl Into<String>,
95 location: EmbeddingLocation,
96 ) -> Self {
97 Self {
98 modality,
99 source_content_id: source_content_id.into(),
100 part_id: None,
101 temporal_span: None,
102 location,
103 metadata: HashMap::new(),
104 }
105 }
106
107 pub fn with_part_id(mut self, part_id: impl Into<String>) -> Self {
109 self.part_id = Some(part_id.into());
110 self
111 }
112
113 pub fn with_temporal_span(mut self, temporal_span: TemporalSpan) -> Self {
115 self.temporal_span = Some(temporal_span);
116 self
117 }
118
119 pub fn descriptor_key(&self) -> String {
121 let part = self.part_id.as_deref().unwrap_or("root");
122 match self.temporal_span {
123 Some(span) => format!(
124 "{}:{}:{}:{}-{}",
125 self.modality.as_str(),
126 self.source_content_id,
127 part,
128 span.start_ms,
129 span.end_ms
130 ),
131 None => format!(
132 "{}:{}:{}",
133 self.modality.as_str(),
134 self.source_content_id,
135 part
136 ),
137 }
138 }
139}
140
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an audio descriptor with no part id and no temporal span, so
    /// each test adds only the pieces it is exercising.
    fn audio_descriptor() -> EmbeddingDescriptor {
        EmbeddingDescriptor::new(
            Modality::Audio,
            "entry-1",
            EmbeddingLocation::new("vec-1", "mock-audio", 128),
        )
    }

    #[test]
    fn descriptor_key_includes_temporal_span_when_present() {
        let descriptor = audio_descriptor()
            .with_part_id("seg-3")
            .with_temporal_span(TemporalSpan {
                start_ms: 1_000,
                end_ms: 2_500,
            });

        assert_eq!(
            descriptor.descriptor_key(),
            "audio:entry-1:seg-3:1000-2500".to_string()
        );
    }

    #[test]
    fn descriptor_key_omits_span_when_absent() {
        let descriptor = audio_descriptor().with_part_id("seg-3");

        assert_eq!(
            descriptor.descriptor_key(),
            "audio:entry-1:seg-3".to_string()
        );
    }

    #[test]
    fn descriptor_key_falls_back_to_root_without_part_id() {
        // No part id: the key uses the literal "root" placeholder instead.
        assert_eq!(
            audio_descriptor().descriptor_key(),
            "audio:entry-1:root".to_string()
        );

        let spanned = audio_descriptor().with_temporal_span(TemporalSpan {
            start_ms: 0,
            end_ms: 40,
        });
        assert_eq!(
            spanned.descriptor_key(),
            "audio:entry-1:root:0-40".to_string()
        );
    }

    #[test]
    fn temporal_span_validation_rejects_empty_spans() {
        assert!(
            !TemporalSpan {
                start_ms: 10,
                end_ms: 10
            }
            .is_valid()
        );
        assert!(
            TemporalSpan {
                start_ms: 10,
                end_ms: 11
            }
            .is_valid()
        );
    }

    #[test]
    fn temporal_span_validation_rejects_inverted_spans() {
        // An end before the start must be rejected just like an empty span.
        assert!(
            !TemporalSpan {
                start_ms: 20,
                end_ms: 10
            }
            .is_valid()
        );
    }
}