use serde::{Deserialize, Serialize};
use std::collections::HashMap;
/// Content modality recorded on an [`EmbeddingDescriptor`].
///
/// Serde names the variants in snake_case (`rename_all = "snake_case"`),
/// so for example `Text` is (de)serialized as `text`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum Modality {
/// Text modality; [`Modality::as_str`] yields `"text"`.
Text,
/// Image modality; [`Modality::as_str`] yields `"image"`.
Image,
/// Audio modality; [`Modality::as_str`] yields `"audio"`.
Audio,
/// Video modality; [`Modality::as_str`] yields `"video"`.
Video,
}
impl Modality {
pub fn as_str(self) -> &'static str {
match self {
Self::Text => "text",
Self::Image => "image",
Self::Audio => "audio",
Self::Video => "video",
}
}
}
/// A range delimited by a start and an end offset, both stored as `u64`.
///
/// NOTE(review): the `_ms` suffix suggests the offsets are milliseconds —
/// confirm against the code that produces these values.
///
/// Nothing in the type enforces an ordering of the two bounds; call
/// [`TemporalSpan::is_valid`] to check that the span is non-empty.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub struct TemporalSpan {
/// Start offset of the span.
pub start_ms: u64,
/// End offset of the span; must be strictly greater than `start_ms` for
/// [`TemporalSpan::is_valid`] to return `true`.
pub end_ms: u64,
}
impl TemporalSpan {
    /// Reports whether the span covers a non-empty range.
    ///
    /// Returns `true` only when `start_ms` is strictly less than `end_ms`;
    /// zero-length and reversed spans yield `false`.
    pub fn is_valid(&self) -> bool {
        let Self { start_ms, end_ms } = *self;
        start_ms < end_ms
    }
}
/// Identifies an embedding vector by id, together with the model name and
/// dimension count recorded for it.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct EmbeddingLocation {
/// Identifier of the vector.
/// NOTE(review): presumably its key in a vector store — confirm.
pub vector_id: String,
/// Name of the model associated with the vector.
/// NOTE(review): presumably the embedding model that produced it — confirm.
pub model: String,
/// Number of dimensions recorded for the vector.
pub dimensions: usize,
}
impl EmbeddingLocation {
pub fn new(vector_id: impl Into<String>, model: impl Into<String>, dimensions: usize) -> Self {
Self {
vector_id: vector_id.into(),
model: model.into(),
dimensions,
}
}
}
/// Describes one embedding: its modality, the content it refers to, an
/// optional part and time span within that content, and the vector's location.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct EmbeddingDescriptor {
/// Modality of the described content; its `as_str` name is the first
/// segment of [`EmbeddingDescriptor::descriptor_key`].
pub modality: Modality,
/// Identifier of the source content; second segment of the descriptor key.
pub source_content_id: String,
/// Optional part identifier; the descriptor key uses `"root"` when this is
/// `None`.
pub part_id: Option<String>,
/// Optional time span; when present it is appended to the descriptor key
/// as `start_ms-end_ms`.
pub temporal_span: Option<TemporalSpan>,
/// Location information for the embedding vector.
pub location: EmbeddingLocation,
/// String key/value pairs attached to the descriptor; starts empty when
/// built through [`EmbeddingDescriptor::new`].
pub metadata: HashMap<String, String>,
}
impl EmbeddingDescriptor {
    /// Creates a descriptor with no part id, no temporal span and empty
    /// metadata.
    ///
    /// `source_content_id` accepts anything convertible into `String`.
    /// Use [`with_part_id`](Self::with_part_id) and
    /// [`with_temporal_span`](Self::with_temporal_span) to fill in the
    /// optional fields.
    #[must_use]
    pub fn new(
        modality: Modality,
        source_content_id: impl Into<String>,
        location: EmbeddingLocation,
    ) -> Self {
        Self {
            modality,
            source_content_id: source_content_id.into(),
            part_id: None,
            temporal_span: None,
            location,
            metadata: HashMap::new(),
        }
    }

    /// Returns the descriptor with `part_id` set, replacing any previous value.
    ///
    /// This consumes `self`; the returned value must be used, otherwise the
    /// whole descriptor is dropped.
    #[must_use]
    pub fn with_part_id(mut self, part_id: impl Into<String>) -> Self {
        self.part_id = Some(part_id.into());
        self
    }

    /// Returns the descriptor with `temporal_span` set, replacing any previous
    /// value.
    ///
    /// The span is stored as given; it is not checked with
    /// `TemporalSpan::is_valid`. This consumes `self`; the returned value must
    /// be used, otherwise the whole descriptor is dropped.
    #[must_use]
    pub fn with_temporal_span(mut self, temporal_span: TemporalSpan) -> Self {
        self.temporal_span = Some(temporal_span);
        self
    }

    /// Builds a colon-separated key for this descriptor.
    ///
    /// The format is `modality:source_content_id:part`, followed by
    /// `:start_ms-end_ms` when a temporal span is set. `part` is the part id,
    /// or the literal `"root"` when no part id is set.
    ///
    /// Segments are not escaped, so the key is not guaranteed to be unique:
    /// an explicit part id of `"root"` produces the same key as no part id,
    /// and ids containing `:` can make two different descriptors collide.
    /// `location` and `metadata` do not contribute to the key.
    #[must_use]
    pub fn descriptor_key(&self) -> String {
        // A missing part id is represented by the fixed "root" segment.
        let part = self.part_id.as_deref().unwrap_or("root");
        match self.temporal_span {
            Some(span) => format!(
                "{}:{}:{}:{}-{}",
                self.modality.as_str(),
                self.source_content_id,
                part,
                span.start_ms,
                span.end_ms
            ),
            None => format!(
                "{}:{}:{}",
                self.modality.as_str(),
                self.source_content_id,
                part
            ),
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A descriptor with both a part id and a temporal span encodes the span
    /// as a trailing `start-end` segment of its key.
    #[test]
    fn descriptor_key_includes_temporal_span_when_present() {
        let location = EmbeddingLocation::new("vec-1", "mock-audio", 128);
        let span = TemporalSpan {
            start_ms: 1_000,
            end_ms: 2_500,
        };

        let key = EmbeddingDescriptor::new(Modality::Audio, "entry-1", location)
            .with_part_id("seg-3")
            .with_temporal_span(span)
            .descriptor_key();

        assert_eq!(key, String::from("audio:entry-1:seg-3:1000-2500"));
    }

    /// A zero-length span is invalid, while a span whose end is one past its
    /// start is valid.
    #[test]
    fn temporal_span_validation_rejects_empty_spans() {
        let empty = TemporalSpan {
            start_ms: 10,
            end_ms: 10,
        };
        let shortest = TemporalSpan {
            start_ms: 10,
            end_ms: 11,
        };

        assert!(!empty.is_valid());
        assert!(shortest.is_valid());
    }
}