use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use std::path::PathBuf;
use uuid::Uuid;
/// Strongly typed identifier for a cluster, backed by a [`Uuid`].
///
/// The nil UUID is used as a sentinel value: [`ClusterId::noise`] produces it
/// and [`ClusterId::is_noise`] tests for it.
// NOTE(review): the sentinel presumably marks items assigned to no cluster —
// confirm against the clustering code that produces these ids.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct ClusterId(Uuid);

impl ClusterId {
    /// Creates an identifier from a freshly generated version-4 UUID.
    #[must_use]
    pub fn new() -> Self {
        Self::from_uuid(Uuid::new_v4())
    }

    /// Wraps an existing UUID without modifying it.
    #[must_use]
    pub fn from_uuid(uuid: Uuid) -> Self {
        Self(uuid)
    }

    /// Returns a copy of the wrapped UUID.
    #[must_use]
    pub fn as_uuid(&self) -> Uuid {
        let Self(inner) = *self;
        inner
    }

    /// Returns the sentinel identifier, which wraps the nil UUID.
    #[must_use]
    pub fn noise() -> Self {
        Self::from_uuid(Uuid::nil())
    }

    /// Reports whether this identifier wraps the nil UUID, i.e. whether it
    /// equals the value returned by [`ClusterId::noise`].
    #[must_use]
    pub fn is_noise(&self) -> bool {
        self.as_uuid().is_nil()
    }
}
impl Default for ClusterId {
fn default() -> Self {
Self::new()
}
}
/// Prints the identifier using the wrapped UUID's own `Display` output.
impl std::fmt::Display for ClusterId {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", self.as_uuid())
    }
}
impl From<Uuid> for ClusterId {
fn from(uuid: Uuid) -> Self {
Self(uuid)
}
}
/// Strongly typed identifier for an embedding, backed by a [`Uuid`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct EmbeddingId(Uuid);

impl EmbeddingId {
    /// Creates an identifier from a freshly generated version-4 UUID.
    #[must_use]
    pub fn new() -> Self {
        Self::from_uuid(Uuid::new_v4())
    }

    /// Wraps an existing UUID without modifying it.
    #[must_use]
    pub fn from_uuid(uuid: Uuid) -> Self {
        Self(uuid)
    }

    /// Returns a copy of the wrapped UUID.
    #[must_use]
    pub fn as_uuid(&self) -> Uuid {
        let Self(inner) = *self;
        inner
    }
}
impl Default for EmbeddingId {
fn default() -> Self {
Self::new()
}
}
/// Prints the identifier using the wrapped UUID's own `Display` output.
impl std::fmt::Display for EmbeddingId {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", self.as_uuid())
    }
}
impl From<Uuid> for EmbeddingId {
fn from(uuid: Uuid) -> Self {
Self(uuid)
}
}
/// Strongly typed identifier for a recording, backed by a [`Uuid`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct RecordingId(Uuid);

impl RecordingId {
    /// Creates an identifier from a freshly generated version-4 UUID.
    #[must_use]
    pub fn new() -> Self {
        Self::from_uuid(Uuid::new_v4())
    }

    /// Wraps an existing UUID without modifying it.
    #[must_use]
    pub fn from_uuid(uuid: Uuid) -> Self {
        Self(uuid)
    }

    /// Returns a copy of the wrapped UUID.
    #[must_use]
    pub fn as_uuid(&self) -> Uuid {
        let Self(inner) = *self;
        inner
    }
}
impl Default for RecordingId {
fn default() -> Self {
Self::new()
}
}
/// Prints the identifier using the wrapped UUID's own `Display` output.
impl std::fmt::Display for RecordingId {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", self.as_uuid())
    }
}
impl From<Uuid> for RecordingId {
fn from(uuid: Uuid) -> Self {
Self(uuid)
}
}
/// Strongly typed identifier for a segment, backed by a [`Uuid`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct SegmentId(Uuid);

impl SegmentId {
    /// Creates an identifier from a freshly generated version-4 UUID.
    #[must_use]
    pub fn new() -> Self {
        Self::from_uuid(Uuid::new_v4())
    }

    /// Wraps an existing UUID without modifying it.
    #[must_use]
    pub fn from_uuid(uuid: Uuid) -> Self {
        Self(uuid)
    }

    /// Returns a copy of the wrapped UUID.
    #[must_use]
    pub fn as_uuid(&self) -> Uuid {
        let Self(inner) = *self;
        inner
    }
}
impl Default for SegmentId {
fn default() -> Self {
Self::new()
}
}
/// Prints the identifier using the wrapped UUID's own `Display` output.
impl std::fmt::Display for SegmentId {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", self.as_uuid())
    }
}
impl From<Uuid> for SegmentId {
fn from(uuid: Uuid) -> Self {
Self(uuid)
}
}
/// A group of embeddings together with caller-supplied summary values and
/// creation/modification timestamps.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Cluster {
    /// Identifier of this cluster; generated by [`Cluster::new`].
    pub id: ClusterId,
    /// Identifier of the embedding recorded as this cluster's prototype.
    pub prototype_id: EmbeddingId,
    /// Identifiers of the member embeddings. [`Cluster::add_member`] never
    /// inserts an id that is already present.
    pub member_ids: Vec<EmbeddingId>,
    /// Centroid vector as supplied by the caller; its length is not checked here.
    pub centroid: Vec<f32>,
    /// Variance value supplied by the caller alongside the centroid.
    pub variance: f32,
    /// Optional label; [`Cluster::new`] leaves it as `None`.
    pub label: Option<String>,
    /// Time at which [`Cluster::new`] built this value.
    pub created_at: DateTime<Utc>,
    /// Time of the most recent change made through one of the mutating methods.
    pub updated_at: DateTime<Utc>,
}

impl Cluster {
    /// Builds a cluster with a freshly generated id and no label.
    ///
    /// `created_at` and `updated_at` are set to the same instant.
    #[must_use]
    pub fn new(
        prototype_id: EmbeddingId,
        member_ids: Vec<EmbeddingId>,
        centroid: Vec<f32>,
        variance: f32,
    ) -> Self {
        // Read the clock once so both timestamps start out identical.
        let created_at = Utc::now();
        Self {
            id: ClusterId::new(),
            prototype_id,
            member_ids,
            centroid,
            variance,
            label: None,
            created_at,
            updated_at: created_at,
        }
    }

    /// Returns the number of entries in `member_ids`.
    #[must_use]
    pub fn member_count(&self) -> usize {
        self.member_ids.len()
    }

    /// Reports whether `embedding_id` is listed in `member_ids`.
    #[must_use]
    pub fn contains(&self, embedding_id: &EmbeddingId) -> bool {
        self.member_ids.iter().any(|member| member == embedding_id)
    }

    /// Appends `embedding_id` to the member list and refreshes `updated_at`.
    ///
    /// Does nothing (and leaves `updated_at` untouched) when the id is already
    /// a member. The centroid and variance are not recomputed.
    pub fn add_member(&mut self, embedding_id: EmbeddingId) {
        if self.contains(&embedding_id) {
            return;
        }
        self.member_ids.push(embedding_id);
        self.updated_at = Utc::now();
    }

    /// Removes the first entry equal to `embedding_id`, preserving the order
    /// of the remaining members.
    ///
    /// Returns `true` and refreshes `updated_at` when an entry was removed,
    /// `false` when the id was not a member.
    pub fn remove_member(&mut self, embedding_id: &EmbeddingId) -> bool {
        let found = self
            .member_ids
            .iter()
            .position(|member| member == embedding_id);
        match found {
            Some(index) => {
                self.member_ids.remove(index);
                self.updated_at = Utc::now();
                true
            }
            None => false,
        }
    }

    /// Replaces the stored centroid and variance and refreshes `updated_at`.
    pub fn update_centroid(&mut self, centroid: Vec<f32>, variance: f32) {
        self.centroid = centroid;
        self.variance = variance;
        self.updated_at = Utc::now();
    }

    /// Stores `label` (converted into a `String`) and refreshes `updated_at`.
    pub fn set_label(&mut self, label: impl Into<String>) {
        let label: String = label.into();
        self.label = Some(label);
        self.updated_at = Utc::now();
    }
}
/// Record tying an embedding to a cluster together with an exemplar score.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Prototype {
    /// Identifier of the embedding this record refers to.
    pub id: EmbeddingId,
    /// Identifier of the associated cluster.
    pub cluster_id: ClusterId,
    /// Score supplied by the caller.
    // NOTE(review): scale and direction of the score are not defined in this file.
    pub exemplar_score: f32,
    /// Optional spectrogram path; [`Prototype::new`] leaves it as `None`.
    pub spectrogram_path: Option<PathBuf>,
    /// Time at which [`Prototype::new`] built this value.
    pub created_at: DateTime<Utc>,
}

impl Prototype {
    /// Builds a prototype record with no spectrogram path, stamped with the
    /// current UTC time.
    #[must_use]
    pub fn new(id: EmbeddingId, cluster_id: ClusterId, exemplar_score: f32) -> Self {
        let created_at = Utc::now();
        Self {
            id,
            cluster_id,
            exemplar_score,
            spectrogram_path: None,
            created_at,
        }
    }

    /// Stores `path` (converted into a `PathBuf`), replacing any previous value.
    ///
    /// The path is only recorded; nothing is read from or written to disk here.
    pub fn set_spectrogram_path(&mut self, path: impl Into<PathBuf>) {
        let path: PathBuf = path.into();
        self.spectrogram_path = Some(path);
    }
}
/// A sequence of cluster ids recorded as a motif, with summary statistics
/// and, optionally, the concrete occurrences that were attached to it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Motif {
    /// Textual identifier; [`Motif::new`] uses the string form of a fresh v4 UUID.
    pub id: String,
    /// The cluster ids that make up the motif, in order.
    pub sequence: Vec<ClusterId>,
    /// Occurrence count. Supplied by the caller in [`Motif::new`] and then
    /// overwritten by [`Motif::add_occurrence`].
    pub occurrences: usize,
    /// Average duration supplied by the caller (milliseconds, per the field name).
    pub avg_duration_ms: f64,
    /// Confidence value supplied by the caller.
    pub confidence: f32,
    /// Occurrences attached through [`Motif::add_occurrence`]; empty after `new`.
    pub occurrence_instances: Vec<MotifOccurrence>,
    /// Time at which [`Motif::new`] built this value.
    pub discovered_at: DateTime<Utc>,
}

impl Motif {
    /// Builds a motif with a freshly generated id, no attached occurrence
    /// instances, and the current UTC time as `discovered_at`.
    #[must_use]
    pub fn new(
        sequence: Vec<ClusterId>,
        occurrences: usize,
        avg_duration_ms: f64,
        confidence: f32,
    ) -> Self {
        let id = Uuid::new_v4().to_string();
        Self {
            id,
            sequence,
            occurrences,
            avg_duration_ms,
            confidence,
            occurrence_instances: Vec::new(),
            discovered_at: Utc::now(),
        }
    }

    /// Returns the number of cluster ids in `sequence`.
    #[must_use]
    pub fn length(&self) -> usize {
        self.sequence.len()
    }

    /// Attaches `occurrence` and sets `occurrences` to the number of attached
    /// instances. `avg_duration_ms` is not recomputed.
    pub fn add_occurrence(&mut self, occurrence: MotifOccurrence) {
        let instances = &mut self.occurrence_instances;
        instances.push(occurrence);
        // NOTE(review): this replaces whatever count was passed to `new`, so a
        // motif created with `occurrences = 5` reports 1 after the first call.
        // Confirm that resetting (rather than incrementing or taking the max)
        // is the intended behaviour.
        self.occurrences = instances.len();
    }

    /// Reports whether `cluster_id` appears anywhere in `sequence`.
    #[must_use]
    pub fn contains_cluster(&self, cluster_id: &ClusterId) -> bool {
        self.sequence.iter().any(|member| member == cluster_id)
    }
}
/// One concrete occurrence of a motif inside a recording.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MotifOccurrence {
    /// Recording in which the occurrence was found.
    pub recording_id: RecordingId,
    /// Segments that make up the occurrence.
    pub segment_ids: Vec<SegmentId>,
    /// Start time (milliseconds, per the field name).
    pub start_time_ms: u64,
    /// End time (milliseconds, per the field name). Not validated against the start.
    pub end_time_ms: u64,
    /// Similarity value supplied by the caller.
    // NOTE(review): range and reference of the similarity are not defined in this file.
    pub similarity: f32,
}

impl MotifOccurrence {
    /// Stores the given values as-is; no validation is performed.
    #[must_use]
    pub fn new(
        recording_id: RecordingId,
        segment_ids: Vec<SegmentId>,
        start_time_ms: u64,
        end_time_ms: u64,
        similarity: f32,
    ) -> Self {
        Self {
            recording_id,
            segment_ids,
            start_time_ms,
            end_time_ms,
            similarity,
        }
    }

    /// Returns `end_time_ms - start_time_ms`, or `0` when the end precedes the
    /// start (the subtraction never underflows).
    #[must_use]
    pub fn duration_ms(&self) -> u64 {
        self.end_time_ms
            .checked_sub(self.start_time_ms)
            .unwrap_or(0)
    }
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SequenceAnalysis {
pub recording_id: RecordingId,
pub transitions: Vec<(ClusterId, ClusterId, f32)>,
pub entropy: f32,
pub stereotypy_score: f32,
pub cluster_sequence: Vec<ClusterId>,
pub segment_ids: Vec<SegmentId>,
pub analyzed_at: DateTime<Utc>,
}
impl SequenceAnalysis {
#[must_use]
pub fn new(
recording_id: RecordingId,
transitions: Vec<(ClusterId, ClusterId, f32)>,
entropy: f32,
stereotypy_score: f32,
) -> Self {
Self {
recording_id,
transitions,
entropy,
stereotypy_score,
cluster_sequence: Vec::new(),
segment_ids: Vec::new(),
analyzed_at: Utc::now(),
}
}
#[must_use]
pub fn unique_transition_count(&self) -> usize {
self.transitions.len()
}
#[must_use]
pub fn unique_clusters(&self) -> Vec<ClusterId> {
let mut clusters: Vec<ClusterId> = self.cluster_sequence.clone();
clusters.sort_by_key(|c| c.as_uuid());
clusters.dedup();
clusters
}
pub fn set_sequence(&mut self, clusters: Vec<ClusterId>, segments: Vec<SegmentId>) {
self.cluster_sequence = clusters;
self.segment_ids = segments;
}
}
/// Category attached to an [`Anomaly`].
///
/// The variants carry no data. Their precise meaning is not defined in this
/// file; the per-variant notes below are assumptions to be confirmed.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum AnomalyType {
// NOTE(review): presumably an infrequent but otherwise valid item — confirm.
Rare,
// NOTE(review): presumably something not seen before — confirm.
Novel,
// NOTE(review): presumably a recording/processing artifact — confirm.
Artifact,
// NOTE(review): presumably a statistical outlier — confirm.
Outlier,
/// Not yet classified; this is the value `Anomaly::new` assigns.
Unknown,
}
/// Prints the variant name, e.g. `"Rare"`.
///
/// The text is emitted through [`std::fmt::Formatter::pad`], so width, fill
/// and alignment flags (for example `{:<10}` when printing a table) are
/// honoured. Without any flags the output is exactly the variant name. A
/// precision such as `{:.3}` truncates the name, as it does for `str`.
impl std::fmt::Display for AnomalyType {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Exhaustive on purpose: adding a variant must force an update here.
        let name = match self {
            AnomalyType::Rare => "Rare",
            AnomalyType::Novel => "Novel",
            AnomalyType::Artifact => "Artifact",
            AnomalyType::Outlier => "Outlier",
            AnomalyType::Unknown => "Unknown",
        };
        f.pad(name)
    }
}
/// An embedding flagged as anomalous, with the values that describe why.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Anomaly {
    /// Embedding that was flagged.
    pub embedding_id: EmbeddingId,
    /// Anomaly score supplied by the caller; compared by [`Anomaly::is_severe`].
    pub anomaly_score: f32,
    /// Cluster recorded as nearest to the embedding.
    pub nearest_cluster: ClusterId,
    /// Distance to that cluster's centroid, as supplied by the caller.
    pub distance_to_centroid: f32,
    /// Category of the anomaly; [`Anomaly::new`] starts with `AnomalyType::Unknown`.
    pub anomaly_type: AnomalyType,
    /// Optional local-outlier-factor value; `None` until [`Anomaly::set_lof`].
    pub local_outlier_factor: Option<f32>,
    /// Time at which [`Anomaly::new`] built this value.
    pub detected_at: DateTime<Utc>,
}

impl Anomaly {
    /// Builds an anomaly of type `Unknown` with no local outlier factor,
    /// stamped with the current UTC time.
    #[must_use]
    pub fn new(
        embedding_id: EmbeddingId,
        anomaly_score: f32,
        nearest_cluster: ClusterId,
        distance_to_centroid: f32,
    ) -> Self {
        let detected_at = Utc::now();
        Self {
            embedding_id,
            anomaly_score,
            nearest_cluster,
            distance_to_centroid,
            anomaly_type: AnomalyType::Unknown,
            local_outlier_factor: None,
            detected_at,
        }
    }

    /// Replaces the anomaly's category.
    pub fn set_type(&mut self, anomaly_type: AnomalyType) {
        self.anomaly_type = anomaly_type;
    }

    /// Records a local-outlier-factor value, replacing any previous one.
    pub fn set_lof(&mut self, lof: f32) {
        let recorded = Some(lof);
        self.local_outlier_factor = recorded;
    }

    /// Reports whether `anomaly_score` is strictly greater than `threshold`.
    ///
    /// A score equal to the threshold is not severe; if either value is NaN
    /// the comparison is `false`.
    #[must_use]
    pub fn is_severe(&self, threshold: f32) -> bool {
        threshold < self.anomaly_score
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Fresh ids are distinct, only the nil-UUID sentinel counts as noise, and
    /// an id survives a round trip through its UUID.
    #[test]
    fn test_cluster_id_creation() {
        let id1 = ClusterId::new();
        let id2 = ClusterId::new();
        assert_ne!(id1, id2);

        let noise = ClusterId::noise();
        assert!(noise.is_noise());
        assert!(noise.as_uuid().is_nil());
        assert!(!id1.is_noise());

        assert_eq!(ClusterId::from_uuid(id1.as_uuid()), id1);
        assert_eq!(ClusterId::from(id2.as_uuid()), id2);
    }

    /// Members can be added once, removed once, and both operations report
    /// or reflect the no-op cases.
    #[test]
    fn test_cluster_member_operations() {
        let mut cluster = Cluster::new(
            EmbeddingId::new(),
            vec![EmbeddingId::new()],
            vec![0.0; 1536],
            0.1,
        );
        let new_member = EmbeddingId::new();

        cluster.add_member(new_member);
        assert_eq!(cluster.member_count(), 2);
        assert!(cluster.contains(&new_member));

        // Adding an id that is already present must not create a duplicate.
        cluster.add_member(new_member);
        assert_eq!(cluster.member_count(), 2);

        // The return value tells the caller whether anything was removed.
        assert!(cluster.remove_member(&new_member));
        assert_eq!(cluster.member_count(), 1);
        assert!(!cluster.contains(&new_member));

        assert!(!cluster.remove_member(&new_member));
        assert_eq!(cluster.member_count(), 1);
    }

    /// `length` reflects the sequence, and the constructor keeps the supplied
    /// occurrence count while no instances are attached.
    #[test]
    fn test_motif_length() {
        let first = ClusterId::new();
        let motif = Motif::new(
            vec![first, ClusterId::new(), ClusterId::new()],
            5,
            1500.0,
            0.85,
        );
        assert_eq!(motif.length(), 3);
        assert_eq!(motif.occurrences, 5);
        assert!(motif.occurrence_instances.is_empty());
        assert!(motif.contains_cluster(&first));
        assert!(!motif.contains_cluster(&ClusterId::noise()));
    }

    /// Duration is end minus start and saturates at zero for inverted bounds.
    #[test]
    fn test_motif_occurrence_duration() {
        let forward = MotifOccurrence::new(
            RecordingId::new(),
            vec![SegmentId::new()],
            1_000,
            2_500,
            0.9,
        );
        assert_eq!(forward.duration_ms(), 1_500);

        let inverted = MotifOccurrence::new(RecordingId::new(), Vec::new(), 2_500, 1_000, 0.9);
        assert_eq!(inverted.duration_ms(), 0);
    }

    /// Repeated cluster ids collapse to the distinct set.
    #[test]
    fn test_sequence_analysis_unique_clusters() {
        let c1 = ClusterId::new();
        let c2 = ClusterId::new();
        let mut analysis = SequenceAnalysis::new(RecordingId::new(), vec![], 1.5, 0.3);
        assert!(analysis.unique_clusters().is_empty());
        assert_eq!(analysis.unique_transition_count(), 0);

        // Five distinct segment ids (`vec![SegmentId::new(); 5]` would repeat one id).
        let segments: Vec<SegmentId> = (0..5).map(|_| SegmentId::new()).collect();
        analysis.set_sequence(vec![c1, c2, c1, c2, c1], segments);

        let unique = analysis.unique_clusters();
        assert_eq!(unique.len(), 2);
        assert!(unique.contains(&c1));
        assert!(unique.contains(&c2));
        // The stored sequence itself must be left untouched.
        assert_eq!(analysis.cluster_sequence.len(), 5);
        assert_eq!(analysis.segment_ids.len(), 5);
    }

    /// Severity is a strict comparison, and the setters update their fields.
    #[test]
    fn test_anomaly_severity() {
        let mut anomaly = Anomaly::new(EmbeddingId::new(), 0.8, ClusterId::new(), 2.5);
        assert!(anomaly.is_severe(0.5));
        assert!(!anomaly.is_severe(0.9));
        // Equal to the threshold is not severe.
        assert!(!anomaly.is_severe(0.8));

        assert_eq!(anomaly.anomaly_type, AnomalyType::Unknown);
        anomaly.set_type(AnomalyType::Novel);
        assert_eq!(anomaly.anomaly_type, AnomalyType::Novel);

        assert!(anomaly.local_outlier_factor.is_none());
        anomaly.set_lof(1.7);
        assert!(anomaly.local_outlier_factor.is_some());
    }

    /// Unflagged `Display` output is the bare variant name.
    #[test]
    fn test_anomaly_type_display() {
        assert_eq!(AnomalyType::Rare.to_string(), "Rare");
        assert_eq!(AnomalyType::Novel.to_string(), "Novel");
        assert_eq!(AnomalyType::Artifact.to_string(), "Artifact");
        assert_eq!(AnomalyType::Outlier.to_string(), "Outlier");
        assert_eq!(AnomalyType::Unknown.to_string(), "Unknown");
    }
}