sevensense_embedding/lib.rs
//! # sevensense-embedding
//!
//! Embedding bounded context for the 7sense bioacoustics platform.
//!
//! This crate provides Perch 2.0 ONNX integration for generating 1536-dimensional
//! embeddings from preprocessed audio segments. It handles model loading, inference,
//! normalization, and quantization for efficient storage and retrieval.
//!
//! ## Architecture
//!
//! The crate follows Domain-Driven Design (DDD) principles:
//!
//! - **Domain Layer**: Core entities (`Embedding`, `EmbeddingModel`) and repository traits
//! - **Application Layer**: Services for embedding generation and batch processing
//! - **Infrastructure Layer**: ONNX Runtime integration and model management
//!
//! ## Usage
//!
//! ```rust,ignore
//! use sevensense_embedding::{
//!     EmbeddingService, ModelManager, ModelConfig,
//!     domain::Embedding,
//! };
//!
//! // Initialize model manager
//! let config = ModelConfig::default();
//! let model_manager = ModelManager::new(config)?;
//!
//! // Create embedding service
//! let service = EmbeddingService::new(model_manager, 8);
//!
//! // Generate embedding from spectrogram
//! let embedding = service.embed_segment(&spectrogram).await?;
//! ```
//!
//! ## Features
//!
//! - **Perch 2.0 Integration**: Full support for EfficientNet-B3 bioacoustic embeddings
//! - **Batch Processing**: Efficient batch inference with configurable batch sizes
//! - **Model Hot-Swap**: Update models without service restart
//! - **Quantization**: F16 and INT8 quantization for reduced storage
//! - **Validation**: Comprehensive embedding validation (NaN detection, dimension checks)
43
44#![warn(missing_docs)]
45#![warn(clippy::all)]
46#![warn(clippy::pedantic)]
47#![allow(clippy::module_name_repetitions)]
48
49pub mod domain;
50pub mod application;
51pub mod infrastructure;
52pub mod normalization;
53pub mod quantization;
54
55// Re-export main types for convenience
56pub use domain::entities::{
57 Embedding, EmbeddingId, EmbeddingModel, EmbeddingMetadata,
58 StorageTier, ModelVersion, InputSpecification,
59};
60pub use domain::repository::EmbeddingRepository;
61pub use application::services::EmbeddingService;
62pub use infrastructure::model_manager::{ModelManager, ModelConfig};
63pub use infrastructure::onnx_inference::OnnxInference;
64
/// Number of components in an embedding produced by the Perch 2.0 model.
pub const EMBEDDING_DIM: usize = 1536;

/// Sample rate, in Hz, targeted for Perch 2.0 input audio (32 kHz).
pub const TARGET_SAMPLE_RATE: u32 = 32_000;

/// Length of one target window, in seconds (5 s).
pub const TARGET_WINDOW_SECONDS: f32 = 5.0;

/// Number of samples in one target window: 5 s at 32 kHz = 160,000.
pub const TARGET_WINDOW_SAMPLES: usize = 160_000;

/// Number of mel bins in the spectrogram for Perch 2.0.
pub const MEL_BINS: usize = 128;

/// Number of frames in the mel spectrogram for Perch 2.0.
pub const MEL_FRAMES: usize = 500;
82
83/// Crate version information
84pub const VERSION: &str = env!("CARGO_PKG_VERSION");
85
86/// Common result type for embedding operations
87pub type Result<T> = std::result::Result<T, EmbeddingError>;
88
89/// Unified error type for embedding operations
90#[derive(Debug, thiserror::Error)]
91pub enum EmbeddingError {
92 /// Model loading or initialization error
93 #[error("Model error: {0}")]
94 Model(#[from] infrastructure::model_manager::ModelError),
95
96 /// ONNX inference error
97 #[error("Inference error: {0}")]
98 Inference(#[from] infrastructure::onnx_inference::InferenceError),
99
100 /// Embedding validation error
101 #[error("Validation error: {0}")]
102 Validation(String),
103
104 /// Invalid input dimensions
105 #[error("Invalid dimensions: expected {expected}, got {actual}")]
106 InvalidDimensions {
107 /// Expected dimension
108 expected: usize,
109 /// Actual dimension
110 actual: usize,
111 },
112
113 /// Repository error
114 #[error("Repository error: {0}")]
115 Repository(String),
116
117 /// IO error
118 #[error("IO error: {0}")]
119 Io(#[from] std::io::Error),
120
121 /// Checksum verification failed
122 #[error("Checksum mismatch: expected {expected}, got {actual}")]
123 ChecksumMismatch {
124 /// Expected checksum
125 expected: String,
126 /// Actual checksum
127 actual: String,
128 },
129}
130
#[cfg(test)]
mod tests {
    use super::*;

    /// Pins the value of every model constant so an accidental edit shows up
    /// as a test failure.
    #[test]
    fn test_constants() {
        assert_eq!(EMBEDDING_DIM, 1536);
        assert_eq!(TARGET_SAMPLE_RATE, 32_000);
        // Compared with a tolerance rather than `==` to stay clear of
        // float-equality lints under `clippy::pedantic`.
        assert!((TARGET_WINDOW_SECONDS - 5.0).abs() < f32::EPSILON);
        assert_eq!(TARGET_WINDOW_SAMPLES, 160_000);
        assert_eq!(MEL_BINS, 128);
        assert_eq!(MEL_FRAMES, 500);
    }

    /// The window length in samples is documented as sample rate × duration,
    /// but the three constants are written out independently; verify that
    /// they actually agree.
    #[test]
    fn test_window_samples_match_rate_and_duration() {
        // Explicit import keeps `u32::try_from` resolvable on editions whose
        // prelude does not include `TryFrom`.
        use std::convert::TryFrom;

        let samples =
            u32::try_from(TARGET_WINDOW_SAMPLES).expect("window sample count fits in u32");
        let expected = f64::from(TARGET_SAMPLE_RATE) * f64::from(TARGET_WINDOW_SECONDS);
        assert!((f64::from(samples) - expected).abs() < f64::EPSILON);
    }
}
143}