use crate::capture::CapturedFrame;
use crate::config::StorageConfig;
use crate::error::{OsPipeError, Result};
use crate::storage::embedding::cosine_similarity;
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use uuid::Uuid;
/// A single embedding vector together with the data kept alongside it.
///
/// The stores in this file create one of these for every inserted frame.
#[derive(Debug, Clone)]
pub struct StoredEmbedding {
/// Identifier of the entry; `insert` copies it from the frame's `id`.
pub id: Uuid,
/// The embedding components, copied from the slice passed to `insert`.
pub vector: Vec<f32>,
/// JSON metadata. `insert` writes an object with the keys `text`,
/// `content_type`, `app_name`, `window_title`, `monitor_id` and `confidence`;
/// `update_metadata` may later replace it with an arbitrary value.
pub metadata: serde_json::Value,
/// Timestamp copied from the inserted frame; filtered searches compare it
/// against the time bounds of a `SearchFilter`.
pub timestamp: DateTime<Utc>,
}
/// One hit returned by a similarity search.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchResult {
/// Identifier of the matching stored embedding.
pub id: Uuid,
/// Similarity between the query and the stored vector; search results are
/// ordered by this value, highest first. `VectorStore` fills it with the
/// value returned by `cosine_similarity`, `HnswVectorStore` with `1.0` minus
/// the score reported by its index.
pub score: f32,
/// Clone of the stored entry's metadata, taken when the search ran.
pub metadata: serde_json::Value,
}
/// Optional constraints applied to stored embeddings during a filtered search.
///
/// A field left as `None` imposes no constraint; an entry must satisfy every
/// field that is `Some` (see `matches_filter`).
#[derive(Debug, Clone, Default)]
pub struct SearchFilter {
/// Exact match against the `app_name` metadata string. Entries whose
/// `app_name` is missing or not a string are compared as `""`.
pub app: Option<String>,
/// Inclusive lower bound on the entry's timestamp.
pub time_start: Option<DateTime<Utc>>,
/// Inclusive upper bound on the entry's timestamp.
pub time_end: Option<DateTime<Utc>>,
/// Exact match against the `content_type` metadata string. Entries whose
/// `content_type` is missing or not a string are compared as `""`.
pub content_type: Option<String>,
/// Compared against the `monitor_id` metadata value. Entries whose metadata
/// has no unsigned-integer `monitor_id` never match.
pub monitor: Option<u32>,
}
/// In-memory vector store that answers a search by scoring every stored
/// embedding against the query.
pub struct VectorStore {
/// Configuration the store was created with; exposed through `config()`.
config: StorageConfig,
/// Stored entries, kept in insertion order.
embeddings: Vec<StoredEmbedding>,
/// Required length of every inserted vector and every query vector; taken
/// from `config.embedding_dim` in `new`.
dimension: usize,
}
impl VectorStore {
    /// Creates an empty store whose vectors must all have
    /// `config.embedding_dim` components.
    ///
    /// # Errors
    ///
    /// Returns [`OsPipeError::Storage`] when `config.embedding_dim` is zero.
    pub fn new(config: StorageConfig) -> Result<Self> {
        let dimension = config.embedding_dim;
        if dimension == 0 {
            return Err(OsPipeError::Storage(
                "embedding_dim must be greater than 0".to_string(),
            ));
        }
        Ok(Self {
            config,
            embeddings: Vec::new(),
            dimension,
        })
    }

    /// Appends `embedding` for `frame`, recording the frame's id, timestamp
    /// and a JSON metadata object built from the frame.
    ///
    /// Ids are not checked for uniqueness: inserting the same frame twice
    /// stores two entries.
    ///
    /// # Errors
    ///
    /// Returns [`OsPipeError::Storage`] when `embedding.len()` differs from the
    /// store's dimension.
    pub fn insert(&mut self, frame: &CapturedFrame, embedding: &[f32]) -> Result<()> {
        if embedding.len() != self.dimension {
            return Err(OsPipeError::Storage(format!(
                "Expected embedding dimension {}, got {}",
                self.dimension,
                embedding.len()
            )));
        }
        let metadata = serde_json::json!({
            "text": frame.text_content(),
            "content_type": frame.content_type(),
            "app_name": frame.metadata.app_name,
            "window_title": frame.metadata.window_title,
            "monitor_id": frame.metadata.monitor_id,
            "confidence": frame.metadata.confidence,
        });
        self.embeddings.push(StoredEmbedding {
            id: frame.id,
            vector: embedding.to_vec(),
            metadata,
            timestamp: frame.timestamp,
        });
        Ok(())
    }

    /// Returns up to `k` stored entries ordered by descending cosine
    /// similarity to `query_embedding`.
    ///
    /// Entries with equal scores keep their insertion order; entries whose
    /// score is NaN are ranked last.
    ///
    /// # Errors
    ///
    /// Returns [`OsPipeError::Search`] when the query length differs from the
    /// store's dimension.
    pub fn search(&self, query_embedding: &[f32], k: usize) -> Result<Vec<SearchResult>> {
        self.check_query_dimension(query_embedding)?;
        Ok(self.top_k(query_embedding, k, |_| true))
    }

    /// Like [`VectorStore::search`], but only entries accepted by `filter`
    /// are scored and returned.
    ///
    /// # Errors
    ///
    /// Returns [`OsPipeError::Search`] when the query length differs from the
    /// store's dimension.
    pub fn search_filtered(
        &self,
        query: &[f32],
        k: usize,
        filter: &SearchFilter,
    ) -> Result<Vec<SearchResult>> {
        self.check_query_dimension(query)?;
        Ok(self.top_k(query, k, |stored| matches_filter(stored, filter)))
    }

    /// Removes every entry whose id equals `id`.
    ///
    /// Returns `Ok(true)` when at least one entry was removed and `Ok(false)`
    /// otherwise; this implementation never returns `Err`.
    pub fn delete(&mut self, id: &Uuid) -> Result<bool> {
        let before = self.embeddings.len();
        self.embeddings.retain(|e| e.id != *id);
        Ok(self.embeddings.len() < before)
    }

    /// Replaces the metadata of the first entry whose id equals `id`.
    ///
    /// # Errors
    ///
    /// Returns [`OsPipeError::Storage`] when no entry has that id.
    pub fn update_metadata(&mut self, id: &Uuid, metadata: serde_json::Value) -> Result<()> {
        match self.embeddings.iter_mut().find(|e| e.id == *id) {
            Some(entry) => {
                entry.metadata = metadata;
                Ok(())
            }
            None => Err(OsPipeError::Storage(format!(
                "No embedding found with id {}",
                id
            ))),
        }
    }

    /// Number of stored entries.
    pub fn len(&self) -> usize {
        self.embeddings.len()
    }

    /// Returns `true` when the store holds no entries.
    pub fn is_empty(&self) -> bool {
        self.embeddings.is_empty()
    }

    /// Required length of inserted vectors and query vectors.
    pub fn dimension(&self) -> usize {
        self.dimension
    }

    /// Configuration the store was created with.
    pub fn config(&self) -> &StorageConfig {
        &self.config
    }

    /// Returns the first entry whose id equals `id`, if any.
    pub fn get(&self, id: &Uuid) -> Option<&StoredEmbedding> {
        self.embeddings.iter().find(|e| e.id == *id)
    }

    /// Rejects a query whose length differs from the store's dimension.
    fn check_query_dimension(&self, query: &[f32]) -> Result<()> {
        if query.len() != self.dimension {
            return Err(OsPipeError::Search(format!(
                "Expected query dimension {}, got {}",
                self.dimension,
                query.len()
            )));
        }
        Ok(())
    }

    /// Scores every entry accepted by `keep` against `query` and returns the
    /// `k` best, highest score first.
    fn top_k(
        &self,
        query: &[f32],
        k: usize,
        keep: impl Fn(&StoredEmbedding) -> bool,
    ) -> Vec<SearchResult> {
        let mut scored: Vec<(&StoredEmbedding, f32)> = self
            .embeddings
            .iter()
            .filter(|stored| keep(*stored))
            .map(|stored| (stored, cosine_similarity(query, &stored.vector)))
            .collect();
        // Stable sort: entries with equal scores stay in insertion order.
        scored.sort_by(|a, b| Self::cmp_score_desc(a.1, b.1));
        scored.truncate(k);
        scored
            .into_iter()
            .map(|(stored, score)| SearchResult {
                id: stored.id,
                score,
                metadata: stored.metadata.clone(),
            })
            .collect()
    }

    /// Total order on scores for a descending sort, with NaN ranked last.
    ///
    /// Falling back to `Equal` whenever `partial_cmp` returns `None` is not a
    /// total order once a NaN is present, and `sort_by` gives an unspecified
    /// order (and may panic) for such comparators.
    fn cmp_score_desc(a: f32, b: f32) -> std::cmp::Ordering {
        use std::cmp::Ordering;
        match (a.is_nan(), b.is_nan()) {
            (true, true) => Ordering::Equal,
            (true, false) => Ordering::Greater,
            (false, true) => Ordering::Less,
            // Neither side is NaN, so `partial_cmp` always yields `Some`.
            (false, false) => b.partial_cmp(&a).unwrap_or(Ordering::Equal),
        }
    }
}
#[cfg(not(target_arch = "wasm32"))]
mod native {
use super::*;
use ruvector_core::index::hnsw::HnswIndex;
use ruvector_core::index::VectorIndex;
use ruvector_core::types::{DistanceMetric, HnswConfig};
use std::collections::HashMap;
pub struct HnswVectorStore {
index: HnswIndex,
entries: HashMap<Uuid, StoredEmbedding>,
dimension: usize,
config: StorageConfig,
ef_search: usize,
}
impl HnswVectorStore {
pub fn new(config: StorageConfig) -> Result<Self> {
let dimension = config.embedding_dim;
if dimension == 0 {
return Err(OsPipeError::Storage(
"embedding_dim must be greater than 0".to_string(),
));
}
let hnsw_config = HnswConfig {
m: config.hnsw_m,
ef_construction: config.hnsw_ef_construction,
ef_search: config.hnsw_ef_search,
max_elements: 10_000_000,
};
let index =
HnswIndex::new(dimension, DistanceMetric::Cosine, hnsw_config).map_err(|e| {
OsPipeError::Storage(format!("Failed to create HNSW index: {}", e))
})?;
let ef_search = config.hnsw_ef_search;
Ok(Self {
index,
entries: HashMap::new(),
dimension,
config,
ef_search,
})
}
pub fn insert(&mut self, frame: &CapturedFrame, embedding: &[f32]) -> Result<()> {
if embedding.len() != self.dimension {
return Err(OsPipeError::Storage(format!(
"Expected embedding dimension {}, got {}",
self.dimension,
embedding.len()
)));
}
let metadata = serde_json::json!({
"text": frame.text_content(),
"content_type": frame.content_type(),
"app_name": frame.metadata.app_name,
"window_title": frame.metadata.window_title,
"monitor_id": frame.metadata.monitor_id,
"confidence": frame.metadata.confidence,
});
let id_str = frame.id.to_string();
self.index
.add(id_str, embedding.to_vec())
.map_err(|e| OsPipeError::Storage(format!("HNSW insert failed: {}", e)))?;
self.entries.insert(
frame.id,
StoredEmbedding {
id: frame.id,
vector: embedding.to_vec(),
metadata,
timestamp: frame.timestamp,
},
);
Ok(())
}
pub fn search(&self, query: &[f32], k: usize) -> Result<Vec<SearchResult>> {
if query.len() != self.dimension {
return Err(OsPipeError::Search(format!(
"Expected query dimension {}, got {}",
self.dimension,
query.len()
)));
}
let hnsw_results = self
.index
.search_with_ef(query, k, self.ef_search)
.map_err(|e| OsPipeError::Search(format!("HNSW search failed: {}", e)))?;
let mut results = Vec::with_capacity(hnsw_results.len());
for hr in hnsw_results {
if let Ok(uuid) = Uuid::parse_str(&hr.id) {
if let Some(stored) = self.entries.get(&uuid) {
let similarity = 1.0 - hr.score;
results.push(SearchResult {
id: uuid,
score: similarity,
metadata: stored.metadata.clone(),
});
}
}
}
results.sort_by(|a, b| {
b.score
.partial_cmp(&a.score)
.unwrap_or(std::cmp::Ordering::Equal)
});
Ok(results)
}
pub fn search_filtered(
&self,
query: &[f32],
k: usize,
filter: &SearchFilter,
) -> Result<Vec<SearchResult>> {
let over_k = (k * 4).max(k + 20);
let candidates = self.search(query, over_k)?;
let mut filtered: Vec<SearchResult> = candidates
.into_iter()
.filter(|r| {
if let Some(stored) = self.entries.get(&r.id) {
matches_filter(stored, filter)
} else {
false
}
})
.take(k)
.collect();
filtered.sort_by(|a, b| {
b.score
.partial_cmp(&a.score)
.unwrap_or(std::cmp::Ordering::Equal)
});
Ok(filtered)
}
pub fn delete(&mut self, id: &Uuid) -> Result<bool> {
let id_str = id.to_string();
let removed_from_index = self
.index
.remove(&id_str)
.map_err(|e| OsPipeError::Storage(format!("HNSW delete failed: {}", e)))?;
let removed_from_entries = self.entries.remove(id).is_some();
Ok(removed_from_index || removed_from_entries)
}
pub fn update_metadata(&mut self, id: &Uuid, metadata: serde_json::Value) -> Result<()> {
match self.entries.get_mut(id) {
Some(entry) => {
entry.metadata = metadata;
Ok(())
}
None => Err(OsPipeError::Storage(format!(
"No embedding found with id {}",
id
))),
}
}
pub fn len(&self) -> usize {
self.entries.len()
}
pub fn is_empty(&self) -> bool {
self.entries.is_empty()
}
pub fn dimension(&self) -> usize {
self.dimension
}
pub fn config(&self) -> &StorageConfig {
&self.config
}
pub fn get(&self, id: &Uuid) -> Option<&StoredEmbedding> {
self.entries.get(id)
}
}
}
#[cfg(not(target_arch = "wasm32"))]
pub use native::HnswVectorStore;
/// Returns `true` when `stored` satisfies every constraint set in `filter`.
///
/// Constraints whose field is `None` are skipped. The remaining ones are
/// checked as follows:
///
/// - `app` / `content_type`: exact string match against the `app_name` /
///   `content_type` metadata value; a missing or non-string value is compared
///   as `""`.
/// - `time_start` / `time_end`: inclusive bounds on `stored.timestamp`.
/// - `monitor`: must equal the `monitor_id` metadata value; a missing or
///   non-unsigned-integer value never matches.
fn matches_filter(stored: &StoredEmbedding, filter: &SearchFilter) -> bool {
    if let Some(app) = &filter.app {
        let stored_app = stored
            .metadata
            .get("app_name")
            .and_then(|v| v.as_str())
            .unwrap_or("");
        if stored_app != app.as_str() {
            return false;
        }
    }
    if let Some(start) = filter.time_start {
        if stored.timestamp < start {
            return false;
        }
    }
    if let Some(end) = filter.time_end {
        if stored.timestamp > end {
            return false;
        }
    }
    if let Some(content_type) = &filter.content_type {
        let stored_content_type = stored
            .metadata
            .get("content_type")
            .and_then(|v| v.as_str())
            .unwrap_or("");
        if stored_content_type != content_type.as_str() {
            return false;
        }
    }
    if let Some(monitor) = filter.monitor {
        // Compare as u64: narrowing the stored value to u32 would let an
        // out-of-range id (e.g. 2^32 + 1) wrap around and match monitor 1.
        let stored_monitor = stored
            .metadata
            .get("monitor_id")
            .and_then(|v| v.as_u64());
        if stored_monitor != Some(u64::from(monitor)) {
            return false;
        }
    }
    true
}