use crate::domain::bookmark::Bookmark;
use crate::domain::embedding::{cosine_similarity, deserialize_embedding, Embedder};
use crate::domain::error::DomainResult;
use ndarray::Array1;
use std::cmp::Ordering;
/// Parameters of a semantic (embedding-based) bookmark search.
#[derive(Clone, Debug)]
pub struct SemanticSearch {
    /// Free-text query that is handed to the embedder when the search runs.
    pub query: String,
    /// Maximum number of results to keep; `None` keeps every result.
    pub limit: Option<usize>,
}
/// One hit produced by a semantic search: a bookmark plus its score.
#[derive(Clone, Debug)]
pub struct SemanticSearchResult {
    /// Owned copy of the bookmark that matched.
    pub bookmark: Bookmark,
    /// Cosine similarity between the query embedding and the bookmark
    /// embedding; shown to users as a percentage (value × 100).
    pub similarity: f32,
}
impl SemanticSearch {
    /// Creates a search for `query`, optionally capped at `limit` results.
    pub fn new(query: impl Into<String>, limit: Option<usize>) -> Self {
        Self {
            query: query.into(),
            limit,
        }
    }

    /// Runs the search over `bookmarks`, scoring each one against the query.
    ///
    /// The query is embedded with `embedder`. Every bookmark that is flagged
    /// `embeddable` and carries a stored embedding is scored with
    /// [`cosine_similarity`]. Results are returned best match first and cut
    /// down to `self.limit` entries when a limit is set.
    ///
    /// Bookmarks whose stored embedding cannot be deserialized are skipped
    /// on purpose (best effort) rather than failing the whole search.
    ///
    /// # Returns
    ///
    /// An empty vector when the embedder yields no embedding for the query,
    /// otherwise the scored bookmarks in descending similarity order.
    /// Bookmarks with equal scores keep their input order.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `embedder.embed`.
    pub fn execute(
        &self,
        bookmarks: &[Bookmark],
        embedder: &dyn Embedder,
    ) -> DomainResult<Vec<SemanticSearchResult>> {
        let query_embedding = match embedder.embed(&self.query)? {
            Some(embedding) => embedding,
            None => return Ok(Vec::new()),
        };
        let query_vector = Array1::from(query_embedding);

        // Score by reference first so that only the bookmarks which survive
        // the limit are cloned below.
        let mut scored: Vec<(&Bookmark, f32)> = bookmarks
            .iter()
            .filter(|bookmark| bookmark.embeddable)
            .filter_map(|bookmark| {
                let embedding_bytes = bookmark.embedding.as_ref()?;
                // Undecodable embeddings are skipped, not reported.
                let bm_embedding = deserialize_embedding(embedding_bytes.clone()).ok()?;
                let bm_vector = Array1::from(bm_embedding);
                Some((bookmark, cosine_similarity(&query_vector, &bm_vector)))
            })
            .collect();

        // `sort_by` is stable, so ties keep the order of `bookmarks`.
        scored.sort_by(|a, b| Self::descending_nan_last(a.1, b.1));

        if let Some(limit) = self.limit {
            scored.truncate(limit);
        }

        Ok(scored
            .into_iter()
            .map(|(bookmark, similarity)| SemanticSearchResult {
                bookmark: bookmark.clone(),
                similarity,
            })
            .collect())
    }

    /// Orders similarity scores from highest to lowest, with NaN scores last.
    ///
    /// `partial_cmp(..).unwrap_or(Equal)` alone is not a total order once a
    /// NaN is involved, which `sort_by` does not tolerate (unspecified order,
    /// possibly a panic). Handling NaN explicitly keeps the comparator total.
    fn descending_nan_last(a: f32, b: f32) -> Ordering {
        match (a.is_nan(), b.is_nan()) {
            (true, true) => Ordering::Equal,
            (true, false) => Ordering::Greater,
            (false, true) => Ordering::Less,
            // Neither side is NaN here, so `partial_cmp` always yields `Some`.
            (false, false) => b.partial_cmp(&a).unwrap_or(Ordering::Equal),
        }
    }
}
impl SemanticSearchResult {
    /// Creates a result from a bookmark and its similarity score.
    pub fn new(bookmark: Bookmark, similarity: f32) -> Self {
        Self {
            bookmark,
            similarity,
        }
    }

    /// Returns the similarity as a percentage with one decimal place,
    /// including the trailing `%` (for example `0.756` becomes `"75.6%"`).
    pub fn similarity_percentage(&self) -> String {
        format!("{:.1}%", self.similarity * 100.0)
    }

    /// Renders the result as a single colored line for terminal output.
    ///
    /// Layout: `id: title <url> (NN.N%) (default)` followed by ` [tags]` when
    /// the bookmark has tags. A bookmark without an id is shown with id `0`.
    /// The ` (default)` action label is currently hard-coded.
    pub fn display(&self) -> String {
        use crossterm::style::Stylize;

        let id = self.bookmark.id.unwrap_or(0);
        let title = &self.bookmark.title;
        let url = &self.bookmark.url;

        // Strip leading and trailing commas from the formatted tag string.
        let binding = self.bookmark.formatted_tags();
        let tags_str = binding.trim_matches(',');

        // Already carries its own `%`, so the template below must not add
        // another one (that previously printed a doubled percent sign).
        let similarity = self.similarity_percentage();

        let tags_display = if tags_str.is_empty() {
            String::new()
        } else {
            format!(" [{}]", tags_str.magenta())
        };
        let action_display = " (default)".cyan();

        format!(
            "{}: {} <{}> ({}){}{}",
            id.to_string().blue(),
            title.clone().green(),
            url.clone().yellow(),
            similarity.cyan(),
            action_display,
            tags_display
        )
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::domain::tag::Tag;
    use crate::infrastructure::embeddings::dummy_provider::DummyEmbedding;
    use crate::util::testing::init_test_env;
    use std::collections::HashSet;

    /// Builds a bookmark for `https://example.com` tagged `test`, then sets
    /// its embeddable flag to `has_embedding`.
    fn create_test_bookmark(title: &str, content: &str, has_embedding: bool) -> Bookmark {
        let tags: HashSet<_> = std::iter::once(Tag::new("test").unwrap()).collect();
        let embedder = &crate::infrastructure::embeddings::DummyEmbedding;
        let mut bookmark =
            Bookmark::new("https://example.com", title, content, tags, embedder).unwrap();
        bookmark.set_embeddable(has_embedding);
        bookmark
    }

    #[test]
    fn given_empty_bookmark_list_when_semantic_search_then_returns_empty_results() {
        let _ = init_test_env();
        let embedder = DummyEmbedding;

        let found = SemanticSearch::new("test query", None)
            .execute(&[], &embedder)
            .unwrap();

        assert!(found.is_empty());
    }

    #[test]
    fn given_matching_bookmarks_when_semantic_search_then_returns_sorted_results() {
        let _ = init_test_env();
        let embedder = DummyEmbedding;
        let bookmarks = vec![
            create_test_bookmark("Test One", "This is a test", true),
            create_test_bookmark("Test Two", "Another test", true),
            create_test_bookmark("Not a match", "Something else", true),
        ];

        let found = SemanticSearch::new("test", None)
            .execute(&bookmarks, &embedder)
            .unwrap();

        // NOTE(review): presumably the dummy embedder yields no query
        // embedding, which is why nothing is expected back - confirm.
        assert!(found.is_empty());
    }

    #[test]
    fn given_search_limit_when_semantic_search_then_respects_limit() {
        let _ = init_test_env();
        let embedder = DummyEmbedding;
        let bookmarks: Vec<Bookmark> = (0..10)
            .map(|i| create_test_bookmark(&format!("Test {}", i), "content", true))
            .collect();

        let found = SemanticSearch::new("test", Some(3))
            .execute(&bookmarks, &embedder)
            .unwrap();

        // NOTE(review): an empty result does not exercise the limit itself;
        // presumably a consequence of the dummy embedder - confirm.
        assert!(found.is_empty());
    }

    #[test]
    fn given_similarity_score_when_format_percentage_then_returns_correct_format() {
        let _ = init_test_env();
        let hit = SemanticSearchResult {
            bookmark: create_test_bookmark("Test", "Content", true),
            similarity: 0.756,
        };

        assert_eq!(hit.similarity_percentage(), "75.6%");
    }
}