use std::collections::HashSet;
use std::sync::Arc;
use crate::application::error::{ApplicationError, ApplicationResult};
use crate::application::services::bookmark_service::BookmarkService;
use crate::domain::bookmark::{build_embedding_content, Bookmark, BookmarkBuilder};
use crate::domain::embedding::Embedder;
use crate::domain::error_context::ApplicationErrorContext;
use crate::domain::repositories::import_repository::{
BookmarkImportData, FileImportData, ImportRepository,
};
use crate::domain::repositories::query::{BookmarkQuery, SortCriteria, SortDirection, SortField};
use crate::domain::repositories::repository::BookmarkRepository;
use crate::domain::repositories::vector_repository::VectorRepository;
use crate::domain::search::{
HybridSearch, HybridSearchResult, RrfFusion, SemanticSearch, SemanticSearchResult,
};
use crate::domain::tag::Tag;
use crate::infrastructure::http;
use crate::util::helper::calc_content_hash;
use crate::util::validation::ValidationHelper;
use std::path::Path;
use tracing::{debug, instrument, warn};
/// Concrete [`BookmarkService`] that combines a bookmark repository, an
/// embedder, a vector store and an import source.
///
/// `R` is the bookmark repository implementation; the other collaborators are
/// held as trait objects.
#[derive(Debug)]
pub struct BookmarkServiceImpl<R: BookmarkRepository> {
// Bookmark store used for lookups, searches and all persistence calls.
repository: Arc<R>,
// Produces embedding vectors for bookmark content and for search queries.
embedder: Arc<dyn Embedder>,
// Stores, deletes and searches embedding vectors keyed by bookmark ID.
vector_repository: Arc<dyn VectorRepository>,
// Reads JSON bookmarks, text documents and files from import sources.
import_repository: Arc<dyn ImportRepository>,
}
impl<R: BookmarkRepository> BookmarkServiceImpl<R> {
    /// Assembles a service from its four collaborators.
    pub fn new(
        repository: Arc<R>,
        embedder: Arc<dyn Embedder>,
        vector_repository: Arc<dyn VectorRepository>,
        import_repository: Arc<dyn ImportRepository>,
    ) -> Self {
        Self {
            repository,
            embedder,
            vector_repository,
            import_repository,
        }
    }

    /// Embeds `content` and stores the resulting vector under `bookmark_id`.
    ///
    /// When the embedder yields no vector the vector store is left untouched
    /// and the call still succeeds.
    fn upsert_embedding_for_bookmark(
        &self,
        bookmark_id: i32,
        content: &str,
    ) -> ApplicationResult<()> {
        let maybe_vector = self.embedder.embed_document(content)?;
        if let Some(vector) = maybe_vector {
            self.vector_repository
                .upsert_embedding(bookmark_id, &vector)
                .app_context("upserting embedding into vector repository")?;
            debug!("Stored embedding for bookmark {}", bookmark_id);
        } else {
            debug!(
                "Embedder returned None for bookmark {} — skipping vector upsert",
                bookmark_id
            );
        }
        Ok(())
    }
}
impl<R: BookmarkRepository> BookmarkService for BookmarkServiceImpl<R> {
/// Creates and persists a new bookmark for `url`.
///
/// Returns `ApplicationError::BookmarkExists` when the repository already
/// holds the URL. If `fetch_metadata` is set and the URL is `http(s)`, title
/// and description are fetched from the page; caller-supplied values always
/// take precedence over fetched ones. Without fetched metadata the title
/// falls back to `"Untitled"` and the description to an empty string.
/// An embedding is stored afterwards when the new bookmark is embeddable.
#[instrument(skip(self, tags), level = "debug",
    fields(url = %url, title = %title.unwrap_or("None"), fetch_metadata = %fetch_metadata))]
fn add_bookmark(
    &self,
    url: &str,
    title: Option<&str>,
    description: Option<&str>,
    tags: Option<&HashSet<Tag>>,
    fetch_metadata: bool,
) -> ApplicationResult<Bookmark> {
    // The repository reports "no such URL" as -1.
    let duplicate_id = self
        .repository
        .exists_by_url(url)
        .app_context("checking if bookmark with URL already exists")?;
    if duplicate_id != -1 {
        return Err(ApplicationError::BookmarkExists(
            duplicate_id,
            url.to_string(),
        ));
    }

    // Try to fetch page metadata; a failed fetch is logged and treated like "no metadata".
    let is_web_url = url.starts_with("http://") || url.starts_with("https://");
    let fetched = if fetch_metadata && is_web_url {
        match http::load_url_details(url) {
            Ok(details) => Some(details),
            Err(e) => {
                debug!("Failed to fetch URL metadata: {}", e);
                None
            }
        }
    } else {
        None
    };

    let (title_str, desc_str) = match fetched {
        Some((fetched_title, fetched_desc, _keywords)) => (
            title.map_or(fetched_title, |user_title| user_title.to_string()),
            description.map_or(fetched_desc, |user_desc| user_desc.to_string()),
        ),
        None => (
            title.map_or_else(|| "Untitled".to_string(), ToString::to_string),
            description.map_or_else(String::new, ToString::to_string),
        ),
    };

    let tag_set = tags.cloned().unwrap_or_default();
    debug!(
        "Creating bookmark: '{}' with {} tags",
        title_str,
        tag_set.len()
    );

    let mut bookmark = Bookmark::new(url, &title_str, &desc_str, tag_set)
        .app_context("creating new bookmark from provided data")?;
    self.repository
        .add(&mut bookmark)
        .app_context("saving new bookmark to repository")?;

    if let (true, Some(id)) = (bookmark.embeddable, bookmark.id) {
        let content = bookmark.get_content_for_embedding();
        self.upsert_embedding_for_bookmark(id, &content)?;
    }
    Ok(bookmark)
}
/// Deletes the bookmark with `id` and, best-effort, its stored embedding.
///
/// Returns the repository's deletion result. A failure to remove the
/// embedding is only logged.
#[instrument(skip(self), level = "debug")]
fn delete_bookmark(&self, id: i32) -> ApplicationResult<bool> {
    ValidationHelper::validate_bookmark_id(id)
        .app_context("validating bookmark ID for deletion")?;

    let deleted = self
        .repository
        .delete(id)
        .with_app_context(|| format!("deleting bookmark with ID {}", id))?;

    if let Some(e) = self.vector_repository.delete_embedding(id).err() {
        debug!("Could not delete embedding for bookmark {}: {} (may not exist)", id, e);
    }
    Ok(deleted)
}
/// Fetches the bookmark with `id`, or `None` when the repository has no such row.
///
/// The ID is validated first; an invalid ID is an error rather than `None`.
#[instrument(skip(self), level = "debug")]
fn get_bookmark(&self, id: i32) -> ApplicationResult<Option<Bookmark>> {
    ValidationHelper::validate_bookmark_id(id)
        .app_context("validating bookmark ID for retrieval")?;
    Ok(self
        .repository
        .get_by_id(id)
        .with_app_context(|| format!("retrieving bookmark with ID {}", id))?)
}
/// Switches the embeddable flag of bookmark `id` and persists the change.
///
/// Enabling forces a fresh embedding. Disabling clears the embedding and
/// content hash on the bookmark and removes the stored vector (best-effort).
#[instrument(skip(self), level = "debug")]
fn set_bookmark_embeddable(&self, id: i32, embeddable: bool) -> ApplicationResult<Bookmark> {
    let mut bookmark = ValidationHelper::validate_and_get_bookmark(id, &*self.repository)
        .with_app_context(|| {
            format!(
                "validating and retrieving bookmark with ID {} for embeddable setting",
                id
            )
        })?;
    bookmark.set_embeddable(embeddable);

    if embeddable {
        return self.update_bookmark(bookmark, true);
    }

    debug!("Setting bookmark {} to non-embeddable", id);
    bookmark.embedding = None;
    bookmark.content_hash = None;
    if let Some(e) = self.vector_repository.delete_embedding(id).err() {
        debug!("Could not delete embedding for bookmark {}: {} (may not exist)", id, e);
    }
    self.update_bookmark(bookmark, false)
}
/// Persists `bookmark`, refreshing its embedding when needed.
///
/// The bookmark must carry an ID. For embeddable bookmarks a new embedding is
/// generated when `force_embedding` is set or the content hash differs from the
/// stored one; non-embeddable bookmarks get their embedding and hash cleared.
#[instrument(skip(self), level = "debug")]
fn update_bookmark(
    &self,
    mut bookmark: Bookmark,
    force_embedding: bool,
) -> ApplicationResult<Bookmark> {
    let bookmark_id = bookmark.id.ok_or_else(|| {
        ApplicationError::Validation("Bookmark ID is required for update".to_string())
    })?;
    ValidationHelper::validate_bookmark_id(bookmark_id)
        .app_context("validating bookmark ID for update operation")?;

    let content = bookmark.get_content_for_embedding();
    let new_hash = calc_content_hash(&content);

    if !bookmark.embeddable {
        bookmark.embedding = None;
        bookmark.content_hash = None;
    } else {
        let content_changed = bookmark.content_hash.as_ref() != Some(&new_hash);
        if force_embedding || content_changed {
            debug!(
                "Generating new embedding (force={}, content_changed={})",
                force_embedding,
                content_changed
            );
            self.upsert_embedding_for_bookmark(bookmark_id, &content)?;
            bookmark.content_hash = Some(new_hash);
            bookmark.embedding = None;
        } else {
            debug!("Skipping embedding generation - content unchanged and not forced");
        }
    }

    self.repository
        .update(&bookmark)
        .with_app_context(|| format!("updating bookmark with ID {:?}", bookmark.id))?;
    Ok(bookmark)
}
/// Adds every tag in `tags` to bookmark `id` and persists the result.
///
/// Stops at the first tag the bookmark rejects.
#[instrument(skip(self, tags), level = "debug")]
fn add_tags_to_bookmark(&self, id: i32, tags: &HashSet<Tag>) -> ApplicationResult<Bookmark> {
    let mut target = ValidationHelper::validate_and_get_bookmark(id, &*self.repository)
        .with_app_context(|| {
            format!(
                "validating and retrieving bookmark with ID {} for adding tags",
                id
            )
        })?;

    for new_tag in tags.iter() {
        target
            .add_tag(new_tag.clone())
            .with_app_context(|| format!("adding tag '{}' to bookmark", new_tag.value()))?;
    }

    let saved = self.update_bookmark(target, false);
    saved.app_context("updating bookmark after adding tags")
}
/// Removes every tag in `tags` from bookmark `id` and persists the result.
///
/// The outcome of each individual removal is deliberately ignored.
#[instrument(skip(self, tags), level = "debug")]
fn remove_tags_from_bookmark(
    &self,
    id: i32,
    tags: &HashSet<Tag>,
) -> ApplicationResult<Bookmark> {
    let mut target = ValidationHelper::validate_and_get_bookmark(id, &*self.repository)?;
    tags.iter().for_each(|unwanted| {
        let _ = target.remove_tag(unwanted);
    });
    self.update_bookmark(target, false)
}
/// Replaces the whole tag set of bookmark `id` with `tags` and persists it.
#[instrument(skip(self, tags), level = "debug")]
fn replace_bookmark_tags(&self, id: i32, tags: &HashSet<Tag>) -> ApplicationResult<Bookmark> {
    let mut target = ValidationHelper::validate_and_get_bookmark(id, &*self.repository)?;
    let replacement = tags.clone();
    target.set_tags(replacement)?;
    self.update_bookmark(target, false)
}
/// Runs `query` against the bookmark repository and returns the matches.
#[instrument(skip_all, level = "debug")]
fn search_bookmarks(&self, query: &BookmarkQuery) -> ApplicationResult<Vec<Bookmark>> {
    debug!("Searching bookmarks with query: {:?}", query);
    Ok(self.repository.search(query)?)
}
/// Text search over bookmarks, sorted by modification time, newest first.
#[instrument(skip_all, level = "debug")]
fn search_bookmarks_by_text(&self, query: &str) -> ApplicationResult<Vec<Bookmark>> {
    let newest_first = SortCriteria::new(SortField::Modified, SortDirection::Descending);
    let text_query = BookmarkQuery::new()
        .with_text_query(Some(query))
        .with_sort(newest_first);
    self.search_bookmarks(&text_query)
}
/// Nearest-neighbour search over stored embeddings for `search.query`.
///
/// Returns an empty list when the embedder produces no query vector, and an
/// error when the embedder's dimensionality disagrees with the vector store.
/// Each hit's similarity is `1 / (1 + distance)`. Vector hits whose bookmark
/// no longer exists are skipped. The limit defaults to 10.
#[instrument(skip(self, search), level = "debug")]
fn semantic_search(
    &self,
    search: &SemanticSearch,
) -> ApplicationResult<Vec<SemanticSearchResult>> {
    let query_embedding = if let Some(emb) = self.embedder.embed_query(&search.query)? {
        emb
    } else {
        debug!("Embedder returned None for query — returning empty results");
        return Ok(Vec::new());
    };

    // Only a successfully reported stored dimension can trigger the mismatch error.
    let embedder_dims = self.embedder.dimensions();
    match self.vector_repository.get_dimensions() {
        Ok(Some(stored_dims)) if stored_dims != embedder_dims => {
            warn!(
                "Dimension mismatch: embedder produces {} dims but vector store has {} dims. \
                Run `bkmr backfill --force` to regenerate embeddings.",
                embedder_dims, stored_dims
            );
            return Err(ApplicationError::Other(format!(
                "Embedding dimension mismatch: model={}, stored={}. Run `bkmr backfill --force` to regenerate.",
                embedder_dims, stored_dims
            )));
        }
        _ => {}
    }

    let max_hits = search.limit.unwrap_or(10);
    let neighbours = self
        .vector_repository
        .search_nearest(&query_embedding, max_hits)
        .app_context("searching nearest embeddings in vector repository")?;

    let mut hits = Vec::with_capacity(neighbours.len());
    for (bookmark_id, distance) in neighbours {
        if let Some(bookmark) = self.repository.get_by_id(bookmark_id)? {
            let similarity = 1.0 / (1.0 + distance);
            hits.push(SemanticSearchResult::new(bookmark, similarity));
        } else {
            debug!(
                "Bookmark {} found in vector store but not in bookmarks table — skipping",
                bookmark_id
            );
        }
    }
    Ok(hits)
}
fn hybrid_search(
&self,
search: &HybridSearch,
) -> ApplicationResult<Vec<HybridSearchResult>> {
use crate::domain::search::{RankedResult, SearchMode};
let limit = search.effective_limit();
let internal_limit = std::cmp::max(limit * 4, 20);
let k = 60.0;
let filter_ids = if search.has_tag_filters() {
let all_bookmarks = self.repository.get_all()?;
let filtered = search.apply_tag_filters(&all_bookmarks);
let ids: std::collections::HashSet<i32> = filtered
.into_iter()
.filter_map(|b| b.id)
.collect();
if ids.is_empty() {
return Ok(vec![]);
}
Some(ids)
} else {
None
};
let fts_ranked = self
.repository
.get_bookmarks_fts_ranked(&search.query, filter_ids.as_ref())?;
let sem_ranked = if search.mode == SearchMode::Exact
|| self.embedder.dimensions() == 0
|| !self.vector_repository.has_embeddings().unwrap_or(false)
{
vec![]
} else {
let query_embedding = self.embedder.embed_query(&search.query)?;
match query_embedding {
Some(embedding) => {
let vec_results = self
.vector_repository
.search_nearest_filtered(
&embedding,
internal_limit,
filter_ids.as_ref(),
)?;
vec_results
.into_iter()
.enumerate()
.map(|(rank, (id, _distance))| RankedResult {
bookmark_id: id,
rank,
})
.collect()
}
None => vec![],
}
};
let fts_for_fusion: Vec<_> = fts_ranked.into_iter().take(internal_limit).collect();
let fused = RrfFusion::fuse(&fts_for_fusion, &sem_ranked, k, limit);
let mut results = Vec::with_capacity(fused.len());
for (bookmark_id, rrf_score) in fused {
if let Some(bookmark) = self.repository.get_by_id(bookmark_id)? {
results.push(HybridSearchResult::new(bookmark, rrf_score));
}
}
Ok(results)
}
/// Looks up a bookmark by its exact URL.
#[instrument(skip(self), level = "debug")]
fn get_bookmark_by_url(&self, url: &str) -> ApplicationResult<Option<Bookmark>> {
    Ok(self.repository.get_by_url(url)?)
}
/// Lists bookmarks, optionally ordered by access date and/or limited.
///
/// With a sort direction the repository's access-date listing is used;
/// otherwise a plain query is run, carrying the limit only when one was given.
#[instrument(skip(self), level = "debug")]
fn get_all_bookmarks(
    &self,
    sort_direction: Option<SortDirection>,
    limit: Option<usize>,
) -> ApplicationResult<Vec<Bookmark>> {
    if let Some(direction) = sort_direction {
        return Ok(self.repository.get_by_access_date(direction, limit)?);
    }

    let query = match limit {
        Some(limit_val) => BookmarkQuery::new().with_limit(Some(limit_val)),
        None => BookmarkQuery::new(),
    };
    Ok(self.repository.search(&query)?)
}
/// Asks the repository for `count` randomly chosen bookmarks.
#[instrument(skip(self), level = "debug")]
fn get_random_bookmarks(&self, count: usize) -> ApplicationResult<Vec<Bookmark>> {
    Ok(self.repository.get_random(count)?)
}
/// Returns every bookmark flagged embeddable, whether or not it already has an embedding.
#[instrument(skip(self), level = "debug")]
fn get_bookmarks_for_forced_backfill(&self) -> ApplicationResult<Vec<Bookmark>> {
    Ok(self
        .repository
        .get_all()?
        .into_iter()
        .filter(|candidate| candidate.embeddable)
        .collect())
}
/// Returns embeddable bookmarks that have no vector in the vector store.
///
/// Starts from the repository's own "embeddable without embeddings" list and
/// drops every bookmark whose ID the vector store already knows. Bookmarks
/// without an ID are kept.
#[instrument(skip(self), level = "debug")]
fn get_bookmarks_without_embeddings(&self) -> ApplicationResult<Vec<Bookmark>> {
    let embedded_ids = self.vector_repository.get_embedded_ids()?;
    let candidates = self.repository.get_embeddable_without_embeddings()?;

    let missing = candidates
        .into_iter()
        .filter(|b| match b.id {
            Some(id) => !embedded_ids.contains(&id),
            None => true,
        })
        .collect();
    Ok(missing)
}
/// Records an access on bookmark `id` and persists the access data.
#[instrument(skip(self), level = "debug")]
fn record_bookmark_access(&self, id: i32) -> ApplicationResult<Bookmark> {
    let repo = &*self.repository;
    let mut accessed = ValidationHelper::validate_and_get_bookmark(id, repo)?;
    accessed.record_access();
    repo.update_access(&accessed)?;
    Ok(accessed)
}
/// Imports bookmarks from the JSON source at `path`.
///
/// In `dry_run` mode nothing is written and the number of parsed entries is
/// returned. Otherwise entries whose URL already exists are skipped, and the
/// return value counts the bookmarks actually created.
#[instrument(skip(self), level = "debug")]
fn load_json_bookmarks(&self, path: &str, dry_run: bool) -> ApplicationResult<usize> {
    let entries = self
        .import_repository
        .import_json_bookmarks(path)
        .map_err(|e| ApplicationError::Other(format!("Failed to import data: {}", e)))?;
    if dry_run {
        return Ok(entries.len());
    }

    let mut created = 0;
    for entry in entries {
        // The repository reports "no such URL" as -1.
        let existing_id = self.repository.exists_by_url(&entry.url)?;
        if existing_id != -1 {
            debug!(
                "Bookmark with URL {} already exists (ID: {}), skipping",
                entry.url, existing_id
            );
            continue;
        }

        debug!("Processing import: {}", entry.url);
        let mut bookmark = Bookmark::new(
            &entry.url,
            &entry.title,
            &entry.content,
            entry.tags,
        )?;
        self.repository.add(&mut bookmark)?;

        if let (true, Some(id)) = (bookmark.embeddable, bookmark.id) {
            let content = bookmark.get_content_for_embedding();
            self.upsert_embedding_for_bookmark(id, &content)?;
        }
        created += 1;
    }
    Ok(created)
}
/// Imports text documents from `path` as embeddable bookmarks.
///
/// In `dry_run` mode only the number of parsed documents is returned. An
/// existing bookmark (matched by URL) is refreshed when `force` is set or its
/// content hash differs; unknown URLs become new bookmarks. The return value
/// counts the bookmarks created or refreshed.
#[instrument(skip(self), level = "debug")]
fn load_texts(&self, path: &str, dry_run: bool, force: bool) -> ApplicationResult<usize> {
    let documents = self
        .import_repository
        .import_text_documents(path)
        .map_err(|e| ApplicationError::Other(format!("Failed to import data: {}", e)))?;
    if dry_run {
        return Ok(documents.len());
    }

    let mut processed_count = 0;
    for import in documents {
        let lookup = self.repository.get_by_url(&import.url)?;
        let content = get_content_for_embedding(&import);
        let new_hash = calc_content_hash(&content);

        match lookup {
            Some(existing) => {
                let unchanged = existing.content_hash.as_ref() == Some(&new_hash);
                if !force && unchanged {
                    debug!("Skipping import: {} (content unchanged)", import.url);
                    continue;
                }

                eprintln!("Processing import: {}", import.url);
                let mut updated = existing;
                updated.title = import.title;
                updated.description = String::new();
                updated.embedding = None;
                updated.embeddable = true;
                updated.content_hash = Some(new_hash);
                self.repository.update(&updated)?;
                if let Some(id) = updated.id {
                    self.upsert_embedding_for_bookmark(id, &content)?;
                }
                processed_count += 1;
            }
            None => {
                eprintln!("Processing import: {}", import.url);
                let tags = import.tags.clone();
                let mut bookmark = BookmarkBuilder::default()
                    .id(None)
                    .url(import.url)
                    .title(import.title)
                    .description(String::new())
                    .tags(tags)
                    .access_count(0)
                    .created_at(chrono::Utc::now())
                    .updated_at(chrono::Utc::now())
                    .embeddable(true)
                    .embedding(None::<Vec<u8>>)
                    .content_hash(Some(new_hash))
                    .build()
                    .map_err(|e| ApplicationError::Domain(e.into()))?;
                self.repository.add(&mut bookmark)?;
                if let Some(id) = bookmark.id {
                    self.upsert_embedding_for_bookmark(id, &content)?;
                }
                processed_count += 1;
            }
        }
    }
    Ok(processed_count)
}
#[instrument(skip(self), level = "debug")]
fn import_files(
&self,
paths: &[String],
update: bool,
delete_missing: bool,
dry_run: bool,
verbose: bool,
base_path_name: Option<&str>,
) -> ApplicationResult<(usize, usize, usize)> {
use crate::domain::repositories::import_repository::ImportOptions;
debug!("Starting file import: paths={:?}, update={}, delete_missing={}, dry_run={}, verbose={}, base_path={:?}",
paths, update, delete_missing, dry_run, verbose, base_path_name);
let settings = crate::config::load_settings(None)
.map_err(|e| ApplicationError::Other(format!("Failed to load settings: {}", e)))?;
let actual_scan_paths = if let Some(base_name) = base_path_name {
if let Some(base_value) = settings.base_paths.get(base_name) {
let expanded_base = crate::config::resolve_file_path(&settings, base_value);
paths
.iter()
.map(|relative_path| {
let full_path = std::path::Path::new(&expanded_base).join(relative_path);
full_path.to_string_lossy().to_string()
})
.collect()
} else {
return Err(ApplicationError::Other(format!(
"Base path '{}' not found in configuration",
base_name
)));
}
} else {
paths.to_vec()
};
let options = ImportOptions {
update,
delete_missing,
dry_run,
verbose,
};
let file_imports = self
.import_repository
.import_files(&actual_scan_paths, &options)
.map_err(|e| ApplicationError::Other(format!("Failed to scan files: {}", e)))?;
debug!("Found {} files to process", file_imports.len());
let mut added_count = 0;
let mut updated_count = 0;
let mut deleted_count = 0;
for file_data in &file_imports {
if let Some(existing) = self.find_bookmark_by_name(&file_data.name)? {
if !update {
return Err(ApplicationError::DuplicateName {
name: file_data.name.clone(),
existing_id: existing.id.unwrap_or(-1),
file_path: file_data.file_path.display().to_string(),
});
}
let content_changed = existing.file_hash.as_ref() != Some(&file_data.file_hash);
let metadata_changed = self.has_metadata_changed(&existing, file_data)?;
if !content_changed && !metadata_changed {
debug!("Skipping {}: no changes detected", file_data.name);
continue;
}
if content_changed {
println!("Content changed: {}", file_data.name);
}
if metadata_changed {
println!("Metadata changed: {}", file_data.name);
}
if !dry_run {
self.update_bookmark_from_file(
&existing,
file_data,
&settings,
base_path_name,
)?;
}
updated_count += 1;
println!("Updated bookmark: {}", file_data.name);
} else {
if !dry_run {
self.create_bookmark_from_file(file_data, &settings, base_path_name)?;
}
added_count += 1;
println!("Added bookmark: {}", file_data.name);
}
}
if delete_missing {
let orphaned = self.find_orphaned_bookmarks(&actual_scan_paths, &file_imports)?;
for bookmark in orphaned {
if !dry_run {
if let Some(id) = bookmark.id {
self.repository.delete(id)?;
let _ = self.vector_repository.delete_embedding(id);
}
}
deleted_count += 1;
println!(
"Deleted orphaned bookmark: {} ({:?})",
bookmark.title, bookmark.id
);
}
}
Ok((added_count, updated_count, deleted_count))
}
}
impl<R: BookmarkRepository> BookmarkServiceImpl<R> {
/// Finds the first bookmark whose title equals `name` exactly.
///
/// The repository is queried with `name` wrapped in double quotes, then the
/// results are narrowed to an exact title match.
fn find_bookmark_by_name(&self, name: &str) -> ApplicationResult<Option<Bookmark>> {
    let mut query = BookmarkQuery::new();
    query.text_query = Some(format!("\"{}\"", name));

    let exact_match = self
        .repository
        .search(&query)?
        .into_iter()
        .find(|candidate| candidate.title == name);
    Ok(exact_match)
}
fn create_bookmark_from_file(
&self,
file_data: &FileImportData,
settings: &crate::config::Settings,
base_path_name: Option<&str>,
) -> ApplicationResult<Bookmark> {
use crate::domain::system_tag::SystemTag;
let system_tag = match file_data.content_type.as_str() {
"_snip_" => SystemTag::Snippet,
"_imported_" => SystemTag::Text,
"_shell_" => SystemTag::Shell,
"_md_" => SystemTag::Markdown,
"_env_" => SystemTag::Env,
"_mem_" => SystemTag::Memory,
_ => SystemTag::Shell, };
let mut all_tags = file_data.tags.clone();
all_tags.insert(system_tag.to_tag()?);
let mut bookmark = BookmarkBuilder::default()
.id(None)
.url(file_data.content.clone())
.title(file_data.name.clone())
.description(String::new())
.tags(all_tags)
.access_count(0)
.created_at(Some(chrono::Utc::now()))
.updated_at(chrono::Utc::now())
.embedding(None)
.content_hash(None)
.embeddable(true)
.file_path(None)
.file_mtime(None)
.file_hash(None)
.build()
.map_err(|e| ApplicationError::Other(format!("Failed to build bookmark: {}", e)))?;
let file_path_str = if let Some(base_name) = base_path_name {
if let Some(base_value) = settings.base_paths.get(base_name) {
let expanded_base = crate::config::resolve_file_path(settings, base_value);
let absolute_file_path = file_data.file_path.display().to_string();
if let Some(relative_path) = absolute_file_path.strip_prefix(&expanded_base) {
let relative_path = relative_path.strip_prefix('/').unwrap_or(relative_path);
crate::config::create_file_path_with_base(base_name, relative_path)
} else {
return Err(ApplicationError::Other(format!(
"File {} is not under base path {} ({})",
absolute_file_path, base_name, expanded_base
)));
}
} else {
return Err(ApplicationError::Other(format!(
"Base path '{}' not found in configuration",
base_name
)));
}
} else {
file_data.file_path.display().to_string()
};
bookmark.file_path = Some(file_path_str);
bookmark.file_mtime = Some(file_data.file_mtime as i32);
bookmark.file_hash = Some(file_data.file_hash.clone());
let embedding_content = bookmark.get_content_for_embedding();
bookmark.content_hash = Some(calc_content_hash(&embedding_content));
bookmark.embedding = None;
self.repository.add(&mut bookmark)?;
if bookmark.embeddable {
if let Some(id) = bookmark.id {
self.upsert_embedding_for_bookmark(id, &embedding_content)?;
}
}
Ok(bookmark)
}
fn update_bookmark_from_file(
&self,
existing: &Bookmark,
file_data: &FileImportData,
settings: &crate::config::Settings,
base_path_name: Option<&str>,
) -> ApplicationResult<Bookmark> {
let mut updated = existing.clone();
updated.url = file_data.content.clone();
updated.title = file_data.name.clone();
let file_path_str = if let Some(base_name) = base_path_name {
if let Some(base_value) = settings.base_paths.get(base_name) {
let expanded_base = crate::config::resolve_file_path(settings, base_value);
let absolute_file_path = file_data.file_path.display().to_string();
if let Some(relative_path) = absolute_file_path.strip_prefix(&expanded_base) {
let relative_path = relative_path.strip_prefix('/').unwrap_or(relative_path);
crate::config::create_file_path_with_base(base_name, relative_path)
} else {
return Err(ApplicationError::Other(format!(
"File {} is not under base path {} ({})",
absolute_file_path, base_name, expanded_base
)));
}
} else {
return Err(ApplicationError::Other(format!(
"Base path '{}' not found in configuration",
base_name
)));
}
} else {
file_data.file_path.display().to_string()
};
updated.file_path = Some(file_path_str);
updated.file_mtime = Some(file_data.file_mtime as i32);
updated.file_hash = Some(file_data.file_hash.clone());
let mut new_tags = file_data.tags.clone();
for tag in &existing.tags {
if tag.value().starts_with('_') && tag.value().ends_with('_') {
new_tags.insert(tag.clone());
}
}
updated.tags = new_tags;
let embedding_content = updated.get_content_for_embedding();
updated.content_hash = Some(calc_content_hash(&embedding_content));
updated.embedding = None;
self.repository.update(&updated)?;
if updated.embeddable {
if let Some(id) = updated.id {
self.upsert_embedding_for_bookmark(id, &embedding_content)?;
}
}
Ok(updated)
}
/// Finds file-backed bookmarks under `import_paths` that no longer correspond
/// to a scanned file.
///
/// A bookmark is orphaned when its resolved file path either does not exist or
/// was not among `current_imports`, and that path lies under one of the
/// import paths (compared both as given and in canonical form). Paths that
/// cannot be canonicalized are compared as-is.
///
/// Settings are loaded once, and only if at least one bookmark has a file
/// path. Canonical import roots are computed once up front rather than per
/// bookmark.
fn find_orphaned_bookmarks(
    &self,
    import_paths: &[String],
    current_imports: &[FileImportData],
) -> ApplicationResult<Vec<Bookmark>> {
    let file_backed: Vec<Bookmark> = self
        .repository
        .get_all()?
        .into_iter()
        .filter(|bookmark| bookmark.file_path.is_some())
        .collect();
    if file_backed.is_empty() {
        return Ok(Vec::new());
    }

    let settings = crate::config::load_settings(None)
        .map_err(|e| ApplicationError::Other(format!("Failed to load settings: {}", e)))?;

    let current_file_paths: HashSet<_> = current_imports
        .iter()
        .map(|import| {
            import
                .file_path
                .canonicalize()
                .unwrap_or_else(|_| import.file_path.clone())
        })
        .collect();

    let canonical_roots: Vec<_> = import_paths
        .iter()
        .map(|import_path| {
            let root = Path::new(import_path);
            root.canonicalize().unwrap_or_else(|_| root.to_path_buf())
        })
        .collect();

    let mut orphaned = Vec::new();
    for bookmark in file_backed {
        let resolved_path = match &bookmark.file_path {
            Some(file_path_str) => crate::config::resolve_file_path(&settings, file_path_str),
            None => continue,
        };
        let path = Path::new(&resolved_path);
        let file_exists = path.exists();
        let canonical_path = path.canonicalize().unwrap_or_else(|_| path.to_path_buf());
        let found_in_scan = current_file_paths.contains(&canonical_path);
        if file_exists && found_in_scan {
            continue;
        }

        // Only bookmarks that live under one of the scanned roots may be removed.
        let under_import_root = import_paths
            .iter()
            .zip(&canonical_roots)
            .any(|(raw_root, canonical_root)| {
                path.starts_with(raw_root) || canonical_path.starts_with(canonical_root)
            });
        if under_import_root {
            orphaned.push(bookmark);
        }
    }
    Ok(orphaned)
}
fn has_metadata_changed(
&self,
existing: &Bookmark,
file_data: &FileImportData,
) -> ApplicationResult<bool> {
if existing.title != file_data.name {
return Ok(true);
}
let existing_user_tags: HashSet<_> = existing
.tags
.iter()
.filter(|tag| !tag.value().starts_with('_') || !tag.value().ends_with('_'))
.cloned()
.collect();
let file_user_tags: HashSet<_> = file_data
.tags
.iter()
.filter(|tag| !tag.value().starts_with('_') || !tag.value().ends_with('_'))
.cloned()
.collect();
if existing_user_tags != file_user_tags {
return Ok(true);
}
let existing_has_shell = existing.tags.iter().any(|tag| tag.value() == "_shell_");
let existing_has_md = existing.tags.iter().any(|tag| tag.value() == "_md_");
let file_is_shell = file_data.content_type == "_shell_";
let file_is_md = file_data.content_type == "_md_";
if (existing_has_shell != file_is_shell) || (existing_has_md != file_is_md) {
return Ok(true);
}
Ok(false)
}
}
fn get_content_for_embedding(import: &BookmarkImportData) -> String {
build_embedding_content(&import.tags, &import.title, &import.content)
}
#[cfg(test)]
mod tests {
use super::*;
use crate::infrastructure::embeddings::dummy_provider::DummyEmbedding;
use crate::infrastructure::repositories::json_import_repository::JsonImportRepository;
use crate::util::testing::{init_test_env, setup_test_db, EnvGuard};
use std::collections::HashSet;
/// Builds a service over the test database with a dummy embedder, a null
/// vector store and the JSON import repository.
fn create_test_service() -> impl BookmarkService {
    use crate::infrastructure::repositories::null_vector_repository::NullVectorRepository;

    BookmarkServiceImpl::new(
        Arc::new(setup_test_db()),
        Arc::new(DummyEmbedding),
        Arc::new(NullVectorRepository),
        Arc::new(JsonImportRepository::new()),
    )
}
/// Looking up a seeded ID returns the matching bookmark.
#[test]
fn given_valid_id_when_get_bookmark_then_returns_correct_bookmark() {
    let _test_env = init_test_env();
    let _env_guard = EnvGuard::new();
    let service = create_test_service();

    let lookup = service.get_bookmark(1).unwrap();
    assert!(lookup.is_some(), "Should find bookmark with ID 1");

    let found = lookup.unwrap();
    assert_eq!(found.id, Some(1));
    assert_eq!(found.url, "https://www.google.com");
    assert_eq!(found.title, "Google");
}
/// Looking up an ID that is not in the database yields `None`, not an error.
#[test]
fn given_invalid_id_when_get_bookmark_then_returns_none() {
    let _test_env = init_test_env();
    let _env_guard = EnvGuard::new();
    let service = create_test_service();

    let lookup = service.get_bookmark(999).unwrap();
    assert!(lookup.is_none(), "Should not find non-existent bookmark");
}
/// A negative ID is rejected with a validation error.
#[test]
fn given_negative_id_when_get_bookmark_then_returns_error() {
    let _test_env = init_test_env();
    let _env_guard = EnvGuard::new();
    let service = create_test_service();

    let outcome = service.get_bookmark(-1);
    assert!(outcome.is_err(), "Negative ID should return error");

    if let Err(ApplicationError::Validation(msg)) = outcome {
        assert!(
            msg.contains("Invalid bookmark ID"),
            "Error should mention invalid ID"
        );
    } else {
        panic!("Expected a Validation error");
    }
}
/// Looking up a seeded URL returns the matching bookmark.
#[test]
fn given_valid_url_when_get_bookmark_by_url_then_returns_correct_bookmark() {
    let _test_env = init_test_env();
    let _env_guard = EnvGuard::new();
    let service = create_test_service();

    let lookup = service
        .get_bookmark_by_url("https://www.google.com")
        .unwrap();
    assert!(lookup.is_some(), "Should find bookmark with URL");

    let found = lookup.unwrap();
    assert_eq!(found.url, "https://www.google.com");
    assert_eq!(found.title, "Google");
}
/// Adding a bookmark with a fresh URL stores it and returns it with an ID.
#[test]
fn given_new_bookmark_when_add_bookmark_then_creates_and_returns_bookmark() {
    let _test_env = init_test_env();
    let _env_guard = EnvGuard::new();
    let service = create_test_service();

    let url = "https://newbookmark.example.com";
    let title = "New Bookmark";
    let description = "Test description";
    let tags: HashSet<Tag> = std::iter::once(Tag::new("test").unwrap()).collect();

    let created = service
        .add_bookmark(url, Some(title), Some(description), Some(&tags), false)
        .unwrap();

    assert!(
        created.id.is_some(),
        "Bookmark should have ID after adding"
    );
    assert_eq!(created.url, url);
    assert_eq!(created.title, title);
    assert_eq!(created.description, description);
    assert_eq!(created.tags.len(), 1);
    assert!(created.tags.contains(&Tag::new("test").unwrap()));

    // The bookmark must also be readable back from the store.
    let new_id = created.id.unwrap();
    let retrieved = service.get_bookmark(new_id).unwrap().unwrap();
    assert_eq!(retrieved.url, url);
}
/// Adding a bookmark whose URL already exists fails with `BookmarkExists`.
#[test]
fn given_existing_url_when_add_bookmark_then_returns_error() {
    let _test_env = init_test_env();
    let _env_guard = EnvGuard::new();
    let service = create_test_service();

    let existing_url = "https://www.google.com";
    let outcome = service.add_bookmark(
        existing_url,
        Some("Title"),
        Some("Description"),
        None,
        false,
    );
    assert!(outcome.is_err(), "Adding duplicate URL should fail");

    if let Err(ApplicationError::BookmarkExists(_, url)) = outcome {
        assert_eq!(
            url, existing_url,
            "Error message should contain the existing URL"
        );
    } else {
        panic!("Expected a BookmarkExists error");
    }
}
/// Adding a tag grows the tag set by one and the change is persisted.
#[test]
fn given_existing_bookmark_when_add_tags_then_adds_tags_correctly() {
    let _test_env = init_test_env();
    let _env_guard = EnvGuard::new();
    let service = create_test_service();

    let id = 1;
    let tags: HashSet<Tag> = std::iter::once(Tag::new("newtag").unwrap()).collect();
    let tags_before = service.get_bookmark(id).unwrap().unwrap().tags.len();

    let updated = service.add_tags_to_bookmark(id, &tags).unwrap();
    assert!(updated.tags.contains(&Tag::new("newtag").unwrap()));
    assert_eq!(updated.tags.len(), tags_before + 1);

    let retrieved = service.get_bookmark(id).unwrap().unwrap();
    assert!(retrieved.tags.contains(&Tag::new("newtag").unwrap()));
}
/// Removing one of a bookmark's tags shrinks the tag set by one and the change
/// is persisted.
///
/// If the fixture bookmark has no tags there is nothing to remove and the test
/// returns early. The emptiness check happens before a tag is picked, so an
/// untagged fixture does not panic.
#[test]
fn given_existing_bookmark_when_remove_tags_then_removes_tags_correctly() {
    let _env = init_test_env();
    let _guard = EnvGuard::new();
    let service = create_test_service();

    let bookmark = service.get_bookmark(1).unwrap().unwrap();
    let original_tag_count = bookmark.tags.len();
    let tag_to_remove = match bookmark.tags.iter().next() {
        Some(tag) => tag.clone(),
        None => return,
    };

    let mut tags_to_remove = HashSet::new();
    tags_to_remove.insert(tag_to_remove.clone());

    let updated = service
        .remove_tags_from_bookmark(1, &tags_to_remove)
        .unwrap();
    assert!(!updated.tags.contains(&tag_to_remove));
    assert_eq!(updated.tags.len(), original_tag_count - 1);

    let retrieved = service.get_bookmark(1).unwrap().unwrap();
    assert!(!retrieved.tags.contains(&tag_to_remove));
}
/// Replacing tags leaves exactly the new tags, both in the returned bookmark
/// and in the stored one.
#[test]
fn given_existing_bookmark_when_replace_tags_then_replaces_all_tags() {
    let _test_env = init_test_env();
    let _env_guard = EnvGuard::new();
    let service = create_test_service();

    let id = 1;
    let new_tags: HashSet<Tag> = ["replaced1", "replaced2"]
        .iter()
        .map(|name| Tag::new(*name).unwrap())
        .collect();

    let assert_replaced = |holder: &Bookmark| {
        assert_eq!(holder.tags.len(), 2);
        assert!(holder.tags.contains(&Tag::new("replaced1").unwrap()));
        assert!(holder.tags.contains(&Tag::new("replaced2").unwrap()));
    };

    let updated = service.replace_bookmark_tags(id, &new_tags).unwrap();
    assert_replaced(&updated);

    let retrieved = service.get_bookmark(id).unwrap().unwrap();
    assert_replaced(&retrieved);
}
/// Recording an access bumps the access count by one and persists it.
#[test]
fn given_existing_bookmark_when_record_access_then_increments_access_count() {
    let _test_env = init_test_env();
    let _env_guard = EnvGuard::new();
    let service = create_test_service();

    let id = 1;
    let count_before = service.get_bookmark(id).unwrap().unwrap().access_count;
    let expected = count_before + 1;

    let updated = service.record_bookmark_access(id).unwrap();
    assert_eq!(updated.access_count, expected);

    let retrieved = service.get_bookmark(id).unwrap().unwrap();
    assert_eq!(retrieved.access_count, expected);
}
/// A freshly added bookmark can be deleted and is gone afterwards.
#[test]
fn given_test_database_when_delete_bookmark_then_removes_bookmark() {
    let _test_env = init_test_env();
    let _env_guard = EnvGuard::new();
    let service = create_test_service();

    let created = service
        .add_bookmark(
            "https://todelete.example.com",
            Some("To Delete"),
            Some("Description"),
            None,
            false,
        )
        .unwrap();
    let id = created.id.unwrap();
    assert!(service.get_bookmark(id).unwrap().is_some());

    let was_deleted = service.delete_bookmark(id).unwrap();
    assert!(was_deleted, "Delete should return true on success");
    assert!(service.get_bookmark(id).unwrap().is_none());
}
/// A text search finds at least one bookmark that mentions the search term.
#[test]
fn given_text_query_when_search_by_text_then_returns_matching_bookmarks() {
    let _test_env = init_test_env();
    let _env_guard = EnvGuard::new();
    let service = create_test_service();

    let needle = "Google";
    let results = service.search_bookmarks_by_text(needle).unwrap();
    assert!(
        !results.is_empty(),
        "Should find bookmarks containing the text"
    );

    // The term may appear in the title, the description or the URL.
    let has_match = results.iter().any(|b| {
        [&b.title, &b.description, &b.url]
            .iter()
            .any(|field| field.contains(needle))
    });
    assert!(
        has_match,
        "At least one result should match the search text"
    );
}
/// Listing without sort or limit returns the full seeded data set.
#[test]
fn given_test_database_when_get_all_bookmarks_then_returns_all_bookmarks() {
    let _test_env = init_test_env();
    let _env_guard = EnvGuard::new();
    let service = create_test_service();

    let listed = service.get_all_bookmarks(None, None).unwrap();
    let total = listed.len();

    assert!(
        !listed.is_empty(),
        "Should return all bookmarks from test database"
    );
    assert!(
        total >= 11,
        "Should return at least the bookmarks from up.sql"
    );
}
/// Requesting `count` random bookmarks returns exactly that many, and repeated
/// draws do not always return the same set.
///
/// The randomness check is only made when the database is comfortably larger
/// than the sample. Because two independent draws can legitimately coincide,
/// the draw is repeated a few times and the test only fails if every draw
/// equals the first one; a single comparison would fail spuriously.
#[test]
fn given_count_when_get_random_bookmarks_then_returns_random_selection() {
    const MAX_REDRAWS: usize = 5;

    let _env = init_test_env();
    let _guard = EnvGuard::new();
    let service = create_test_service();

    let count = 3;
    let bookmarks = service.get_random_bookmarks(count).unwrap();
    assert_eq!(
        bookmarks.len(),
        count,
        "Should return exactly the requested number of bookmarks"
    );

    let all_bookmarks = service.get_all_bookmarks(None, None).unwrap();
    if all_bookmarks.len() > count * 3 {
        let first_ids: HashSet<_> = bookmarks.iter().filter_map(|b| b.id).collect();
        let saw_different_set = (0..MAX_REDRAWS).any(|_| {
            let another_set = service.get_random_bookmarks(count).unwrap();
            let other_ids: HashSet<_> = another_set.iter().filter_map(|b| b.id).collect();
            other_ids != first_ids
        });
        assert!(
            saw_different_set,
            "Random selections should typically be different"
        );
    }
}
/// Every bookmark reported as lacking an embedding really has none.
#[test]
fn given_test_database_when_get_bookmarks_without_embeddings_then_returns_correct_bookmarks() {
    let _test_env = init_test_env();
    let _env_guard = EnvGuard::new();
    let service = create_test_service();

    let pending = service.get_bookmarks_without_embeddings().unwrap();
    pending.iter().for_each(|candidate| {
        assert!(
            candidate.embedding.is_none(),
            "Returned bookmarks should not have embeddings"
        );
    });
}
/// The embeddable flag starts out false and can be toggled on and off, with
/// each change persisted.
#[test]
fn given_bookmark_when_set_embeddable_then_updates_flag() {
    let _test_env = init_test_env();
    let _env_guard = EnvGuard::new();
    let service = create_test_service();

    let created = service
        .add_bookmark(
            "https://embeddingtest.example.com",
            Some("Test Embeddable"),
            Some("Description"),
            None,
            false,
        )
        .unwrap();
    let id = created.id.unwrap();
    assert!(!created.embeddable, "Default should be false");

    // Switch on.
    let enabled = service.set_bookmark_embeddable(id, true).unwrap();
    assert!(enabled.embeddable, "Flag should be updated to true");
    let stored_enabled = service.get_bookmark(id).unwrap().unwrap();
    assert!(stored_enabled.embeddable, "Flag should be persisted as true");

    // Switch off again.
    let disabled = service.set_bookmark_embeddable(id, false).unwrap();
    assert!(!disabled.embeddable, "Flag should be updated to false");
    let stored_disabled = service.get_bookmark(id).unwrap().unwrap();
    assert!(
        !stored_disabled.embeddable,
        "Flag should be persisted as false"
    );
}
}