use crate::{
ChunkAndIndexWorker, EmbeddingWorker, ExtractionWorker, run_pending, update_storage_accounting,
};
use orbok_cache::CacheService;
use orbok_core::{
FileStatus, HiddenFilePolicy, IndexMode, JobType, PersistenceMode, SourceType, SymlinkPolicy,
};
use orbok_db::Catalog;
use orbok_db::repo::{
FileRepository, IndexJobRepository, NewFile, NewSource, ObservedMetadata, SourceRepository,
};
use orbok_models::{MockEmbeddingModel, MockReranker};
use orbok_search::{HybridSearchService, SearchMode, contains_cjk, normalize_query};
use std::fs;
/// Builds the per-test fixtures: a catalog opened at `root/catalog.sqlite3`
/// and a cache service constructed from `root`.
///
/// Panics if the catalog cannot be opened.
fn setup(root: &std::path::Path) -> (Catalog, CacheService) {
    let catalog_file = root.join("catalog.sqlite3");
    (
        Catalog::open(catalog_file).unwrap(),
        CacheService::new(root),
    )
}
/// Test fixture that puts one document through the indexing pipeline.
///
/// Writes `content` to `root/name`, registers a source and a file row for it,
/// enqueues an `Extract` job and then drains pending jobs with an extraction
/// worker and a chunk-and-index worker. Returns the id of the inserted file.
///
/// The file row always records the extension `"md"`, the content hash `"abc"`
/// and a fixed modification timestamp, regardless of `name` and `content`.
///
/// Panics on any I/O or repository error.
fn seed(
    catalog: &Catalog,
    cache: &CacheService,
    root: &std::path::Path,
    name: &str,
    content: &str,
) -> orbok_core::FileId {
    /// Canonicalises `p` and renders it as an owned (lossy) string.
    fn canonical_lossy(p: &std::path::Path) -> String {
        fs::canonicalize(p).unwrap().to_string_lossy().to_string()
    }

    let file_path = root.join(name);
    fs::write(&file_path, content).unwrap();
    let file_canonical = canonical_lossy(file_path.as_path());
    let root_canonical = canonical_lossy(root);

    // The source points at the file itself but is rooted at the canonical directory.
    let source_row = NewSource {
        source_type: SourceType::File,
        persistence_mode: PersistenceMode::Persistent,
        display_name: Some(name.into()),
        original_path: file_canonical.clone(),
        canonical_path: root_canonical,
        index_mode: IndexMode::Balanced,
        include_patterns: vec![],
        exclude_patterns: vec![],
        hidden_file_policy: HiddenFilePolicy::Exclude,
        symlink_policy: SymlinkPolicy::Ignore,
        max_file_size_bytes: None,
    };
    let source = SourceRepository::new(catalog).insert(source_row).unwrap();

    let observed = ObservedMetadata {
        file_size_bytes: content.len() as u64,
        modified_at: Some("2026-01-01T00:00:00Z".into()),
        platform_file_key: None,
        content_hash: Some("abc".into()),
    };
    let file_row = NewFile {
        source_id: source.source_id.clone(),
        original_path: file_canonical.clone(),
        canonical_path: file_canonical,
        display_path: name.into(),
        extension: Some("md".into()),
        metadata: observed,
        status: FileStatus::Discovered,
    };
    let inserted = FileRepository::new(catalog).insert(file_row).unwrap();

    IndexJobRepository::new(catalog)
        .enqueue(
            JobType::Extract,
            Some(&source.source_id),
            Some(&inserted.file_id),
        )
        .unwrap();

    // NOTE(review): the fourth argument is `None` and the fifth is `50` —
    // presumably "no embedding worker" and a job limit; confirm against `run_pending`.
    let extractor = ExtractionWorker::new(catalog, cache);
    let chunker = ChunkAndIndexWorker::new(catalog, cache);
    run_pending(catalog, &extractor, &chunker, None, 50).unwrap();

    inserted.file_id
}
/// With a `MockReranker` attached, the first hit for "auth token" must have a
/// snippet at least as long as the last hit's snippet.
#[test]
fn reranker_reorders_results() {
    let tmp = tempfile::tempdir().unwrap();
    let root = tmp.path();
    let (catalog, cache) = setup(root);

    let long_body = "auth token — this document discusses authentication token rotation \
        policies in detail, with many paragraphs of explanation.\n";
    seed(&catalog, &cache, root, "short.md", "auth token\n");
    seed(&catalog, &cache, root, "long.md", long_body);

    let reranker = MockReranker;
    let results = HybridSearchService::keyword_only(&catalog)
        .with_reranker(&reranker)
        .search("auth token", SearchMode::Auto, 10)
        .unwrap();
    assert!(!results.is_empty());

    // A missing snippet counts as length zero.
    let head_len = results
        .first()
        .unwrap()
        .snippet
        .as_deref()
        .map_or(0, str::len);
    let tail_len = results
        .last()
        .unwrap()
        .snippet
        .as_deref()
        .map_or(0, str::len);
    assert!(
        tail_len <= head_len,
        "reranker should put longer passage first"
    );
}
/// A keyword-only service with no reranker attached still returns hits.
#[test]
fn search_works_without_reranker() {
    let tmp = tempfile::tempdir().unwrap();
    let root = tmp.path();
    let (catalog, cache) = setup(root);
    seed(&catalog, &cache, root, "doc.md", "important content here\n");

    let hits = HybridSearchService::keyword_only(&catalog)
        .search("important", SearchMode::Auto, 10)
        .unwrap();
    assert!(!hits.is_empty());
}
/// `SearchMode::Fast` yields hits even when a reranker is attached to the service.
#[test]
fn fast_mode_returns_results_without_rerank() {
    let tmp = tempfile::tempdir().unwrap();
    let root = tmp.path();
    let (catalog, cache) = setup(root);
    seed(&catalog, &cache, root, "doc.md", "quick search test\n");

    let reranker = MockReranker;
    let hits = HybridSearchService::keyword_only(&catalog)
        .with_reranker(&reranker)
        .search("quick search", SearchMode::Fast, 10)
        .unwrap();
    assert!(!hits.is_empty());
}
/// After indexing one document, storage accounting reports at least one row,
/// including one for the keyword index, and the source is still listed.
#[test]
fn storage_accounting_reflects_actual_data() {
    let tmp = tempfile::tempdir().unwrap();
    let root = tmp.path();
    let (catalog, cache) = setup(root);
    seed(&catalog, &cache, root, "doc.md", "# Title\n\nContent here.\n");

    let cache_db = root.join(orbok_db::CACHE_FILE_NAME);
    let accounting = update_storage_accounting(&catalog, &cache_db).unwrap();
    assert!(!accounting.is_empty());

    let keyword_row = accounting
        .iter()
        .find(|(category, _, _)| category == &orbok_core::StorageCategory::KeywordIndex);
    assert!(keyword_row.is_some());

    let sources = SourceRepository::new(&catalog).list().unwrap();
    assert!(!sources.is_empty());
}
/// Deleting every row from `embeddings` must leave the file row and the
/// source row in place.
#[test]
fn delete_embeddings_preserves_file_catalog() {
    let tmp = tempfile::tempdir().unwrap();
    let root = tmp.path();
    let (catalog, cache) = setup(root);
    let file_id = seed(&catalog, &cache, root, "doc.md", "some text\n");

    // Register the mock embedding model before running the mock worker.
    let register_mock_model = "INSERT OR IGNORE INTO models (model_id, role, model_name, model_version, \
        dimension, status, created_at, updated_at) VALUES ('mock_mock-v1','embedding','mock','v1',8,'available','t','t')";
    // Each lock guard is a statement-scoped temporary, so none is held while the worker runs.
    catalog.lock().execute(register_mock_model, []).unwrap();

    let worker = EmbeddingWorker::with_mock(&catalog, &cache);
    worker.run(&file_id).unwrap();

    catalog.lock().execute("DELETE FROM embeddings", []).unwrap();

    let file_row = FileRepository::new(&catalog).get_by_id(&file_id).unwrap();
    assert!(file_row.is_some());
    let sources = SourceRepository::new(&catalog).list().unwrap();
    assert!(!sources.is_empty());
}
/// The top keyword-only hit is tagged with `MatchBadge::Keyword`.
#[test]
fn search_results_carry_keyword_badge() {
    let tmp = tempfile::tempdir().unwrap();
    let root = tmp.path();
    let (catalog, cache) = setup(root);
    seed(
        &catalog,
        &cache,
        root,
        "doc.md",
        "authentication token rotation\n",
    );

    let service = HybridSearchService::keyword_only(&catalog);
    let hits = service
        .search("authentication", SearchMode::Auto, 10)
        .unwrap();
    assert!(!hits.is_empty());

    let top = &hits[0];
    assert!(top.badges.contains(&orbok_search::MatchBadge::Keyword));
}
/// Searching after the indexed file has been removed from disk must not fail.
///
/// The document is indexed, then deleted from the filesystem, then searched
/// for. The search call itself must succeed; if it returns any hit, the first
/// hit must still carry a non-empty canonical path.
#[test]
fn search_view_handles_no_snippet() {
    let dir = tempfile::tempdir().unwrap();
    let (catalog, cache) = setup(dir.path());
    // The returned file id is not needed here; only the indexing side effect is.
    seed(&catalog, &cache, dir.path(), "temp.md", "content\n");
    fs::remove_file(dir.path().join("temp.md")).unwrap();

    let results = HybridSearchService::keyword_only(&catalog)
        .search("content", SearchMode::Auto, 10)
        .unwrap();

    // An empty result set is acceptable; only a returned hit is inspected.
    if let Some(first) = results.first() {
        assert!(!first.canonical_path.is_empty());
    }
}
/// Documents the selection model: nothing is selected at first, and selecting
/// the first result is represented as `Some(0)`.
#[test]
fn result_selection_concept_documented() {
    let mut selection: Option<usize> = None;
    assert!(selection.is_none(), "no result selected initially");

    selection = Some(0);
    assert_eq!(selection, Some(0));
}
/// Fullwidth Latin letters and digits must be folded to their ASCII
/// (halfwidth) forms, and ASCII input must pass through unchanged.
///
/// The fullwidth inputs are spelled as `\u{..}` escapes on purpose: a literal
/// fullwidth character can be silently rewritten to ASCII by tooling that
/// normalises source text, which would turn this test into an identity check.
///
/// NOTE(review): assumes `normalize_query` performs width folding, as the test
/// name states — confirm against its implementation.
#[test]
fn fullwidth_normalizes_to_halfwidth() {
    // U+FF21..U+FF23 are fullwidth A, B, C; U+FF11..U+FF13 are fullwidth 1, 2, 3.
    assert_eq!(
        normalize_query("\u{FF21}\u{FF22}\u{FF23}\u{FF11}\u{FF12}\u{FF13}"),
        "ABC123"
    );
    // U+FF41..U+FF43 are fullwidth a, b, c.
    assert_eq!(normalize_query("\u{FF41}\u{FF42}\u{FF43}"), "abc");

    // Already-halfwidth input is left as is.
    assert_eq!(normalize_query("ABC123"), "ABC123");
    assert_eq!(normalize_query("abc"), "abc");
}
/// A snake_case identifier comes out of `normalize_query` unchanged.
#[test]
fn identifier_preserved_through_normalize() {
    let identifier = "client_secret";
    let normalized = normalize_query(identifier);
    assert_eq!(normalized, identifier);
}
/// A hyphenated, upper-case identifier such as `RFC-014` is not altered by
/// `normalize_query`.
#[test]
fn rfc_style_identifier_preserved() {
    let normalized = normalize_query("RFC-014");
    assert_eq!(normalized, "RFC-014");
}
/// Runs a Japanese query through `MultilingualKeywordEngine` against an
/// indexed Japanese document.
///
/// Only the absence of an error is checked; the returned hits are not
/// inspected, so this does not by itself prove which index served the query.
#[test]
fn cjk_query_routes_to_trigram() {
    use orbok_search::KeywordSearchEngine;
    use orbok_search::MultilingualKeywordEngine;

    let tmp = tempfile::tempdir().unwrap();
    let root = tmp.path();
    let (catalog, cache) = setup(root);
    seed(
        &catalog,
        &cache,
        root,
        "ja.md",
        "# 認証トークンのローテーション\n\nOAuthクライアントシークレットの有効期限を設定します。\n",
    );

    let engine = MultilingualKeywordEngine::new(&catalog);
    // Result contents are intentionally ignored.
    let _hits = engine.search("認証トークン", 10).unwrap();
}
/// An exact-mode search for an English identifier still finds the document
/// that contains it.
#[test]
fn japanese_query_does_not_break_english_search() {
    let tmp = tempfile::tempdir().unwrap();
    let root = tmp.path();
    let (catalog, cache) = setup(root);
    seed(
        &catalog,
        &cache,
        root,
        "code.md",
        "fn refresh_token() -> Token { ... }\n",
    );

    let service = HybridSearchService::keyword_only(&catalog);
    let hits = service
        .search("refresh_token", SearchMode::Exact, 10)
        .unwrap();
    assert!(
        !hits.is_empty(),
        "English identifier search must work alongside Japanese support"
    );
}
/// `contains_cjk` is true for text containing kanji or katakana (also when
/// mixed with Latin text) and false for pure ASCII.
#[test]
fn cjk_detection_correct() {
    let cases = [
        ("認証", true),
        ("OAuth クライアント", true),
        ("refresh_token", false),
        ("ABC 123", false),
    ];
    for (text, expected) in cases {
        assert_eq!(contains_cjk(text), expected, "input: {text:?}");
    }
}
/// Each of the four safe cleanup actions must run without error and must
/// leave at least one source in the catalog.
#[test]
fn safe_cleanup_preserves_sources() {
    use crate::CleanupService;
    use orbok_core::{CleanupAction, CleanupPlan};

    let tmp = tempfile::tempdir().unwrap();
    let root = tmp.path();
    let (catalog, cache) = setup(root);
    seed(&catalog, &cache, root, "note.md", "# Test\n\nContent.\n");

    // Re-queried on every call so each check reflects the current catalog state.
    let sources_present = || !SourceRepository::new(&catalog).list().unwrap().is_empty();
    assert!(sources_present());

    let cache_db = root.join("orbok-cache.sqlite3");
    let safe_actions = [
        CleanupAction::ClearSnippetCache,
        CleanupAction::ClearExpiredSearchCache,
        CleanupAction::ClearTemporaryExtraction,
        CleanupAction::RemoveReplacedStaleIndexes,
    ];
    for action in safe_actions {
        let plan = CleanupPlan::for_action(action, 0);
        let service = CleanupService::new(&catalog, &cache, &cache_db);
        service
            .run_safe(&plan)
            .expect("safe cleanup must not error");
        assert!(
            sources_present(),
            "source must survive safe cleanup action {action:?}"
        );
    }
}