1use std::collections::HashSet;
13use std::path::{Path, PathBuf};
14use std::sync::Arc;
15
16use crate::search::embedder::Embedder;
17use crate::search::fastembed_embedder::FastEmbedder;
18use crate::search::hash_embedder::HashEmbedder;
19use crate::search::model_download::{
20 ModelAcquisitionPolicy, ModelCacheState, ModelManifest, classify_model_cache,
21 classify_model_cache_metadata,
22};
23use crate::search::policy::{CliSemanticOverrides, SemanticPolicy};
24use crate::search::semantic_manifest::{
25 SemanticShardManifest, SemanticShardRecord, TierKind, semantic_shard_artifact_path_is_safe,
26};
27use crate::search::vector_index::{
28 ROLE_ASSISTANT, ROLE_USER, SemanticFilterMaps, VectorIndex, vector_index_path,
29};
30use crate::storage::sqlite::FrankenStorage;
31
/// Availability state of semantic search.
///
/// Every variant maps to a short label via `status_label` and to a one-line
/// human-readable description via `summary`.
#[derive(Debug, Clone)]
pub enum SemanticAvailability {
    /// Semantic search is usable with the embedder identified by `embedder_id`.
    Ready { embedder_id: String },

    /// The model is not installed.
    NotInstalled,

    /// Consent is required before the model may be downloaded.
    NeedsConsent,

    /// The model is being downloaded.
    Downloading {
        /// Download progress as a percentage.
        progress_pct: u8,
        /// Bytes downloaded so far.
        bytes_downloaded: u64,
        /// Total bytes expected.
        total_bytes: u64,
    },

    /// The model checksum is being verified.
    Verifying,

    /// The vector index is being built.
    IndexBuilding {
        /// Embedder the index is being built for.
        embedder_id: String,
        /// Build progress as a percentage, when one is available.
        progress_pct: Option<u8>,
        /// Items indexed so far.
        items_indexed: u64,
        /// Total number of items to index.
        total_items: u64,
    },

    /// The hash-based fallback embedder is in use; searching is still possible.
    HashFallback,

    /// Semantic search is disabled; `reason` explains why.
    Disabled { reason: String },

    /// Model files are missing from the model directory.
    ModelMissing {
        /// Directory the model was expected in.
        model_dir: PathBuf,
        /// Names of the files that were not found.
        missing_files: Vec<String>,
    },

    /// No vector index exists at `index_path`.
    IndexMissing { index_path: PathBuf },

    /// The database at `db_path` could not be opened; `error` is the failure text.
    DatabaseUnavailable { db_path: PathBuf, error: String },

    /// Loading a semantic component failed; `context` describes what failed.
    LoadFailed { context: String },

    /// The installed model revision differs from the expected one.
    UpdateAvailable {
        /// Embedder the update applies to.
        embedder_id: String,
        /// Revision currently installed.
        current_revision: String,
        /// Revision that is expected instead.
        latest_revision: String,
    },
}
110
111impl SemanticAvailability {
112 pub fn is_ready(&self) -> bool {
114 matches!(self, SemanticAvailability::Ready { .. })
115 }
116
117 pub fn has_update(&self) -> bool {
119 matches!(self, SemanticAvailability::UpdateAvailable { .. })
120 }
121
122 pub fn is_building(&self) -> bool {
124 matches!(self, SemanticAvailability::IndexBuilding { .. })
125 }
126
127 pub fn is_downloading(&self) -> bool {
129 matches!(self, SemanticAvailability::Downloading { .. })
130 }
131
132 pub fn needs_consent(&self) -> bool {
134 matches!(self, SemanticAvailability::NeedsConsent)
135 }
136
137 pub fn is_hash_fallback(&self) -> bool {
139 matches!(self, SemanticAvailability::HashFallback)
140 }
141
142 pub fn is_disabled(&self) -> bool {
144 matches!(self, SemanticAvailability::Disabled { .. })
145 }
146
147 pub fn is_not_installed(&self) -> bool {
149 matches!(
150 self,
151 SemanticAvailability::NotInstalled | SemanticAvailability::ModelMissing { .. }
152 )
153 }
154
155 pub fn is_error(&self) -> bool {
157 matches!(
158 self,
159 SemanticAvailability::LoadFailed { .. }
160 | SemanticAvailability::DatabaseUnavailable { .. }
161 )
162 }
163
164 pub fn can_search(&self) -> bool {
166 matches!(
167 self,
168 SemanticAvailability::Ready { .. } | SemanticAvailability::HashFallback
169 )
170 }
171
172 pub fn download_progress(&self) -> Option<(u8, u64, u64)> {
174 match self {
175 SemanticAvailability::Downloading {
176 progress_pct,
177 bytes_downloaded,
178 total_bytes,
179 } => Some((*progress_pct, *bytes_downloaded, *total_bytes)),
180 _ => None,
181 }
182 }
183
184 pub fn index_progress(&self) -> Option<(Option<u8>, u64, u64)> {
186 match self {
187 SemanticAvailability::IndexBuilding {
188 progress_pct,
189 items_indexed,
190 total_items,
191 ..
192 } => Some((*progress_pct, *items_indexed, *total_items)),
193 _ => None,
194 }
195 }
196
197 pub fn status_label(&self) -> &'static str {
199 match self {
200 SemanticAvailability::Ready { .. } => "SEM",
201 SemanticAvailability::HashFallback => "SEM*",
202 SemanticAvailability::NotInstalled => "LEX",
203 SemanticAvailability::NeedsConsent => "LEX",
204 SemanticAvailability::Downloading { .. } => "DL...",
205 SemanticAvailability::Verifying => "VFY...",
206 SemanticAvailability::IndexBuilding { .. } => "IDX...",
207 SemanticAvailability::Disabled { .. } => "OFF",
208 SemanticAvailability::ModelMissing { .. } => "NOMODEL",
209 SemanticAvailability::IndexMissing { .. } => "NOIDX",
210 SemanticAvailability::DatabaseUnavailable { .. } => "NODB",
211 SemanticAvailability::LoadFailed { .. } => "ERR",
212 SemanticAvailability::UpdateAvailable { .. } => "UPD",
213 }
214 }
215
216 pub fn summary(&self) -> String {
218 match self {
219 SemanticAvailability::Ready { embedder_id } => {
220 format!("semantic ready ({embedder_id})")
221 }
222 SemanticAvailability::NotInstalled => "model not installed".to_string(),
223 SemanticAvailability::NeedsConsent => "consent required for model download".to_string(),
224 SemanticAvailability::Downloading {
225 progress_pct,
226 bytes_downloaded,
227 total_bytes,
228 } => {
229 let mb_done = *bytes_downloaded as f64 / 1_048_576.0;
230 let mb_total = *total_bytes as f64 / 1_048_576.0;
231 format!("downloading model: {progress_pct}% ({mb_done:.1}/{mb_total:.1} MB)")
232 }
233 SemanticAvailability::Verifying => "verifying model checksum".to_string(),
234 SemanticAvailability::IndexBuilding {
235 items_indexed,
236 total_items,
237 progress_pct,
238 ..
239 } => {
240 if let Some(pct) = progress_pct {
241 format!("building index: {pct}% ({items_indexed}/{total_items})")
242 } else {
243 format!("building index: {items_indexed}/{total_items}")
244 }
245 }
246 SemanticAvailability::HashFallback => "using hash-based fallback".to_string(),
247 SemanticAvailability::Disabled { reason } => {
248 format!("semantic disabled: {reason}")
249 }
250 SemanticAvailability::ModelMissing { model_dir, .. } => {
251 format!("model missing at {}", model_dir.display())
252 }
253 SemanticAvailability::IndexMissing { index_path } => {
254 format!("vector index missing at {}", index_path.display())
255 }
256 SemanticAvailability::DatabaseUnavailable { error, .. } => {
257 format!("db unavailable ({error})")
258 }
259 SemanticAvailability::LoadFailed { context } => {
260 format!("semantic load failed ({context})")
261 }
262 SemanticAvailability::UpdateAvailable {
263 current_revision,
264 latest_revision,
265 ..
266 } => {
267 format!("update available: {current_revision} -> {latest_revision}")
268 }
269 }
270 }
271}
272
/// Everything needed to run a semantic query once loading has succeeded.
pub struct SemanticContext {
    /// Embedder used to embed query text.
    pub embedder: Arc<dyn Embedder>,
    /// Primary vector index: the first shard of a sharded generation, or the
    /// monolithic index file.
    pub index: VectorIndex,
    /// Remaining shard indexes; empty when the monolithic index was loaded.
    pub additional_indexes: Vec<VectorIndex>,
    /// Filter maps built from the storage database.
    pub filter_maps: SemanticFilterMaps,
    /// Role set attached to the context; the loaders in this file populate it
    /// with the user and assistant roles.
    pub roles: Option<HashSet<u8>>,
}
280
/// Result of attempting to load semantic search.
pub struct SemanticSetup {
    /// Availability state describing the outcome of the load.
    pub availability: SemanticAvailability,
    /// Loaded context; the loaders in this file set it to `None` whenever they
    /// return early with a non-searchable availability.
    pub context: Option<SemanticContext>,
}
285
286fn semantic_sidecar_path(data_dir: &Path, recorded_path: &str) -> Option<PathBuf> {
287 semantic_shard_artifact_path_is_safe(recorded_path).then(|| data_dir.join(recorded_path))
288}
289
290fn matching_complete_shard_records(
291 data_dir: &Path,
292 tier: TierKind,
293 embedder_id: &str,
294 db_fingerprint: &str,
295) -> Result<Option<Vec<SemanticShardRecord>>, String> {
296 let manifest = match SemanticShardManifest::load(data_dir) {
297 Ok(Some(manifest)) => manifest,
298 Ok(None) => return Ok(None),
299 Err(err) => return Err(format!("semantic shard manifest: {err}")),
300 };
301 let summary = manifest.summary(tier, embedder_id, db_fingerprint);
302 if !summary.complete {
303 return Ok(None);
304 }
305
306 let mut records = manifest
307 .shards
308 .into_iter()
309 .filter(|shard| shard.matches_generation(tier, embedder_id, db_fingerprint))
310 .collect::<Vec<_>>();
311 records.sort_by_key(|shard| shard.shard_index);
312 if records.len() != usize::try_from(summary.shard_count).unwrap_or(usize::MAX) {
313 return Ok(None);
314 }
315
316 let Some(first) = records.first() else {
317 return Ok(None);
318 };
319 for (expected_index, shard) in records.iter().enumerate() {
320 if shard.shard_index != u32::try_from(expected_index).unwrap_or(u32::MAX)
321 || !shard.ready
322 || !shard.mmap_ready
323 || shard.model_revision != first.model_revision
324 || shard.schema_version != crate::search::policy::SEMANTIC_SCHEMA_VERSION
325 || shard.chunking_version != crate::search::policy::CHUNKING_STRATEGY_VERSION
326 || shard.dimension == 0
327 || shard.dimension != first.dimension
328 || shard.total_conversations != first.total_conversations
329 {
330 return Ok(None);
331 }
332 let Some(path) = semantic_sidecar_path(data_dir, &shard.index_path) else {
333 return Ok(None);
334 };
335 if !path.is_file() {
336 return Ok(None);
337 }
338 }
339
340 Ok(Some(records))
341}
342
343fn load_complete_shard_indexes(
344 data_dir: &Path,
345 embedder_id: &str,
346 db_fingerprint: &str,
347) -> Result<Option<Vec<VectorIndex>>, String> {
348 for tier in [TierKind::Quality, TierKind::Fast] {
349 let Some(records) =
350 matching_complete_shard_records(data_dir, tier, embedder_id, db_fingerprint)?
351 else {
352 continue;
353 };
354
355 let mut indexes = Vec::with_capacity(records.len());
356 for shard in records {
357 let Some(path) = semantic_sidecar_path(data_dir, &shard.index_path) else {
358 return Ok(None);
359 };
360 let index = VectorIndex::open(&path)
361 .map_err(|err| format!("semantic shard vector index {}: {err}", path.display()))?;
362 if index.embedder_id() != embedder_id || index.dimension() != shard.dimension {
363 return Err(format!(
364 "semantic shard vector index {} metadata mismatch",
365 path.display()
366 ));
367 }
368 indexes.push(index);
369 }
370 if !indexes.is_empty() {
371 tracing::info!(
372 tier = tier.as_str(),
373 embedder = embedder_id,
374 shard_count = indexes.len(),
375 "loaded complete semantic shard generation"
376 );
377 return Ok(Some(indexes));
378 }
379 }
380
381 Ok(None)
382}
383
384fn load_complete_shard_indexes_for_current_db(
385 data_dir: &Path,
386 db_path: &Path,
387 embedder_id: &str,
388 context_label: &'static str,
389) -> Option<Vec<VectorIndex>> {
390 let db_fingerprint = match crate::indexer::lexical_storage_fingerprint_for_db(db_path) {
391 Ok(fingerprint) => fingerprint,
392 Err(err) => {
393 tracing::debug!(
394 error = %err,
395 embedder = embedder_id,
396 context = context_label,
397 "semantic shard context unavailable: failed to fingerprint current DB"
398 );
399 return None;
400 }
401 };
402
403 match load_complete_shard_indexes(data_dir, embedder_id, &db_fingerprint) {
404 Ok(indexes) => indexes,
405 Err(err) => {
406 tracing::debug!(
407 error = %err,
408 embedder = embedder_id,
409 context = context_label,
410 "semantic shard context unavailable"
411 );
412 None
413 }
414 }
415}
416
417pub fn load_semantic_context(data_dir: &Path, db_path: &Path) -> SemanticSetup {
422 load_semantic_context_inner(data_dir, db_path, true)
423}
424
425pub(crate) fn probe_semantic_availability(data_dir: &Path) -> SemanticAvailability {
429 let model_dir = FastEmbedder::default_model_dir(data_dir);
430 let manifest = ModelManifest::minilm_v2();
431 let semantic_policy = SemanticPolicy::resolve(&CliSemanticOverrides::default());
432 let acquisition_policy = ModelAcquisitionPolicy::from_semantic_policy(&semantic_policy);
433 let cache_report = classify_model_cache_metadata(&model_dir, &manifest, &acquisition_policy);
434
435 if let Some(availability) =
436 semantic_availability_from_cache_state(&model_dir, &cache_report.state, true)
437 {
438 return availability;
439 }
440
441 let index_path = vector_index_path(data_dir, FastEmbedder::embedder_id_static());
442 if !index_path.is_file() {
443 return SemanticAvailability::IndexMissing { index_path };
444 }
445
446 SemanticAvailability::Ready {
447 embedder_id: FastEmbedder::embedder_id_static().to_string(),
448 }
449}
450
451pub(crate) fn probe_hash_semantic_availability(data_dir: &Path) -> SemanticAvailability {
453 let embedder = HashEmbedder::default();
454 let index_path = vector_index_path(data_dir, embedder.id());
455 if !index_path.is_file() {
456 SemanticAvailability::IndexMissing { index_path }
457 } else {
458 SemanticAvailability::HashFallback
459 }
460}
461
462pub fn load_hash_semantic_context(data_dir: &Path, db_path: &Path) -> SemanticSetup {
464 let embedder = HashEmbedder::default();
465 let index_path = vector_index_path(data_dir, embedder.id());
466 let monolithic_present = index_path.is_file();
467 let shard_indexes = load_complete_shard_indexes_for_current_db(
468 data_dir,
469 db_path,
470 embedder.id(),
471 "hash semantic",
472 );
473 if !monolithic_present && shard_indexes.is_none() {
474 return SemanticSetup {
475 availability: SemanticAvailability::IndexMissing { index_path },
476 context: None,
477 };
478 }
479
480 let storage = match FrankenStorage::open_readonly(db_path) {
481 Ok(storage) => storage,
482 Err(err) => {
483 return SemanticSetup {
484 availability: SemanticAvailability::DatabaseUnavailable {
485 db_path: db_path.to_path_buf(),
486 error: err.to_string(),
487 },
488 context: None,
489 };
490 }
491 };
492
493 let filter_maps = match SemanticFilterMaps::from_storage(&storage) {
494 Ok(maps) => maps,
495 Err(err) => {
496 return SemanticSetup {
497 availability: SemanticAvailability::LoadFailed {
498 context: format!("filter maps: {err}"),
499 },
500 context: None,
501 };
502 }
503 };
504
505 let (index, additional_indexes) = if let Some(mut indexes) = shard_indexes {
506 let index = indexes.remove(0);
507 (index, indexes)
508 } else {
509 match VectorIndex::open(&index_path) {
510 Ok(index) => (index, Vec::new()),
511 Err(err) => {
512 return SemanticSetup {
513 availability: SemanticAvailability::LoadFailed {
514 context: format!("vector index: {err}"),
515 },
516 context: None,
517 };
518 }
519 }
520 };
521
522 let roles = Some(HashSet::from([ROLE_USER, ROLE_ASSISTANT]));
523 let embedder = Arc::new(embedder) as Arc<dyn Embedder>;
524
525 SemanticSetup {
526 availability: SemanticAvailability::HashFallback,
527 context: Some(SemanticContext {
528 embedder,
529 index,
530 additional_indexes,
531 filter_maps,
532 roles,
533 }),
534 }
535}
536
537pub fn load_semantic_context_no_version_check(data_dir: &Path, db_path: &Path) -> SemanticSetup {
542 load_semantic_context_inner(data_dir, db_path, false)
543}
544
545fn load_semantic_context_inner(
546 data_dir: &Path,
547 db_path: &Path,
548 check_for_updates: bool,
549) -> SemanticSetup {
550 let model_dir = FastEmbedder::default_model_dir(data_dir);
551 let manifest = ModelManifest::minilm_v2();
552 let semantic_policy = SemanticPolicy::resolve(&CliSemanticOverrides::default());
553 let acquisition_policy = ModelAcquisitionPolicy::from_semantic_policy(&semantic_policy);
554 let cache_report = classify_model_cache(&model_dir, &manifest, &acquisition_policy);
555
556 if let Some(availability) =
557 semantic_availability_from_cache_state(&model_dir, &cache_report.state, check_for_updates)
558 {
559 return SemanticSetup {
560 availability,
561 context: None,
562 };
563 }
564
565 let index_path = vector_index_path(data_dir, FastEmbedder::embedder_id_static());
566 let monolithic_present = index_path.is_file();
567 let shard_indexes = load_complete_shard_indexes_for_current_db(
568 data_dir,
569 db_path,
570 FastEmbedder::embedder_id_static(),
571 "semantic",
572 );
573 if !monolithic_present && shard_indexes.is_none() {
574 return SemanticSetup {
575 availability: SemanticAvailability::IndexMissing { index_path },
576 context: None,
577 };
578 }
579
580 let storage = match FrankenStorage::open_readonly(db_path) {
581 Ok(storage) => storage,
582 Err(err) => {
583 return SemanticSetup {
584 availability: SemanticAvailability::DatabaseUnavailable {
585 db_path: db_path.to_path_buf(),
586 error: err.to_string(),
587 },
588 context: None,
589 };
590 }
591 };
592
593 let filter_maps = match SemanticFilterMaps::from_storage(&storage) {
594 Ok(maps) => maps,
595 Err(err) => {
596 return SemanticSetup {
597 availability: SemanticAvailability::LoadFailed {
598 context: format!("filter maps: {err}"),
599 },
600 context: None,
601 };
602 }
603 };
604
605 let (index, additional_indexes) = if let Some(mut indexes) = shard_indexes {
606 let index = indexes.remove(0);
607 (index, indexes)
608 } else {
609 match VectorIndex::open(&index_path) {
610 Ok(index) => (index, Vec::new()),
611 Err(err) => {
612 return SemanticSetup {
613 availability: SemanticAvailability::LoadFailed {
614 context: format!("vector index: {err}"),
615 },
616 context: None,
617 };
618 }
619 }
620 };
621
622 let embedder = match FastEmbedder::load_from_dir(&model_dir) {
623 Ok(embedder) => Arc::new(embedder) as Arc<dyn Embedder>,
624 Err(err) => {
625 return SemanticSetup {
626 availability: SemanticAvailability::LoadFailed {
627 context: format!("model load: {err}"),
628 },
629 context: None,
630 };
631 }
632 };
633
634 let roles = Some(HashSet::from([ROLE_USER, ROLE_ASSISTANT]));
635
636 SemanticSetup {
637 availability: SemanticAvailability::Ready {
638 embedder_id: embedder.id().to_string(),
639 },
640 context: Some(SemanticContext {
641 embedder,
642 index,
643 additional_indexes,
644 filter_maps,
645 roles,
646 }),
647 }
648}
649
650fn semantic_availability_from_cache_state(
651 model_dir: &Path,
652 state: &ModelCacheState,
653 check_for_updates: bool,
654) -> Option<SemanticAvailability> {
655 match state {
656 ModelCacheState::Acquired { .. }
657 | ModelCacheState::PreseededLocal { .. }
658 | ModelCacheState::MirrorSourced { .. } => None,
659 ModelCacheState::IncompatibleVersion {
660 current_revision,
661 expected_revision,
662 } if check_for_updates => Some(SemanticAvailability::UpdateAvailable {
663 embedder_id: FastEmbedder::embedder_id_static().to_string(),
664 current_revision: current_revision.clone(),
665 latest_revision: expected_revision.clone(),
666 }),
667 ModelCacheState::IncompatibleVersion { .. } => None,
668 ModelCacheState::NotAcquired {
669 missing_files,
670 needs_consent,
671 } => {
672 if *needs_consent {
673 Some(SemanticAvailability::NeedsConsent)
674 } else {
675 Some(SemanticAvailability::ModelMissing {
676 model_dir: model_dir.to_path_buf(),
677 missing_files: missing_files.clone(),
678 })
679 }
680 }
681 ModelCacheState::Acquiring {
682 bytes_present,
683 total_bytes,
684 ..
685 } => {
686 let progress_pct = if *total_bytes == 0 {
687 0
688 } else {
689 ((*bytes_present as f64 / *total_bytes as f64) * 100.0).min(100.0) as u8
690 };
691 Some(SemanticAvailability::Downloading {
692 progress_pct,
693 bytes_downloaded: *bytes_present,
694 total_bytes: *total_bytes,
695 })
696 }
697 ModelCacheState::ChecksumMismatch {
698 file,
699 expected,
700 actual,
701 } => Some(SemanticAvailability::LoadFailed {
702 context: format!(
703 "model checksum mismatch for {file}: expected {expected}, got {actual}"
704 ),
705 }),
706 ModelCacheState::DisabledByPolicy { reason } => Some(SemanticAvailability::Disabled {
707 reason: reason.clone(),
708 }),
709 ModelCacheState::BudgetBlocked {
710 required_bytes,
711 max_bytes,
712 } => Some(SemanticAvailability::Disabled {
713 reason: format!(
714 "semantic model requires {required_bytes} bytes but policy allows {max_bytes}"
715 ),
716 }),
717 ModelCacheState::QuarantinedCorrupt {
718 marker_path,
719 reason,
720 } => Some(SemanticAvailability::LoadFailed {
721 context: format!(
722 "model cache quarantined at {}: {reason}",
723 marker_path.display()
724 ),
725 }),
726 ModelCacheState::OfflineBlocked { missing_files } => Some(SemanticAvailability::Disabled {
727 reason: format!(
728 "offline and semantic model is not acquired: missing {}",
729 missing_files.join(", ")
730 ),
731 }),
732 }
733}
734
735pub fn needs_index_rebuild(data_dir: &Path) -> bool {
743 let index_path = vector_index_path(data_dir, FastEmbedder::embedder_id_static());
744
745 if !index_path.is_file() {
746 return false;
748 }
749
750 match VectorIndex::open(&index_path) {
752 Ok(index) => {
753 let expected_id = FastEmbedder::embedder_id_static();
756 index.embedder_id() != expected_id
757 }
758 Err(_) => {
759 true
761 }
762 }
763}
764
765pub fn delete_vector_index_for_rebuild(data_dir: &Path) -> std::io::Result<bool> {
776 let index_path = vector_index_path(data_dir, FastEmbedder::embedder_id_static());
777
778 if index_path.is_file() {
779 std::fs::remove_file(&index_path)?;
780 Ok(true)
781 } else {
782 Ok(false)
783 }
784}
785
/// Returns the default model directory under `data_dir`, as defined by
/// `FastEmbedder::default_model_dir`.
pub fn default_model_dir(data_dir: &Path) -> PathBuf {
    FastEmbedder::default_model_dir(data_dir)
}
790
/// Returns the default model manifest, `ModelManifest::minilm_v2()`.
pub fn default_model_manifest() -> ModelManifest {
    ModelManifest::minilm_v2()
}
795
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    // One table-driven case: a state, its expected status label, and a
    // predicate that must hold for that state.
    type AvailabilityTuiCase = (
        SemanticAvailability,
        &'static str,
        fn(&SemanticAvailability) -> bool,
    );

    // `Ready` is searchable, labelled "SEM", and not an update state.
    #[test]
    fn test_semantic_availability_ready() {
        let ready = SemanticAvailability::Ready {
            embedder_id: "test-123".into(),
        };
        assert!(ready.summary().contains("semantic ready"));
        assert!(ready.is_ready());
        assert!(!ready.has_update());
        assert!(ready.can_search());
        assert_eq!(ready.status_label(), "SEM");
    }

    // A plain relative path is joined onto the data dir; absolute paths and
    // paths containing `..` or a leading `./` are rejected.
    #[test]
    fn semantic_sidecar_path_rejects_paths_outside_data_dir() {
        let tmp = tempdir().unwrap();
        let safe = semantic_sidecar_path(tmp.path(), "vector_index/shards/hash/shard-0.fsvi")
            .expect("safe relative shard path");
        assert_eq!(
            safe,
            tmp.path().join("vector_index/shards/hash/shard-0.fsvi")
        );

        for unsafe_path in [
            // Absolute path, even though it points inside the data dir.
            tmp.path()
                .join("outside.fsvi")
                .to_string_lossy()
                .to_string(),
            "../outside.fsvi".to_string(),
            "vector_index/../outside.fsvi".to_string(),
            "./vector_index/shards/hash/shard-0.fsvi".to_string(),
        ] {
            assert!(
                semantic_sidecar_path(tmp.path(), &unsafe_path).is_none(),
                "unsafe semantic sidecar path should be rejected: {unsafe_path}"
            );
        }
    }

    // `UpdateAvailable` reports an update, is not ready, and is labelled "UPD".
    #[test]
    fn test_semantic_availability_update() {
        let update = SemanticAvailability::UpdateAvailable {
            embedder_id: "test".into(),
            current_revision: "v1".into(),
            latest_revision: "v2".into(),
        };
        assert!(update.summary().contains("update available"));
        assert!(!update.is_ready());
        assert!(update.has_update());
        assert_eq!(update.status_label(), "UPD");
    }

    // `IndexBuilding` exposes its progress through both `summary` and
    // `index_progress`.
    #[test]
    fn test_semantic_availability_index_building() {
        let building = SemanticAvailability::IndexBuilding {
            embedder_id: "test".into(),
            progress_pct: Some(45),
            items_indexed: 100,
            total_items: 200,
        };
        assert!(building.summary().contains("building index"));
        assert!(building.summary().contains("45%"));
        assert!(building.is_building());
        assert_eq!(building.status_label(), "IDX...");

        let (pct, done, total) = building.index_progress().unwrap();
        assert_eq!(pct, Some(45));
        assert_eq!(done, 100);
        assert_eq!(total, 200);
    }

    // `Downloading` exposes its progress through both `summary` and
    // `download_progress`.
    #[test]
    fn test_semantic_availability_downloading() {
        let downloading = SemanticAvailability::Downloading {
            progress_pct: 50,
            bytes_downloaded: 10_000_000,
            total_bytes: 20_000_000,
        };
        assert!(downloading.is_downloading());
        assert!(downloading.summary().contains("downloading"));
        assert!(downloading.summary().contains("50%"));
        assert_eq!(downloading.status_label(), "DL...");

        let (pct, bytes, total) = downloading.download_progress().unwrap();
        assert_eq!(pct, 50);
        assert_eq!(bytes, 10_000_000);
        assert_eq!(total, 20_000_000);
    }

    // Table-driven check of labels and predicates for the simpler states.
    #[test]
    fn test_semantic_availability_tui_states() {
        let cases: &[AvailabilityTuiCase] = &[
            (
                SemanticAvailability::NotInstalled,
                "LEX",
                SemanticAvailability::is_not_installed,
            ),
            (
                SemanticAvailability::NeedsConsent,
                "LEX",
                SemanticAvailability::needs_consent,
            ),
            (SemanticAvailability::Verifying, "VFY...", |state| {
                state.summary().contains("verifying")
            }),
            (SemanticAvailability::HashFallback, "SEM*", |state| {
                state.is_hash_fallback() && state.can_search()
            }),
            (
                SemanticAvailability::Disabled {
                    reason: "offline mode".into(),
                },
                "OFF",
                |state| state.is_disabled() && state.summary().contains("offline"),
            ),
        ];

        for (state, expected_label, predicate) in cases {
            assert_eq!(state.status_label(), *expected_label, "{state:?}");
            assert!(predicate(state), "{state:?}");
        }
    }

    // `LoadFailed` and `DatabaseUnavailable` both count as errors.
    #[test]
    fn test_semantic_availability_error_states() {
        let load_failed = SemanticAvailability::LoadFailed {
            context: "test error".into(),
        };
        assert!(load_failed.is_error());
        assert_eq!(load_failed.status_label(), "ERR");

        let db_unavail = SemanticAvailability::DatabaseUnavailable {
            db_path: PathBuf::from("/test"),
            error: "locked".into(),
        };
        assert!(db_unavail.is_error());
        assert_eq!(db_unavail.status_label(), "NODB");
    }

    // With no index file present there is nothing to rebuild.
    #[test]
    fn test_needs_index_rebuild_no_index() {
        let tmp = tempdir().unwrap();
        assert!(!needs_index_rebuild(tmp.path()));
    }

    // Deleting when no index file exists succeeds and reports `false`.
    #[test]
    fn test_delete_vector_index_no_file() {
        let tmp = tempdir().unwrap();
        let result = delete_vector_index_for_rebuild(tmp.path());
        assert!(result.is_ok());
        assert!(!result.unwrap());
    }

    // Test helper: writes a hash-embedder vector index at `path` holding
    // `record_count` records ("doc-0", "doc-1", ...), all with the same
    // vector, creating parent directories as needed.
    fn write_hash_vector_index(path: &Path, record_count: usize) {
        let embedder = HashEmbedder::default();
        if let Some(parent) = path.parent() {
            std::fs::create_dir_all(parent).expect("create vector index parent");
        }
        let mut writer = VectorIndex::create_with_revision(
            path,
            embedder.id(),
            "hash",
            embedder.dimension(),
            frankensearch::index::Quantization::F16,
        )
        .expect("create hash vector index");
        let mut vector = vec![0.0_f32; embedder.dimension()];
        vector[0] = 1.0;
        for idx in 0..record_count {
            writer
                .write_record(&format!("doc-{idx}"), &vector)
                .expect("write hash vector record");
        }
        writer.finish().expect("finish hash vector index");
    }

    // With both a monolithic index and a complete two-shard generation on
    // disk for the current DB fingerprint, the loader must pick the shards.
    #[test]
    fn load_hash_context_prefers_current_complete_shards_over_monolithic_file() {
        let tmp = tempdir().unwrap();
        let db_path = tmp.path().join("cass.db");
        // Create the database file, then drop the handle before fingerprinting.
        let storage = FrankenStorage::open(&db_path).expect("create cass db");
        drop(storage);
        let db_fingerprint = crate::indexer::lexical_storage_fingerprint_for_db(&db_path)
            .expect("fingerprint cass db");

        // Monolithic index with a single record.
        let embedder = HashEmbedder::default();
        write_hash_vector_index(&vector_index_path(tmp.path(), embedder.id()), 1);

        // Two shards with one record each, recorded as a complete generation.
        let mut records = Vec::new();
        for shard_index in 0..2_u32 {
            let relative_path = format!("vector_index/shards/hash/shard-{shard_index}.fsvi");
            let shard_path = tmp.path().join(&relative_path);
            write_hash_vector_index(&shard_path, 1);
            records.push(SemanticShardRecord {
                tier: TierKind::Fast,
                embedder_id: embedder.id().to_string(),
                model_revision: "hash".to_string(),
                schema_version: crate::search::policy::SEMANTIC_SCHEMA_VERSION,
                chunking_version: crate::search::policy::CHUNKING_STRATEGY_VERSION,
                dimension: embedder.dimension(),
                shard_index,
                shard_count: 2,
                doc_count: 1,
                total_conversations: 1,
                db_fingerprint: db_fingerprint.clone(),
                index_path: relative_path,
                quantization: "f16".to_string(),
                mmap_ready: true,
                ann_index_path: None,
                ann_size_bytes: 0,
                ann_ready: false,
                size_bytes: std::fs::metadata(&shard_path)
                    .expect("stat hash shard")
                    .len(),
                started_at_ms: 1_733_100_000_000,
                completed_at_ms: 1_733_100_000_000 + i64::from(shard_index),
                ready: true,
            });
        }
        let mut manifest = SemanticShardManifest {
            shards: records,
            ..Default::default()
        };
        manifest.save(tmp.path()).expect("save shard manifest");

        let setup = load_hash_semantic_context(tmp.path(), &db_path);
        assert!(
            matches!(setup.availability, SemanticAvailability::HashFallback),
            "hash semantic availability should remain ready: {:?}",
            setup.availability
        );
        let context = setup
            .context
            .expect("complete current shards should load a semantic context");
        // Two shards means one primary index plus one additional index.
        assert_eq!(
            context.additional_indexes.len(),
            1,
            "complete current shards must not be shadowed by an older monolithic vector file"
        );
        // Two records in total proves both shards were loaded, not the
        // one-record monolithic file.
        let loaded_records = context.index.record_count()
            + context
                .additional_indexes
                .iter()
                .map(VectorIndex::record_count)
                .sum::<usize>();
        assert_eq!(loaded_records, 2);
    }
}