1use crate::digest::{CollectionLevel1, CollectionLevel2, SeqColComparison, SequenceCollectionMetadata};
8use crate::digest::types::{compare_arrays, level2_to_comparison_arrays};
9use crate::hashkeyable::HashKeyable;
10use crate::store::{PagedResult, ReadonlyRefgetStore};
11use anyhow::{anyhow, Result};
12
13pub trait SeqColService {
21 fn get_collection_level1(&self, digest: &str) -> Result<CollectionLevel1>;
23
24 fn get_collection_level2(&self, digest: &str) -> Result<CollectionLevel2>;
26
27 fn compare(&self, digest_a: &str, digest_b: &str) -> Result<SeqColComparison>;
29
30 fn compare_with_level2(
32 &self,
33 digest_a: &str,
34 external: &CollectionLevel2,
35 ) -> Result<SeqColComparison>;
36
37 fn find_collections_by_attribute(
39 &self,
40 attr_name: &str,
41 attr_digest: &str,
42 ) -> Result<Vec<String>>;
43
44 fn get_attribute(
46 &self,
47 attr_name: &str,
48 attr_digest: &str,
49 ) -> Result<Option<serde_json::Value>>;
50
51 fn list_collections(
53 &self,
54 page: usize,
55 page_size: usize,
56 filters: &[(&str, &str)],
57 ) -> Result<PagedResult<SequenceCollectionMetadata>>;
58
59 fn collection_count(&self) -> usize;
61}
62
63pub(crate) fn metadata_matches_attribute(
66 meta: &SequenceCollectionMetadata,
67 attr_name: &str,
68 attr_digest: &str,
69) -> Result<bool> {
70 match attr_name {
71 "names" => Ok(meta.names_digest == attr_digest),
72 "lengths" => Ok(meta.lengths_digest == attr_digest),
73 "sequences" => Ok(meta.sequences_digest == attr_digest),
74 "name_length_pairs" => Ok(meta
75 .name_length_pairs_digest
76 .as_deref()
77 .map_or(false, |d| d == attr_digest)),
78 "sorted_name_length_pairs" => Ok(meta
79 .sorted_name_length_pairs_digest
80 .as_deref()
81 .map_or(false, |d| d == attr_digest)),
82 "sorted_sequences" => Ok(meta
83 .sorted_sequences_digest
84 .as_deref()
85 .map_or(false, |d| d == attr_digest)),
86 _ => Err(anyhow!(
87 "Unknown attribute: '{}'. Supported: names, lengths, sequences, \
88 name_length_pairs, sorted_name_length_pairs, sorted_sequences",
89 attr_name
90 )),
91 }
92}
93
/// Collection count above which the brute-force attribute scan prints a
/// "may be slow" warning to stderr before scanning.
const ATTRIBUTE_SEARCH_WARN_THRESHOLD: usize = 10_000;

/// Collection count above which the brute-force attribute scan refuses to run
/// and returns an error instead.
const ATTRIBUTE_SEARCH_ERROR_THRESHOLD: usize = 100_000;
99
100impl ReadonlyRefgetStore {
101 pub fn enable_ancillary_digests(&mut self) {
103 self.ancillary_digests = true;
104 }
105
106 pub fn disable_ancillary_digests(&mut self) {
108 self.ancillary_digests = false;
109 }
110
111 pub fn has_ancillary_digests(&self) -> bool {
113 self.ancillary_digests
114 }
115
116 pub fn has_attribute_index(&self) -> bool {
118 self.attribute_index
119 }
120
121 pub fn get_collection_level1(&self, digest: &str) -> Result<CollectionLevel1> {
124 let key = digest.to_key();
125 let record = self
126 .collections
127 .get(&key)
128 .ok_or_else(|| anyhow!("Collection not found: {}", digest))?;
129 Ok(record.metadata().to_level1())
130 }
131
132 pub fn get_collection_level2(&self, digest: &str) -> Result<CollectionLevel2> {
135 let collection = self.get_collection(digest)?;
136 Ok(collection.to_level2())
137 }
138
139 pub fn compare(&self, digest_a: &str, digest_b: &str) -> Result<SeqColComparison> {
141 let coll_a = self.get_collection(digest_a)?;
142 let coll_b = self.get_collection(digest_b)?;
143 Ok(coll_a.compare(&coll_b))
144 }
145
146 pub fn compare_with_level2(
155 &self,
156 digest_a: &str,
157 external: &CollectionLevel2,
158 ) -> Result<SeqColComparison> {
159 let coll_a = self.get_collection(digest_a)?;
160 let arrays_a = coll_a.to_comparison_arrays();
161 let arrays_b = level2_to_comparison_arrays(external);
162 Ok(compare_arrays(
163 arrays_a,
164 arrays_b,
165 coll_a.metadata.digest.clone(),
166 None,
167 ))
168 }
169
170 pub fn find_collections_by_attribute(
178 &self,
179 attr_name: &str,
180 attr_digest: &str,
181 ) -> Result<Vec<String>> {
182 if self.attribute_index {
183 self.find_collections_by_attribute_indexed(attr_name, attr_digest)
184 } else {
185 self.find_collections_by_attribute_scan(attr_name, attr_digest)
186 }
187 }
188
189 fn find_collections_by_attribute_scan(
192 &self,
193 attr_name: &str,
194 attr_digest: &str,
195 ) -> Result<Vec<String>> {
196 let count = self.collections.len();
197
198 if count > ATTRIBUTE_SEARCH_ERROR_THRESHOLD {
199 return Err(anyhow!(
200 "Brute-force attribute search is limited to {} collections ({} in store). \
201 Indexed attribute lookup is planned for a future release.",
202 ATTRIBUTE_SEARCH_ERROR_THRESHOLD,
203 count
204 ));
205 }
206
207 if count > ATTRIBUTE_SEARCH_WARN_THRESHOLD {
208 eprintln!(
209 "Warning: brute-force attribute search scanning {} collections. \
210 This may be slow.",
211 count
212 );
213 }
214
215 let mut results = Vec::new();
216 for record in self.collections.values() {
217 let meta = record.metadata();
218 if metadata_matches_attribute(meta, attr_name, attr_digest)? {
219 results.push(meta.digest.clone());
220 }
221 }
222 Ok(results)
223 }
224
225 pub fn get_attribute(
235 &self,
236 attr_name: &str,
237 attr_digest: &str,
238 ) -> Result<Option<serde_json::Value>> {
239 let collections = self.find_collections_by_attribute(attr_name, attr_digest)?;
240 if collections.is_empty() {
241 return Ok(None);
242 }
243
244 let collection = self.get_collection(&collections[0])?;
246 let lvl2 = collection.to_level2();
247
248 let value = match attr_name {
249 "names" => serde_json::Value::Array(
250 lvl2.names
251 .iter()
252 .map(|s| serde_json::Value::String(s.clone()))
253 .collect(),
254 ),
255 "lengths" => serde_json::Value::Array(
256 lvl2.lengths
257 .iter()
258 .map(|l| serde_json::Value::Number(serde_json::Number::from(*l)))
259 .collect(),
260 ),
261 "sequences" => serde_json::Value::Array(
262 lvl2.sequences
263 .iter()
264 .map(|s| serde_json::Value::String(s.clone()))
265 .collect(),
266 ),
267 "sorted_sequences" => serde_json::Value::Array(
268 collection
269 .build_sorted_sequences()
270 .into_iter()
271 .map(serde_json::Value::String)
272 .collect(),
273 ),
274 "name_length_pairs" => {
275 serde_json::Value::Array(collection.build_name_length_pairs())
276 }
277 "sorted_name_length_pairs" => {
278 serde_json::Value::Array(collection.build_sorted_name_length_pairs())
279 }
280 _ => {
281 return Err(anyhow!(
282 "Unknown attribute: '{}'. Supported: names, lengths, sequences, \
283 name_length_pairs, sorted_name_length_pairs, sorted_sequences",
284 attr_name
285 ))
286 }
287 };
288
289 Ok(Some(value))
290 }
291
292 pub fn enable_attribute_index(&mut self) {
298 self.attribute_index = true;
299 }
300
301 pub fn disable_attribute_index(&mut self) {
303 self.attribute_index = false;
304 }
305
306 fn find_collections_by_attribute_indexed(
309 &self,
310 _attr_name: &str,
311 _attr_digest: &str,
312 ) -> Result<Vec<String>> {
313 Err(anyhow!(
314 "Indexed attribute lookup is not yet implemented. \
315 This feature is planned for a future release. \
316 For now, use the brute-force scan by keeping attribute_index disabled."
317 ))
318 }
319
320 pub fn collection_count(&self) -> usize {
322 self.collections.len()
323 }
324}
325
326impl SeqColService for ReadonlyRefgetStore {
327 fn get_collection_level1(&self, digest: &str) -> Result<CollectionLevel1> {
328 ReadonlyRefgetStore::get_collection_level1(self, digest)
329 }
330
331 fn get_collection_level2(&self, digest: &str) -> Result<CollectionLevel2> {
332 ReadonlyRefgetStore::get_collection_level2(self, digest)
333 }
334
335 fn compare(&self, digest_a: &str, digest_b: &str) -> Result<SeqColComparison> {
336 ReadonlyRefgetStore::compare(self, digest_a, digest_b)
337 }
338
339 fn compare_with_level2(
340 &self,
341 digest_a: &str,
342 external: &CollectionLevel2,
343 ) -> Result<SeqColComparison> {
344 ReadonlyRefgetStore::compare_with_level2(self, digest_a, external)
345 }
346
347 fn find_collections_by_attribute(
348 &self,
349 attr_name: &str,
350 attr_digest: &str,
351 ) -> Result<Vec<String>> {
352 ReadonlyRefgetStore::find_collections_by_attribute(self, attr_name, attr_digest)
353 }
354
355 fn get_attribute(
356 &self,
357 attr_name: &str,
358 attr_digest: &str,
359 ) -> Result<Option<serde_json::Value>> {
360 ReadonlyRefgetStore::get_attribute(self, attr_name, attr_digest)
361 }
362
363 fn list_collections(
364 &self,
365 page: usize,
366 page_size: usize,
367 filters: &[(&str, &str)],
368 ) -> Result<PagedResult<SequenceCollectionMetadata>> {
369 ReadonlyRefgetStore::list_collections(self, page, page_size, filters)
370 }
371
372 fn collection_count(&self) -> usize {
373 ReadonlyRefgetStore::collection_count(self)
374 }
375}
376
#[cfg(test)]
mod tests {
    use crate::store::{FastaImportOptions, ReadonlyRefgetStore, RefgetStore};
    use std::path::PathBuf;

    /// Reference FASTA fixture; the assertions below expect three sequences in it.
    const BASE_FA: &str = "../tests/data/fasta/base.fa";

    /// Second FASTA fixture, imported wherever a test needs two collections.
    const DIFFERENT_NAMES_FA: &str = "../tests/data/fasta/different_names.fa";

    /// Copies the fixture `name` into `temp_dir` and returns the path of the
    /// copy. Panics with a descriptive message when the copy fails.
    fn copy_test_fasta(temp_dir: &std::path::Path, name: &str) -> PathBuf {
        let source = format!("../tests/data/fasta/{}", name);
        let target = temp_dir.join(name);
        if let Err(e) = std::fs::copy(&source, &target) {
            panic!("Failed to copy {} to tempdir: {}", source, e);
        }
        target
    }

    /// A fresh in-memory store computes the ancillary digests on import and
    /// keeps them on the stored metadata.
    #[test]
    fn test_ancillary_digests_computed() {
        let mut store = RefgetStore::in_memory();
        assert!(store.has_ancillary_digests());

        let (imported, _) = store
            .add_sequence_collection_from_fasta(BASE_FA, FastaImportOptions::new())
            .unwrap();

        // Digests on the metadata returned by the import ...
        assert!(imported.name_length_pairs_digest.is_some());
        assert!(imported.sorted_name_length_pairs_digest.is_some());
        assert!(imported.sorted_sequences_digest.is_some());

        // ... and on the metadata read back from the store.
        let stored = store.get_collection_metadata(&imported.digest).unwrap();
        assert!(stored.name_length_pairs_digest.is_some());
        assert!(stored.sorted_name_length_pairs_digest.is_some());
        assert!(stored.sorted_sequences_digest.is_some());
    }

    /// With the flag cleared, an import leaves all ancillary digests unset.
    #[test]
    fn test_ancillary_digests_disabled() {
        let mut store = RefgetStore::in_memory();
        store.disable_ancillary_digests();
        assert!(!store.has_ancillary_digests());

        let (imported, _) = store
            .add_sequence_collection_from_fasta(BASE_FA, FastaImportOptions::new())
            .unwrap();

        assert!(imported.name_length_pairs_digest.is_none());
        assert!(imported.sorted_name_length_pairs_digest.is_none());
        assert!(imported.sorted_sequences_digest.is_none());
    }

    /// Level 1 mirrors the digests recorded in the collection metadata.
    #[test]
    fn test_collection_level1() {
        let mut store = RefgetStore::in_memory();
        let (imported, _) = store
            .add_sequence_collection_from_fasta(BASE_FA, FastaImportOptions::new())
            .unwrap();

        let level1 = store.get_collection_level1(&imported.digest).unwrap();
        assert_eq!(level1.names, imported.names_digest);
        assert_eq!(level1.lengths, imported.lengths_digest);
        assert_eq!(level1.sequences, imported.sequences_digest);
        assert!(level1.name_length_pairs.is_some());
        assert!(level1.sorted_name_length_pairs.is_some());
        assert!(level1.sorted_sequences.is_some());
    }

    /// Level 2 exposes one entry per sequence in each of the core arrays.
    #[test]
    fn test_collection_level2() {
        let mut store = RefgetStore::in_memory();
        let (imported, _) = store
            .add_sequence_collection_from_fasta(BASE_FA, FastaImportOptions::new())
            .unwrap();

        let level2 = store.get_collection_level2(&imported.digest).unwrap();
        assert_eq!(level2.names.len(), 3);
        assert_eq!(level2.lengths.len(), 3);
        assert_eq!(level2.sequences.len(), 3);

        for sequence in &level2.sequences {
            assert!(
                sequence.starts_with("SQ."),
                "Expected SQ. prefix, got: {}",
                sequence
            );
        }

        assert!(level2.lengths.contains(&8));
        assert!(level2.lengths.contains(&4));
    }

    /// Comparing a collection with itself and with a different collection.
    #[test]
    fn test_compare_collections() {
        let mut store = RefgetStore::in_memory();
        let (first, _) = store
            .add_sequence_collection_from_fasta(BASE_FA, FastaImportOptions::new())
            .unwrap();
        let (second, _) = store
            .add_sequence_collection_from_fasta(DIFFERENT_NAMES_FA, FastaImportOptions::new())
            .unwrap();

        // Self comparison: same digest on both sides, every attribute shared
        // and in the same order.
        let same = store.compare(&first.digest, &first.digest).unwrap();
        assert_eq!(Some(same.digests.a.as_str()), same.digests.b.as_deref());
        assert_eq!(same.attributes.a_and_b.len(), 6);
        for attr in &same.attributes.a_and_b {
            assert_eq!(same.array_elements.a_and_b_same_order[attr], Some(true));
        }

        // Cross comparison: digests differ, attribute sets are still identical.
        let cross = store.compare(&first.digest, &second.digest).unwrap();
        assert_ne!(Some(cross.digests.a.as_str()), cross.digests.b.as_deref());
        assert_eq!(cross.attributes.a_and_b.len(), 6);
        assert!(cross.attributes.a_only.is_empty());
        assert!(cross.attributes.b_only.is_empty());
    }

    /// When only side A was imported with ancillary digests, the three
    /// ancillary attributes are reported as A-only.
    #[test]
    fn test_compare_mixed_ancillary() {
        let mut store = RefgetStore::in_memory();
        let (with_ancillary, _) = store
            .add_sequence_collection_from_fasta(BASE_FA, FastaImportOptions::new())
            .unwrap();
        store.disable_ancillary_digests();
        let (without_ancillary, _) = store
            .add_sequence_collection_from_fasta(DIFFERENT_NAMES_FA, FastaImportOptions::new())
            .unwrap();

        let outcome = store
            .compare(&with_ancillary.digest, &without_ancillary.digest)
            .unwrap();
        assert_eq!(outcome.attributes.a_and_b.len(), 3);
        assert_eq!(outcome.attributes.a_only.len(), 3);
        assert!(outcome.attributes.b_only.is_empty());
    }

    /// Attribute search by core and ancillary digests, plus the miss and the
    /// unknown-attribute cases.
    #[test]
    fn test_find_collections_by_attribute() {
        let mut store = RefgetStore::in_memory();
        let (imported, _) = store
            .add_sequence_collection_from_fasta(BASE_FA, FastaImportOptions::new())
            .unwrap();

        let by_names = store
            .find_collections_by_attribute("names", &imported.names_digest)
            .unwrap();
        assert_eq!(by_names.len(), 1);
        assert_eq!(by_names[0], imported.digest);

        let by_lengths = store
            .find_collections_by_attribute("lengths", &imported.lengths_digest)
            .unwrap();
        assert_eq!(by_lengths.len(), 1);

        let by_sequences = store
            .find_collections_by_attribute("sequences", &imported.sequences_digest)
            .unwrap();
        assert_eq!(by_sequences.len(), 1);

        let pairs_digest = imported.name_length_pairs_digest.as_ref().unwrap();
        let by_pairs = store
            .find_collections_by_attribute("name_length_pairs", pairs_digest)
            .unwrap();
        assert_eq!(by_pairs.len(), 1);

        // A digest nobody carries is an empty result, not an error.
        let no_hits = store
            .find_collections_by_attribute("names", "nonexistent")
            .unwrap();
        assert!(no_hits.is_empty());

        // An unsupported attribute name is an error.
        assert!(store
            .find_collections_by_attribute("unknown", "digest")
            .is_err());
    }

    /// Core attributes can be fetched by digest; unknown digests yield `None`.
    #[test]
    fn test_get_attribute() {
        let mut store = RefgetStore::in_memory();
        let (imported, _) = store
            .add_sequence_collection_from_fasta(BASE_FA, FastaImportOptions::new())
            .unwrap();

        let names_lookup = store
            .get_attribute("names", &imported.names_digest)
            .unwrap();
        assert!(names_lookup.is_some());
        let names = names_lookup.unwrap();
        assert!(names.is_array());
        assert_eq!(names.as_array().unwrap().len(), 3);

        let lengths_lookup = store
            .get_attribute("lengths", &imported.lengths_digest)
            .unwrap();
        assert!(lengths_lookup.is_some());

        let missing = store.get_attribute("names", "nonexistent").unwrap();
        assert!(missing.is_none());
    }

    /// `sorted_sequences` comes back as an ascending array of `SQ.` digests.
    #[test]
    fn test_get_attribute_sorted_sequences() {
        let mut store = RefgetStore::in_memory();
        let (imported, _) = store
            .add_sequence_collection_from_fasta(BASE_FA, FastaImportOptions::new())
            .unwrap();

        let attr_digest = imported.sorted_sequences_digest.as_ref().unwrap();
        let lookup = store.get_attribute("sorted_sequences", attr_digest).unwrap();
        assert!(lookup.is_some());
        let value = lookup.unwrap();
        assert!(value.is_array());
        let entries = value.as_array().unwrap();
        assert_eq!(entries.len(), 3);

        for entry in entries {
            let text = entry.as_str().unwrap();
            assert!(text.starts_with("SQ."), "Expected SQ. prefix, got: {}", text);
        }

        let actual: Vec<&str> = entries.iter().map(|v| v.as_str().unwrap()).collect();
        let mut expected = actual.clone();
        expected.sort();
        assert_eq!(actual, expected, "sorted_sequences should be in sorted order");
    }

    /// `name_length_pairs` comes back as objects with a string `name` and a
    /// numeric `length`.
    #[test]
    fn test_get_attribute_name_length_pairs() {
        let mut store = RefgetStore::in_memory();
        let (imported, _) = store
            .add_sequence_collection_from_fasta(BASE_FA, FastaImportOptions::new())
            .unwrap();

        let attr_digest = imported.name_length_pairs_digest.as_ref().unwrap();
        let lookup = store.get_attribute("name_length_pairs", attr_digest).unwrap();
        assert!(lookup.is_some());
        let value = lookup.unwrap();
        assert!(value.is_array());
        let entries = value.as_array().unwrap();
        assert_eq!(entries.len(), 3);

        for entry in entries {
            let pair = entry.as_object().unwrap();
            assert!(pair.contains_key("name"), "Expected 'name' key in object");
            assert!(pair.contains_key("length"), "Expected 'length' key in object");
            assert!(pair["name"].is_string(), "name should be a string");
            assert!(pair["length"].is_number(), "length should be a number");
        }
    }

    /// `sorted_name_length_pairs` is ordered by the digest of each canonical
    /// JSON object.
    #[test]
    fn test_get_attribute_sorted_name_length_pairs() {
        use crate::digest::algorithms::{canonicalize_json, sha512t24u};

        let mut store = RefgetStore::in_memory();
        let (imported, _) = store
            .add_sequence_collection_from_fasta(BASE_FA, FastaImportOptions::new())
            .unwrap();

        let attr_digest = imported.sorted_name_length_pairs_digest.as_ref().unwrap();
        let lookup = store
            .get_attribute("sorted_name_length_pairs", attr_digest)
            .unwrap();
        assert!(lookup.is_some());
        let value = lookup.unwrap();
        assert!(value.is_array());
        let entries = value.as_array().unwrap();
        assert_eq!(entries.len(), 3);

        for entry in entries {
            let pair = entry.as_object().unwrap();
            assert!(pair.contains_key("name"));
            assert!(pair.contains_key("length"));
        }

        // Recompute the per-object digests and check they are already ascending.
        let actual: Vec<String> = entries
            .iter()
            .map(|v| sha512t24u(canonicalize_json(v).as_bytes()))
            .collect();
        let mut expected = actual.clone();
        expected.sort();
        assert_eq!(
            actual, expected,
            "sorted_name_length_pairs objects should be in sorted digest order"
        );
    }

    /// Looking up an ancillary attribute that was never computed yields `None`.
    #[test]
    fn test_get_attribute_ancillary_not_computed() {
        let mut store = RefgetStore::in_memory();
        store.disable_ancillary_digests();
        let (imported, _) = store
            .add_sequence_collection_from_fasta(BASE_FA, FastaImportOptions::new())
            .unwrap();

        assert!(imported.name_length_pairs_digest.is_none());
        let lookup = store
            .get_attribute("name_length_pairs", "some_digest")
            .unwrap();
        assert!(
            lookup.is_none(),
            "Expected None when no ancillary digests are computed"
        );
    }

    /// Ancillary digests survive a write to disk followed by a reopen.
    #[test]
    fn test_rgci_roundtrip_with_ancillary() {
        let workspace = tempfile::tempdir().unwrap();
        let root = workspace.path();
        let fasta_copy = copy_test_fasta(root, "base.fa");

        // Write phase; the store is dropped at the end of the scope.
        {
            let mut store = RefgetStore::on_disk(root).unwrap();
            store
                .add_sequence_collection_from_fasta(&fasta_copy, FastaImportOptions::new())
                .unwrap();
            store.write().unwrap();
        }

        // Read phase on a freshly opened store.
        {
            let store = RefgetStore::open_local(root).unwrap();
            let listing = store.list_collections(0, usize::MAX, &[]).unwrap();
            assert_eq!(listing.results.len(), 1);

            let reloaded = &listing.results[0];
            assert!(reloaded.name_length_pairs_digest.is_some());
            assert!(reloaded.sorted_name_length_pairs_digest.is_some());
            assert!(reloaded.sorted_sequences_digest.is_some());
        }
    }

    /// Every digest computed on import must equal the value recorded in the
    /// JSON fixture for the same FASTA file.
    #[test]
    fn test_compliance_digests_from_fixture() {
        let fixture_path = "../tests/data/fasta/test_fasta_digests.json";
        let raw = std::fs::read_to_string(fixture_path)
            .unwrap_or_else(|e| panic!("Failed to read {}: {}", fixture_path, e));
        let fixture: serde_json::Value = serde_json::from_str(&raw)
            .unwrap_or_else(|e| panic!("Failed to parse {}: {}", fixture_path, e));

        let mut store = RefgetStore::in_memory();
        store.enable_ancillary_digests();

        for (fa_name, bundle) in fixture.as_object().unwrap() {
            let fasta_path = format!("../tests/data/fasta/{}", fa_name);
            let (computed, _) = store
                .add_sequence_collection_from_fasta(&fasta_path, FastaImportOptions::new())
                .unwrap_or_else(|e| panic!("{}: {}", fa_name, e));

            let expected_level1 = bundle["level1"].as_object().unwrap();
            let expected_digest = bundle["top_level_digest"].as_str().unwrap();

            assert_eq!(
                computed.digest, expected_digest,
                "{}: top_level_digest",
                fa_name
            );
            assert_eq!(
                computed.names_digest,
                expected_level1["names"].as_str().unwrap(),
                "{}: names",
                fa_name
            );
            assert_eq!(
                computed.lengths_digest,
                expected_level1["lengths"].as_str().unwrap(),
                "{}: lengths",
                fa_name
            );
            assert_eq!(
                computed.sequences_digest,
                expected_level1["sequences"].as_str().unwrap(),
                "{}: sequences",
                fa_name
            );
            assert_eq!(
                computed.sorted_sequences_digest.as_deref(),
                Some(expected_level1["sorted_sequences"].as_str().unwrap()),
                "{}: sorted_sequences",
                fa_name
            );
            assert_eq!(
                computed.name_length_pairs_digest.as_deref(),
                Some(expected_level1["name_length_pairs"].as_str().unwrap()),
                "{}: name_length_pairs",
                fa_name
            );
            assert_eq!(
                computed.sorted_name_length_pairs_digest.as_deref(),
                Some(expected_level1["sorted_name_length_pairs"].as_str().unwrap()),
                "{}: sorted_name_length_pairs",
                fa_name
            );
        }
    }

    /// The two feature flags keep their values across write and reopen.
    #[test]
    fn test_store_config_persisted() {
        let workspace = tempfile::tempdir().unwrap();
        let root = workspace.path();
        let fasta_copy = copy_test_fasta(root, "base.fa");

        {
            let mut store = RefgetStore::on_disk(root).unwrap();
            assert!(store.has_ancillary_digests());
            assert!(!store.has_attribute_index());
            store
                .add_sequence_collection_from_fasta(&fasta_copy, FastaImportOptions::new())
                .unwrap();
            store.write().unwrap();
        }

        {
            let reopened = RefgetStore::open_local(root).unwrap();
            assert!(reopened.has_ancillary_digests());
            assert!(!reopened.has_attribute_index());
        }
    }

    /// Comparing a collection with its own level-2 view: core attributes are
    /// shared and in the same order, and side B carries no digest.
    #[test]
    fn test_compare_with_level2_self_identical() {
        let mut store = RefgetStore::in_memory();
        let (imported, _) = store
            .add_sequence_collection_from_fasta(BASE_FA, FastaImportOptions::new())
            .unwrap();

        let own_level2 = store.get_collection_level2(&imported.digest).unwrap();

        let outcome = store
            .compare_with_level2(&imported.digest, &own_level2)
            .unwrap();

        assert_eq!(outcome.digests.a, imported.digest);
        assert!(
            outcome.digests.b.is_none(),
            "digests.b should be None for external level-2 comparison"
        );

        assert!(outcome.attributes.a_and_b.contains(&"names".to_string()));
        assert!(outcome.attributes.a_and_b.contains(&"lengths".to_string()));
        assert!(outcome.attributes.a_and_b.contains(&"sequences".to_string()));

        for attr in &["names", "lengths", "sequences"] {
            assert_eq!(
                outcome.array_elements.a_and_b_same_order[*attr],
                Some(true),
                "{} should be in same order",
                attr
            );
        }
    }

    /// `compare` and `compare_with_level2` agree on side A; only the former
    /// knows the digest of side B.
    #[test]
    fn test_compare_with_level2_cross_compare() {
        let mut store = RefgetStore::in_memory();
        let (first, _) = store
            .add_sequence_collection_from_fasta(BASE_FA, FastaImportOptions::new())
            .unwrap();
        let (second, _) = store
            .add_sequence_collection_from_fasta(DIFFERENT_NAMES_FA, FastaImportOptions::new())
            .unwrap();

        let second_level2 = store.get_collection_level2(&second.digest).unwrap();

        let by_digest = store.compare(&first.digest, &second.digest).unwrap();
        let by_level2 = store
            .compare_with_level2(&first.digest, &second_level2)
            .unwrap();

        assert_eq!(by_digest.digests.b, Some(second.digest.clone()));
        assert!(by_level2.digests.b.is_none());

        assert_eq!(by_digest.digests.a, by_level2.digests.a);
        for attr in &["names", "lengths", "sequences"] {
            assert!(
                by_level2.attributes.a_and_b.contains(&attr.to_string())
                    || by_level2.attributes.a_only.contains(&attr.to_string())
                    || by_level2.attributes.b_only.contains(&attr.to_string()),
                "attr {} must appear somewhere",
                attr
            );
        }
    }

    /// An unknown left-hand digest is reported as an error.
    #[test]
    fn test_compare_with_level2_unknown_digest_returns_error() {
        use crate::digest::CollectionLevel2;

        let store = RefgetStore::in_memory();
        let external = CollectionLevel2 {
            names: vec!["chr1".to_string()],
            lengths: vec![100],
            sequences: vec!["SQ.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa".to_string()],
        };

        let outcome = store.compare_with_level2("nonexistent_digest", &external);
        assert!(outcome.is_err(), "Expected error for unknown digest");
    }

    /// Ancillary attributes exist only on the stored side, so they must be
    /// reported as A-only when comparing against a level-2 collection.
    #[test]
    fn test_compare_with_level2_ancillary_in_a_only() {
        let mut store = RefgetStore::in_memory();
        let (imported, _) = store
            .add_sequence_collection_from_fasta(BASE_FA, FastaImportOptions::new())
            .unwrap();

        // Precondition: the stored collection does carry ancillary digests.
        assert!(imported.name_length_pairs_digest.is_some());
        assert!(imported.sorted_name_length_pairs_digest.is_some());
        assert!(imported.sorted_sequences_digest.is_some());

        let own_level2 = store.get_collection_level2(&imported.digest).unwrap();

        let outcome = store
            .compare_with_level2(&imported.digest, &own_level2)
            .unwrap();

        assert!(
            outcome
                .attributes
                .a_only
                .contains(&"sorted_sequences".to_string()),
            "sorted_sequences should be in a_only"
        );
        assert!(
            outcome
                .attributes
                .a_only
                .contains(&"name_length_pairs".to_string()),
            "name_length_pairs should be in a_only"
        );
        assert!(
            outcome
                .attributes
                .a_only
                .contains(&"sorted_name_length_pairs".to_string()),
            "sorted_name_length_pairs should be in a_only"
        );

        assert!(
            outcome.attributes.b_only.is_empty(),
            "b_only should be empty when level-2 has only core attributes"
        );
    }

    /// An unfiltered page large enough for everything returns both
    /// collections, ordered by digest.
    #[test]
    fn test_list_collections_paged_no_filters() {
        let mut store = RefgetStore::in_memory();
        store
            .add_sequence_collection_from_fasta(BASE_FA, FastaImportOptions::new())
            .unwrap();
        store
            .add_sequence_collection_from_fasta(DIFFERENT_NAMES_FA, FastaImportOptions::new())
            .unwrap();

        let listing = store.list_collections(0, 2, &[]).unwrap();
        assert_eq!(listing.results.len(), 2);
        assert_eq!(listing.pagination.total, 2);
        assert_eq!(listing.pagination.page, 0);
        assert_eq!(listing.pagination.page_size, 2);
        assert!(listing.results[0].digest <= listing.results[1].digest);
    }

    /// Consecutive single-entry pages return different collections.
    #[test]
    fn test_list_collections_paged_second_page() {
        let mut store = RefgetStore::in_memory();
        store
            .add_sequence_collection_from_fasta(BASE_FA, FastaImportOptions::new())
            .unwrap();
        store
            .add_sequence_collection_from_fasta(DIFFERENT_NAMES_FA, FastaImportOptions::new())
            .unwrap();

        let first_page = store.list_collections(0, 1, &[]).unwrap();
        assert_eq!(first_page.results.len(), 1);
        assert_eq!(first_page.pagination.total, 2);

        let second_page = store.list_collections(1, 1, &[]).unwrap();
        assert_eq!(second_page.results.len(), 1);
        assert_eq!(second_page.pagination.total, 2);

        assert_ne!(first_page.results[0].digest, second_page.results[0].digest);
    }

    /// A page past the end is empty but still reports the real total.
    #[test]
    fn test_list_collections_paged_beyond_end() {
        let mut store = RefgetStore::in_memory();
        store
            .add_sequence_collection_from_fasta(BASE_FA, FastaImportOptions::new())
            .unwrap();

        let listing = store.list_collections(10, 100, &[]).unwrap();
        assert!(listing.results.is_empty());
        assert_eq!(listing.pagination.total, 1);
    }

    /// Filtering on the names digest selects exactly the matching collection.
    #[test]
    fn test_list_collections_single_filter() {
        let mut store = RefgetStore::in_memory();
        let (wanted, _) = store
            .add_sequence_collection_from_fasta(BASE_FA, FastaImportOptions::new())
            .unwrap();
        store
            .add_sequence_collection_from_fasta(DIFFERENT_NAMES_FA, FastaImportOptions::new())
            .unwrap();

        let listing = store
            .list_collections(0, 100, &[("names", &wanted.names_digest)])
            .unwrap();
        assert_eq!(listing.results.len(), 1);
        assert_eq!(listing.results[0].digest, wanted.digest);
        assert_eq!(listing.pagination.total, 1);
    }

    /// Two filters together still select the single matching collection.
    #[test]
    fn test_list_collections_multi_filter_and() {
        let mut store = RefgetStore::in_memory();
        let (wanted, _) = store
            .add_sequence_collection_from_fasta(BASE_FA, FastaImportOptions::new())
            .unwrap();
        store
            .add_sequence_collection_from_fasta(DIFFERENT_NAMES_FA, FastaImportOptions::new())
            .unwrap();

        let listing = store
            .list_collections(
                0,
                100,
                &[
                    ("names", &wanted.names_digest),
                    ("lengths", &wanted.lengths_digest),
                ],
            )
            .unwrap();
        assert_eq!(listing.results.len(), 1);
        assert_eq!(listing.results[0].digest, wanted.digest);
    }

    /// A filter that matches nothing gives an empty page and a total of zero.
    #[test]
    fn test_list_collections_filter_no_match() {
        let mut store = RefgetStore::in_memory();
        store
            .add_sequence_collection_from_fasta(BASE_FA, FastaImportOptions::new())
            .unwrap();

        let listing = store
            .list_collections(0, 100, &[("names", "nonexistent_digest")])
            .unwrap();
        assert!(listing.results.is_empty());
        assert_eq!(listing.pagination.total, 0);
    }

    /// Filtering on an unsupported attribute name is an error.
    #[test]
    fn test_list_collections_invalid_attribute() {
        let mut store = RefgetStore::in_memory();
        store
            .add_sequence_collection_from_fasta(BASE_FA, FastaImportOptions::new())
            .unwrap();

        let outcome = store.list_collections(0, 100, &[("unknown_attr", "digest")]);
        assert!(outcome.is_err());
    }

    /// Pagination is applied after filtering: the lengths digest of the first
    /// fixture matches both collections, so both pages report a total of two.
    #[test]
    fn test_list_collections_filter_with_pagination() {
        let mut store = RefgetStore::in_memory();
        let (first, _) = store
            .add_sequence_collection_from_fasta(BASE_FA, FastaImportOptions::new())
            .unwrap();
        let (_second, _) = store
            .add_sequence_collection_from_fasta(DIFFERENT_NAMES_FA, FastaImportOptions::new())
            .unwrap();

        let first_page = store
            .list_collections(0, 1, &[("lengths", &first.lengths_digest)])
            .unwrap();
        assert_eq!(first_page.results.len(), 1);
        assert_eq!(first_page.pagination.total, 2);

        let second_page = store
            .list_collections(1, 1, &[("lengths", &first.lengths_digest)])
            .unwrap();
        assert_eq!(second_page.results.len(), 1);
        assert_eq!(second_page.pagination.total, 2);

        assert_ne!(first_page.results[0].digest, second_page.results[0].digest);
    }

    /// The trait can be used as `Arc<dyn SeqColService + Send + Sync>` and
    /// every method is callable through the trait object.
    #[test]
    fn test_trait_object_safety() {
        use super::SeqColService;
        use std::sync::Arc;

        let mut store = RefgetStore::in_memory();
        let (imported, _) = store
            .add_sequence_collection_from_fasta(BASE_FA, FastaImportOptions::new())
            .unwrap();

        let readonly = store.into_readonly();
        let service: Arc<dyn SeqColService + Send + Sync> = Arc::new(readonly);

        let level1 = service.get_collection_level1(&imported.digest).unwrap();
        assert_eq!(level1.names, imported.names_digest);

        let level2 = service.get_collection_level2(&imported.digest).unwrap();
        assert_eq!(level2.names.len(), 3);

        let self_compare = service.compare(&imported.digest, &imported.digest).unwrap();
        assert_eq!(self_compare.digests.a, imported.digest);

        let level2_compare = service
            .compare_with_level2(&imported.digest, &level2)
            .unwrap();
        assert_eq!(level2_compare.digests.a, imported.digest);

        let found = service
            .find_collections_by_attribute("names", &imported.names_digest)
            .unwrap();
        assert_eq!(found.len(), 1);

        let attribute = service
            .get_attribute("names", &imported.names_digest)
            .unwrap();
        assert!(attribute.is_some());

        let listing = service.list_collections(0, 10, &[]).unwrap();
        assert_eq!(listing.results.len(), 1);

        assert_eq!(service.collection_count(), 1);
    }

    /// `collection_count` reflects the number of imported collections.
    #[test]
    fn test_collection_count() {
        let mut store = RefgetStore::in_memory();
        store
            .add_sequence_collection_from_fasta(BASE_FA, FastaImportOptions::new())
            .unwrap();
        store
            .add_sequence_collection_from_fasta(DIFFERENT_NAMES_FA, FastaImportOptions::new())
            .unwrap();

        assert_eq!(store.collection_count(), 2);
    }

    /// Calls through the trait object return the same data as the inherent
    /// methods on the concrete store.
    #[test]
    fn test_trait_methods_match_concrete() {
        use super::SeqColService;

        let mut store = RefgetStore::in_memory();
        let (imported, _) = store
            .add_sequence_collection_from_fasta(BASE_FA, FastaImportOptions::new())
            .unwrap();

        let readonly = store.into_readonly();

        let concrete_level1 =
            ReadonlyRefgetStore::get_collection_level1(&readonly, &imported.digest).unwrap();
        let via_trait: &dyn SeqColService = &readonly;
        let trait_level1 = via_trait.get_collection_level1(&imported.digest).unwrap();

        assert_eq!(concrete_level1.names, trait_level1.names);
        assert_eq!(concrete_level1.lengths, trait_level1.lengths);
        assert_eq!(concrete_level1.sequences, trait_level1.sequences);

        let concrete_listing =
            ReadonlyRefgetStore::list_collections(&readonly, 0, 10, &[]).unwrap();
        let trait_listing = via_trait.list_collections(0, 10, &[]).unwrap();
        assert_eq!(concrete_listing.results.len(), trait_listing.results.len());
        assert_eq!(
            concrete_listing.pagination.total,
            trait_listing.pagination.total
        );

        assert_eq!(
            ReadonlyRefgetStore::collection_count(&readonly),
            via_trait.collection_count()
        );
    }
}