use crate::digest::{CollectionLevel1, CollectionLevel2, SeqColComparison, SequenceCollectionMetadata};
use crate::digest::types::{compare_arrays, level2_to_comparison_arrays};
use crate::hashkeyable::HashKeyable;
use crate::store::{PagedResult, ReadonlyRefgetStore};
use anyhow::{anyhow, Result};
/// Query interface over a store of sequence collections.
///
/// Every method takes `&self`. The implementation for `ReadonlyRefgetStore`
/// in this file forwards each call to the store method of the same name, and
/// the tests in this file use the trait as `dyn SeqColService`.
pub trait SeqColService {
/// Returns the level-1 representation of the collection identified by `digest`.
fn get_collection_level1(&self, digest: &str) -> Result<CollectionLevel1>;
/// Returns the level-2 representation of the collection identified by `digest`.
fn get_collection_level2(&self, digest: &str) -> Result<CollectionLevel2>;
/// Compares the two stored collections identified by `digest_a` and `digest_b`.
fn compare(&self, digest_a: &str, digest_b: &str) -> Result<SeqColComparison>;
/// Compares the stored collection `digest_a` against a caller-supplied
/// level-2 collection (`external`) that does not have to be in the store.
fn compare_with_level2(
&self,
digest_a: &str,
external: &CollectionLevel2,
) -> Result<SeqColComparison>;
/// Returns the digests of the collections whose attribute `attr_name`
/// has the digest `attr_digest`.
fn find_collections_by_attribute(
&self,
attr_name: &str,
attr_digest: &str,
) -> Result<Vec<String>>;
/// Returns the value of attribute `attr_name` whose digest is `attr_digest`
/// as JSON, or `None` when no collection carries that attribute digest.
fn get_attribute(
&self,
attr_name: &str,
attr_digest: &str,
) -> Result<Option<serde_json::Value>>;
/// Returns one page of collection metadata.
///
/// `filters` holds `(attribute name, attribute digest)` pairs. The tests in
/// this file pass `page = 0` for the first page and expect multiple filters
/// to be combined with AND.
fn list_collections(
&self,
page: usize,
page_size: usize,
filters: &[(&str, &str)],
) -> Result<PagedResult<SequenceCollectionMetadata>>;
/// Returns the number of collections in the store.
fn collection_count(&self) -> usize;
}
/// Reports whether the digest recorded in `meta` for attribute `attr_name`
/// equals `attr_digest`.
///
/// The three ancillary digests (`name_length_pairs`,
/// `sorted_name_length_pairs`, `sorted_sequences`) are optional; when one is
/// absent from `meta` it never matches.
///
/// # Errors
///
/// Returns an error when `attr_name` is not one of the six supported names.
pub(crate) fn metadata_matches_attribute(
    meta: &SequenceCollectionMetadata,
    attr_name: &str,
    attr_digest: &str,
) -> Result<bool> {
    // Resolve the attribute name to the digest stored for it; `None` means
    // the (optional) digest was not recorded for this collection.
    let stored: Option<&str> = match attr_name {
        "names" => Some(meta.names_digest.as_str()),
        "lengths" => Some(meta.lengths_digest.as_str()),
        "sequences" => Some(meta.sequences_digest.as_str()),
        "name_length_pairs" => meta.name_length_pairs_digest.as_deref(),
        "sorted_name_length_pairs" => meta.sorted_name_length_pairs_digest.as_deref(),
        "sorted_sequences" => meta.sorted_sequences_digest.as_deref(),
        _ => {
            return Err(anyhow!(
                "Unknown attribute: '{}'. Supported: names, lengths, sequences, \
                 name_length_pairs, sorted_name_length_pairs, sorted_sequences",
                attr_name
            ))
        }
    };
    Ok(stored == Some(attr_digest))
}
/// Collection count above which a brute-force attribute scan prints a
/// warning to stderr before scanning.
const ATTRIBUTE_SEARCH_WARN_THRESHOLD: usize = 10_000;
/// Collection count above which a brute-force attribute scan is refused
/// with an error instead of being run.
const ATTRIBUTE_SEARCH_ERROR_THRESHOLD: usize = 100_000;
impl ReadonlyRefgetStore {
/// Turns the `ancillary_digests` flag on.
///
/// The tests in this file expect collections imported while the flag is on
/// to carry the `name_length_pairs`, `sorted_name_length_pairs` and
/// `sorted_sequences` digests.
pub fn enable_ancillary_digests(&mut self) {
self.ancillary_digests = true;
}
/// Turns the `ancillary_digests` flag off.
///
/// The tests in this file expect collections imported while the flag is off
/// to have no ancillary digests.
pub fn disable_ancillary_digests(&mut self) {
self.ancillary_digests = false;
}
/// Returns the current value of the `ancillary_digests` flag.
pub fn has_ancillary_digests(&self) -> bool {
self.ancillary_digests
}
/// Returns the current value of the `attribute_index` flag, which selects
/// between the indexed and the brute-force attribute search.
pub fn has_attribute_index(&self) -> bool {
self.attribute_index
}
/// Returns the level-1 view of the collection stored under `digest`.
///
/// Only the record's metadata is consulted.
///
/// # Errors
///
/// Returns `Collection not found: <digest>` when no record is stored under
/// the key derived from `digest`.
pub fn get_collection_level1(&self, digest: &str) -> Result<CollectionLevel1> {
    match self.collections.get(&digest.to_key()) {
        Some(entry) => Ok(entry.metadata().to_level1()),
        None => Err(anyhow!("Collection not found: {}", digest)),
    }
}
/// Returns the level-2 view of the collection identified by `digest`.
///
/// # Errors
///
/// Propagates any error from `get_collection`.
pub fn get_collection_level2(&self, digest: &str) -> Result<CollectionLevel2> {
    Ok(self.get_collection(digest)?.to_level2())
}
/// Compares the stored collections identified by `digest_a` and `digest_b`.
///
/// # Errors
///
/// Propagates the `get_collection` error for whichever digest fails to
/// load; `digest_a` is looked up first.
pub fn compare(&self, digest_a: &str, digest_b: &str) -> Result<SeqColComparison> {
    // Tuple fields are evaluated left to right, so A is still fetched before B.
    let (left, right) = (
        self.get_collection(digest_a)?,
        self.get_collection(digest_b)?,
    );
    Ok(left.compare(&right))
}
/// Compares the stored collection `digest_a` with a caller-supplied level-2
/// collection.
///
/// The external collection has no digest of its own here, so the B-side
/// digest handed to `compare_arrays` is `None`.
///
/// # Errors
///
/// Propagates any error from `get_collection` for `digest_a`.
pub fn compare_with_level2(
    &self,
    digest_a: &str,
    external: &CollectionLevel2,
) -> Result<SeqColComparison> {
    let stored = self.get_collection(digest_a)?;
    let stored_arrays = stored.to_comparison_arrays();
    let external_arrays = level2_to_comparison_arrays(external);
    let stored_digest = stored.metadata.digest.clone();
    let comparison = compare_arrays(stored_arrays, external_arrays, stored_digest, None);
    Ok(comparison)
}
/// Returns the digests of all collections whose attribute `attr_name` has
/// the digest `attr_digest`.
///
/// Dispatches on the `attribute_index` flag: the brute-force scan when the
/// flag is off, the indexed lookup when it is on.
///
/// # Errors
///
/// Propagates the error of whichever lookup strategy is selected.
pub fn find_collections_by_attribute(
    &self,
    attr_name: &str,
    attr_digest: &str,
) -> Result<Vec<String>> {
    if !self.attribute_index {
        return self.find_collections_by_attribute_scan(attr_name, attr_digest);
    }
    self.find_collections_by_attribute_indexed(attr_name, attr_digest)
}
/// Brute-force attribute search: checks the metadata of every stored
/// collection against (`attr_name`, `attr_digest`) and returns the digests
/// of the matching collections.
///
/// # Errors
///
/// * `attr_name` is not a supported attribute name. This is checked before
///   anything else, so the result does not depend on whether the store
///   happens to contain any collections.
/// * The store holds more than `ATTRIBUTE_SEARCH_ERROR_THRESHOLD`
///   collections.
///
/// Above `ATTRIBUTE_SEARCH_WARN_THRESHOLD` collections a warning is written
/// to stderr and the scan still runs.
fn find_collections_by_attribute_scan(
    &self,
    attr_name: &str,
    attr_digest: &str,
) -> Result<Vec<String>> {
    // Validate the name up front. Relying on the per-record check alone would
    // let an unknown name return `Ok(vec![])` on an empty store, because the
    // loop body never executes there.
    if !matches!(
        attr_name,
        "names"
            | "lengths"
            | "sequences"
            | "name_length_pairs"
            | "sorted_name_length_pairs"
            | "sorted_sequences"
    ) {
        return Err(anyhow!(
            "Unknown attribute: '{}'. Supported: names, lengths, sequences, \
             name_length_pairs, sorted_name_length_pairs, sorted_sequences",
            attr_name
        ));
    }

    let count = self.collections.len();
    if count > ATTRIBUTE_SEARCH_ERROR_THRESHOLD {
        return Err(anyhow!(
            "Brute-force attribute search is limited to {} collections ({} in store). \
             Indexed attribute lookup is planned for a future release.",
            ATTRIBUTE_SEARCH_ERROR_THRESHOLD,
            count
        ));
    }
    if count > ATTRIBUTE_SEARCH_WARN_THRESHOLD {
        eprintln!(
            "Warning: brute-force attribute search scanning {} collections. \
             This may be slow.",
            count
        );
    }

    let mut results = Vec::new();
    for record in self.collections.values() {
        let meta = record.metadata();
        if metadata_matches_attribute(meta, attr_name, attr_digest)? {
            results.push(meta.digest.clone());
        }
    }
    Ok(results)
}
/// Returns the value of attribute `attr_name` whose digest is `attr_digest`,
/// rendered as a JSON array.
///
/// The value is taken from the first collection reported by
/// `find_collections_by_attribute`. Returns `Ok(None)` when no collection
/// carries that attribute digest.
///
/// # Errors
///
/// * `attr_name` is not a supported attribute name. This is checked before
///   the search, so an unknown name is always an error rather than
///   `Ok(None)` when nothing happens to match.
/// * Any error from `find_collections_by_attribute` or `get_collection`.
pub fn get_attribute(
    &self,
    attr_name: &str,
    attr_digest: &str,
) -> Result<Option<serde_json::Value>> {
    let unknown_attribute = || {
        anyhow!(
            "Unknown attribute: '{}'. Supported: names, lengths, sequences, \
             name_length_pairs, sorted_name_length_pairs, sorted_sequences",
            attr_name
        )
    };

    // Reject unsupported names before searching; otherwise an unknown name
    // would be reported as "not found" whenever the search yields nothing.
    if !matches!(
        attr_name,
        "names"
            | "lengths"
            | "sequences"
            | "name_length_pairs"
            | "sorted_name_length_pairs"
            | "sorted_sequences"
    ) {
        return Err(unknown_attribute());
    }

    let matching = self.find_collections_by_attribute(attr_name, attr_digest)?;
    let first_digest = match matching.first() {
        Some(digest) => digest,
        None => return Ok(None),
    };
    let collection = self.get_collection(first_digest)?;

    // The level-2 view is only built for the three core attributes, and its
    // strings are moved into the JSON values instead of being cloned.
    let value = match attr_name {
        "names" => serde_json::Value::Array(
            collection
                .to_level2()
                .names
                .into_iter()
                .map(serde_json::Value::String)
                .collect(),
        ),
        "lengths" => serde_json::Value::Array(
            collection
                .to_level2()
                .lengths
                .iter()
                .map(|l| serde_json::Value::Number(serde_json::Number::from(*l)))
                .collect(),
        ),
        "sequences" => serde_json::Value::Array(
            collection
                .to_level2()
                .sequences
                .into_iter()
                .map(serde_json::Value::String)
                .collect(),
        ),
        "sorted_sequences" => serde_json::Value::Array(
            collection
                .build_sorted_sequences()
                .into_iter()
                .map(serde_json::Value::String)
                .collect(),
        ),
        "name_length_pairs" => serde_json::Value::Array(collection.build_name_length_pairs()),
        "sorted_name_length_pairs" => {
            serde_json::Value::Array(collection.build_sorted_name_length_pairs())
        }
        // Not reachable after the up-front validation; kept because a match
        // on `&str` needs a catch-all arm.
        _ => return Err(unknown_attribute()),
    };
    Ok(Some(value))
}
/// Turns the `attribute_index` flag on.
///
/// With the flag on, `find_collections_by_attribute` routes to the indexed
/// lookup, which in this file always returns a "not yet implemented" error.
pub fn enable_attribute_index(&mut self) {
self.attribute_index = true;
}
/// Turns the `attribute_index` flag off, so attribute searches use the
/// brute-force scan.
pub fn disable_attribute_index(&mut self) {
self.attribute_index = false;
}
fn find_collections_by_attribute_indexed(
&self,
_attr_name: &str,
_attr_digest: &str,
) -> Result<Vec<String>> {
Err(anyhow!(
"Indexed attribute lookup is not yet implemented. \
This feature is planned for a future release. \
For now, use the brute-force scan by keeping attribute_index disabled."
))
}
/// Returns the number of entries in `self.collections`.
pub fn collection_count(&self) -> usize {
self.collections.len()
}
}
/// `SeqColService` implementation for `ReadonlyRefgetStore`.
///
/// Every method is a one-line forwarder to the store function of the same
/// name, spelled with the explicit `ReadonlyRefgetStore::name(self, ...)`
/// path. `list_collections` is forwarded to a function that is not defined
/// in this part of the file.
// NOTE(review): the explicit path presumably makes it obvious that the
// inherent method is the target rather than this trait method — confirm.
impl SeqColService for ReadonlyRefgetStore {
// Forwards to `ReadonlyRefgetStore::get_collection_level1`.
fn get_collection_level1(&self, digest: &str) -> Result<CollectionLevel1> {
ReadonlyRefgetStore::get_collection_level1(self, digest)
}
// Forwards to `ReadonlyRefgetStore::get_collection_level2`.
fn get_collection_level2(&self, digest: &str) -> Result<CollectionLevel2> {
ReadonlyRefgetStore::get_collection_level2(self, digest)
}
// Forwards to `ReadonlyRefgetStore::compare`.
fn compare(&self, digest_a: &str, digest_b: &str) -> Result<SeqColComparison> {
ReadonlyRefgetStore::compare(self, digest_a, digest_b)
}
// Forwards to `ReadonlyRefgetStore::compare_with_level2`.
fn compare_with_level2(
&self,
digest_a: &str,
external: &CollectionLevel2,
) -> Result<SeqColComparison> {
ReadonlyRefgetStore::compare_with_level2(self, digest_a, external)
}
// Forwards to `ReadonlyRefgetStore::find_collections_by_attribute`.
fn find_collections_by_attribute(
&self,
attr_name: &str,
attr_digest: &str,
) -> Result<Vec<String>> {
ReadonlyRefgetStore::find_collections_by_attribute(self, attr_name, attr_digest)
}
// Forwards to `ReadonlyRefgetStore::get_attribute`.
fn get_attribute(
&self,
attr_name: &str,
attr_digest: &str,
) -> Result<Option<serde_json::Value>> {
ReadonlyRefgetStore::get_attribute(self, attr_name, attr_digest)
}
// Forwards to `ReadonlyRefgetStore::list_collections` (defined outside this view).
fn list_collections(
&self,
page: usize,
page_size: usize,
filters: &[(&str, &str)],
) -> Result<PagedResult<SequenceCollectionMetadata>> {
ReadonlyRefgetStore::list_collections(self, page, page_size, filters)
}
// Forwards to `ReadonlyRefgetStore::collection_count`.
fn collection_count(&self) -> usize {
ReadonlyRefgetStore::collection_count(self)
}
}
#[cfg(test)]
mod tests {
use crate::store::{FastaImportOptions, ReadonlyRefgetStore, RefgetStore};
use std::path::PathBuf;
/// Copies the fixture `../tests/data/fasta/<name>` into `temp_dir` and
/// returns the path of the copy. Panics if the copy fails.
fn copy_test_fasta(temp_dir: &std::path::Path, name: &str) -> PathBuf {
    let source = format!("../tests/data/fasta/{}", name);
    let target = temp_dir.join(name);
    if let Err(e) = std::fs::copy(&source, &target) {
        panic!("Failed to copy {} to tempdir: {}", source, e);
    }
    target
}
/// A fresh in-memory store is expected to compute all three ancillary
/// digests, both on the returned metadata and on the metadata read back.
#[test]
fn test_ancillary_digests_computed() {
    let mut store = RefgetStore::in_memory();
    assert!(store.has_ancillary_digests());

    let fasta = "../tests/data/fasta/base.fa";
    let (imported, _) = store
        .add_sequence_collection_from_fasta(fasta, FastaImportOptions::new())
        .unwrap();
    assert!(imported.name_length_pairs_digest.is_some());
    assert!(imported.sorted_name_length_pairs_digest.is_some());
    assert!(imported.sorted_sequences_digest.is_some());

    // Same expectations for the metadata fetched from the store.
    let stored = store.get_collection_metadata(&imported.digest).unwrap();
    assert!(stored.name_length_pairs_digest.is_some());
    assert!(stored.sorted_name_length_pairs_digest.is_some());
    assert!(stored.sorted_sequences_digest.is_some());
}
/// With ancillary digests switched off, an import must leave all three
/// ancillary digests unset.
#[test]
fn test_ancillary_digests_disabled() {
    let mut store = RefgetStore::in_memory();
    store.disable_ancillary_digests();
    assert!(!store.has_ancillary_digests());

    let fasta = "../tests/data/fasta/base.fa";
    let (imported, _) = store
        .add_sequence_collection_from_fasta(fasta, FastaImportOptions::new())
        .unwrap();
    assert!(imported.name_length_pairs_digest.is_none());
    assert!(imported.sorted_name_length_pairs_digest.is_none());
    assert!(imported.sorted_sequences_digest.is_none());
}
/// The level-1 view must expose the same digests as the import metadata,
/// including the ancillary ones.
#[test]
fn test_collection_level1() {
    let mut store = RefgetStore::in_memory();
    let fasta = "../tests/data/fasta/base.fa";
    let (imported, _) = store
        .add_sequence_collection_from_fasta(fasta, FastaImportOptions::new())
        .unwrap();

    let level1 = store.get_collection_level1(&imported.digest).unwrap();
    assert_eq!(level1.names, imported.names_digest);
    assert_eq!(level1.lengths, imported.lengths_digest);
    assert_eq!(level1.sequences, imported.sequences_digest);
    assert!(level1.name_length_pairs.is_some());
    assert!(level1.sorted_name_length_pairs.is_some());
    assert!(level1.sorted_sequences.is_some());
}
/// The level-2 view of `base.fa` is expected to hold three entries per
/// array, `SQ.`-prefixed sequence digests, and the lengths 8 and 4.
#[test]
fn test_collection_level2() {
    let mut store = RefgetStore::in_memory();
    let fasta = "../tests/data/fasta/base.fa";
    let (imported, _) = store
        .add_sequence_collection_from_fasta(fasta, FastaImportOptions::new())
        .unwrap();

    let level2 = store.get_collection_level2(&imported.digest).unwrap();
    assert_eq!(level2.names.len(), 3);
    assert_eq!(level2.lengths.len(), 3);
    assert_eq!(level2.sequences.len(), 3);
    level2.sequences.iter().for_each(|seq| {
        assert!(seq.starts_with("SQ."), "Expected SQ. prefix, got: {}", seq);
    });
    assert!(level2.lengths.contains(&8));
    assert!(level2.lengths.contains(&4));
}
/// Compares a collection with itself and with a second collection.
#[test]
fn test_compare_collections() {
    let mut store = RefgetStore::in_memory();
    let base_fasta = "../tests/data/fasta/base.fa";
    let renamed_fasta = "../tests/data/fasta/different_names.fa";
    let (meta_a, _) = store
        .add_sequence_collection_from_fasta(base_fasta, FastaImportOptions::new())
        .unwrap();
    let (meta_b, _) = store
        .add_sequence_collection_from_fasta(renamed_fasta, FastaImportOptions::new())
        .unwrap();

    // A vs. A: same digest on both sides, all six attributes shared and in
    // the same order.
    let same = store.compare(&meta_a.digest, &meta_a.digest).unwrap();
    assert_eq!(Some(same.digests.a.as_str()), same.digests.b.as_deref());
    assert_eq!(same.attributes.a_and_b.len(), 6);
    for attr in &same.attributes.a_and_b {
        assert_eq!(same.array_elements.a_and_b_same_order[attr], Some(true));
    }

    // A vs. B: digests differ, but both sides carry the same attribute set.
    let different = store.compare(&meta_a.digest, &meta_b.digest).unwrap();
    assert_ne!(
        Some(different.digests.a.as_str()),
        different.digests.b.as_deref()
    );
    assert_eq!(different.attributes.a_and_b.len(), 6);
    assert!(different.attributes.a_only.is_empty());
    assert!(different.attributes.b_only.is_empty());
}
/// Collection A is imported with ancillary digests, collection B without:
/// three attributes are expected to be shared and three to be A-only.
#[test]
fn test_compare_mixed_ancillary() {
    let mut store = RefgetStore::in_memory();
    let with_ancillary = "../tests/data/fasta/base.fa";
    let without_ancillary = "../tests/data/fasta/different_names.fa";

    let (meta_a, _) = store
        .add_sequence_collection_from_fasta(with_ancillary, FastaImportOptions::new())
        .unwrap();
    // Switch the flag off only for the second import.
    store.disable_ancillary_digests();
    let (meta_b, _) = store
        .add_sequence_collection_from_fasta(without_ancillary, FastaImportOptions::new())
        .unwrap();

    let comparison = store.compare(&meta_a.digest, &meta_b.digest).unwrap();
    assert_eq!(comparison.attributes.a_and_b.len(), 3);
    assert_eq!(comparison.attributes.a_only.len(), 3);
    assert!(comparison.attributes.b_only.is_empty());
}
/// Exercises attribute search for each core attribute, one ancillary
/// attribute, a digest nobody has, and an unsupported attribute name.
#[test]
fn test_find_collections_by_attribute() {
    let mut store = RefgetStore::in_memory();
    let fasta = "../tests/data/fasta/base.fa";
    let (imported, _) = store
        .add_sequence_collection_from_fasta(fasta, FastaImportOptions::new())
        .unwrap();

    let by_names = store
        .find_collections_by_attribute("names", &imported.names_digest)
        .unwrap();
    assert_eq!(by_names.len(), 1);
    assert_eq!(by_names[0], imported.digest);

    let by_lengths = store
        .find_collections_by_attribute("lengths", &imported.lengths_digest)
        .unwrap();
    assert_eq!(by_lengths.len(), 1);

    let by_sequences = store
        .find_collections_by_attribute("sequences", &imported.sequences_digest)
        .unwrap();
    assert_eq!(by_sequences.len(), 1);

    let pairs_digest = imported.name_length_pairs_digest.as_ref().unwrap();
    let by_pairs = store
        .find_collections_by_attribute("name_length_pairs", pairs_digest)
        .unwrap();
    assert_eq!(by_pairs.len(), 1);

    // A digest nobody has yields an empty result, not an error.
    let no_hits = store
        .find_collections_by_attribute("names", "nonexistent")
        .unwrap();
    assert!(no_hits.is_empty());

    // An unsupported attribute name is an error.
    let unsupported = store.find_collections_by_attribute("unknown", "digest");
    assert!(unsupported.is_err());
}
/// `get_attribute` returns a JSON array for known digests and `None` for a
/// digest that no collection carries.
#[test]
fn test_get_attribute() {
    let mut store = RefgetStore::in_memory();
    let fasta = "../tests/data/fasta/base.fa";
    let (imported, _) = store
        .add_sequence_collection_from_fasta(fasta, FastaImportOptions::new())
        .unwrap();

    let names_value = store
        .get_attribute("names", &imported.names_digest)
        .unwrap();
    assert!(names_value.is_some());
    let names_json = names_value.unwrap();
    assert!(names_json.is_array());
    assert_eq!(names_json.as_array().unwrap().len(), 3);

    let lengths_value = store
        .get_attribute("lengths", &imported.lengths_digest)
        .unwrap();
    assert!(lengths_value.is_some());

    let missing = store.get_attribute("names", "nonexistent").unwrap();
    assert!(missing.is_none());
}
/// `sorted_sequences` must come back as three `SQ.`-prefixed strings in
/// ascending order.
#[test]
fn test_get_attribute_sorted_sequences() {
    let mut store = RefgetStore::in_memory();
    let fasta = "../tests/data/fasta/base.fa";
    let (imported, _) = store
        .add_sequence_collection_from_fasta(fasta, FastaImportOptions::new())
        .unwrap();

    let attr_digest = imported.sorted_sequences_digest.as_ref().unwrap();
    let fetched = store.get_attribute("sorted_sequences", attr_digest).unwrap();
    assert!(fetched.is_some());
    let json = fetched.unwrap();
    assert!(json.is_array());
    let entries = json.as_array().unwrap();
    assert_eq!(entries.len(), 3);

    entries.iter().for_each(|entry| {
        let s = entry.as_str().unwrap();
        assert!(s.starts_with("SQ."), "Expected SQ. prefix, got: {}", s);
    });

    // The returned order must already equal the sorted order.
    let as_returned: Vec<&str> = entries.iter().map(|v| v.as_str().unwrap()).collect();
    let mut expected_order = as_returned.clone();
    expected_order.sort();
    assert_eq!(
        as_returned, expected_order,
        "sorted_sequences should be in sorted order"
    );
}
/// `name_length_pairs` must come back as three objects, each with a string
/// `name` and a numeric `length`.
#[test]
fn test_get_attribute_name_length_pairs() {
    let mut store = RefgetStore::in_memory();
    let fasta = "../tests/data/fasta/base.fa";
    let (imported, _) = store
        .add_sequence_collection_from_fasta(fasta, FastaImportOptions::new())
        .unwrap();

    let attr_digest = imported.name_length_pairs_digest.as_ref().unwrap();
    let fetched = store
        .get_attribute("name_length_pairs", attr_digest)
        .unwrap();
    assert!(fetched.is_some());
    let json = fetched.unwrap();
    assert!(json.is_array());
    let entries = json.as_array().unwrap();
    assert_eq!(entries.len(), 3);

    entries.iter().for_each(|entry| {
        let pair = entry.as_object().unwrap();
        assert!(pair.contains_key("name"), "Expected 'name' key in object");
        assert!(pair.contains_key("length"), "Expected 'length' key in object");
        assert!(pair["name"].is_string(), "name should be a string");
        assert!(pair["length"].is_number(), "length should be a number");
    });
}
/// `sorted_name_length_pairs` must come back as three `{name, length}`
/// objects ordered by the digest of their canonical JSON.
#[test]
fn test_get_attribute_sorted_name_length_pairs() {
    use crate::digest::algorithms::{canonicalize_json, sha512t24u};

    let mut store = RefgetStore::in_memory();
    let fasta = "../tests/data/fasta/base.fa";
    let (imported, _) = store
        .add_sequence_collection_from_fasta(fasta, FastaImportOptions::new())
        .unwrap();

    let attr_digest = imported.sorted_name_length_pairs_digest.as_ref().unwrap();
    let fetched = store
        .get_attribute("sorted_name_length_pairs", attr_digest)
        .unwrap();
    assert!(fetched.is_some());
    let json = fetched.unwrap();
    assert!(json.is_array());
    let entries = json.as_array().unwrap();
    assert_eq!(entries.len(), 3);

    entries.iter().for_each(|entry| {
        let pair = entry.as_object().unwrap();
        assert!(pair.contains_key("name"));
        assert!(pair.contains_key("length"));
    });

    // Digest each object the way the test defines "sorted": sha512t24u over
    // the canonical JSON text. The returned order must already be ascending.
    let as_returned: Vec<String> = entries
        .iter()
        .map(|entry| sha512t24u(canonicalize_json(entry).as_bytes()))
        .collect();
    let mut expected_order = as_returned.clone();
    expected_order.sort();
    assert_eq!(
        as_returned, expected_order,
        "sorted_name_length_pairs objects should be in sorted digest order"
    );
}
/// Asking for an ancillary attribute in a store that never computed one
/// yields `None` rather than an error.
#[test]
fn test_get_attribute_ancillary_not_computed() {
    let mut store = RefgetStore::in_memory();
    store.disable_ancillary_digests();

    let fasta = "../tests/data/fasta/base.fa";
    let (imported, _) = store
        .add_sequence_collection_from_fasta(fasta, FastaImportOptions::new())
        .unwrap();
    assert!(imported.name_length_pairs_digest.is_none());

    let lookup = store
        .get_attribute("name_length_pairs", "some_digest")
        .unwrap();
    assert!(
        lookup.is_none(),
        "Expected None when no ancillary digests are computed"
    );
}
/// Writes a store to disk, reopens it, and checks that the ancillary
/// digests are still present on the listed metadata.
#[test]
fn test_rgci_roundtrip_with_ancillary() {
    let workdir = tempfile::tempdir().unwrap();
    let root = workdir.path();
    let fasta_copy = copy_test_fasta(root, "base.fa");

    // Populate and persist; the writer is dropped at the end of the scope.
    {
        let mut writer = RefgetStore::on_disk(root).unwrap();
        writer
            .add_sequence_collection_from_fasta(&fasta_copy, FastaImportOptions::new())
            .unwrap();
        writer.write().unwrap();
    }

    // Reopen from disk and inspect what was persisted.
    {
        let reopened = RefgetStore::open_local(root).unwrap();
        let listing = reopened.list_collections(0, usize::MAX, &[]).unwrap();
        assert_eq!(listing.results.len(), 1);
        let persisted = &listing.results[0];
        assert!(persisted.name_length_pairs_digest.is_some());
        assert!(persisted.sorted_name_length_pairs_digest.is_some());
        assert!(persisted.sorted_sequences_digest.is_some());
    }
}
/// Imports every FASTA named in `test_fasta_digests.json` and checks all
/// computed digests against the values recorded in that fixture.
#[test]
fn test_compliance_digests_from_fixture() {
    let fixture_path = "../tests/data/fasta/test_fasta_digests.json";
    let raw = match std::fs::read_to_string(fixture_path) {
        Ok(text) => text,
        Err(e) => panic!("Failed to read {}: {}", fixture_path, e),
    };
    let fixture: serde_json::Value = match serde_json::from_str(&raw) {
        Ok(parsed) => parsed,
        Err(e) => panic!("Failed to parse {}: {}", fixture_path, e),
    };

    let mut store = RefgetStore::in_memory();
    store.enable_ancillary_digests();

    // The fixture maps a FASTA file name to its expected digest bundle.
    for (fa_name, bundle) in fixture.as_object().unwrap() {
        let fasta_path = format!("../tests/data/fasta/{}", fa_name);
        let (meta, _) = store
            .add_sequence_collection_from_fasta(&fasta_path, FastaImportOptions::new())
            .unwrap_or_else(|e| panic!("{}: {}", fa_name, e));

        let expected = bundle["level1"].as_object().unwrap();
        let expected_top = bundle["top_level_digest"].as_str().unwrap();

        assert_eq!(meta.digest, expected_top, "{}: top_level_digest", fa_name);
        assert_eq!(
            meta.names_digest,
            expected["names"].as_str().unwrap(),
            "{}: names",
            fa_name
        );
        assert_eq!(
            meta.lengths_digest,
            expected["lengths"].as_str().unwrap(),
            "{}: lengths",
            fa_name
        );
        assert_eq!(
            meta.sequences_digest,
            expected["sequences"].as_str().unwrap(),
            "{}: sequences",
            fa_name
        );
        assert_eq!(
            meta.sorted_sequences_digest.as_deref(),
            Some(expected["sorted_sequences"].as_str().unwrap()),
            "{}: sorted_sequences",
            fa_name
        );
        assert_eq!(
            meta.name_length_pairs_digest.as_deref(),
            Some(expected["name_length_pairs"].as_str().unwrap()),
            "{}: name_length_pairs",
            fa_name
        );
        assert_eq!(
            meta.sorted_name_length_pairs_digest.as_deref(),
            Some(expected["sorted_name_length_pairs"].as_str().unwrap()),
            "{}: sorted_name_length_pairs",
            fa_name
        );
    }
}
/// The two store flags must have the same values after a write and reopen
/// as they had on the freshly created on-disk store.
#[test]
fn test_store_config_persisted() {
    let workdir = tempfile::tempdir().unwrap();
    let root = workdir.path();
    let fasta_copy = copy_test_fasta(root, "base.fa");

    {
        let mut writer = RefgetStore::on_disk(root).unwrap();
        assert!(writer.has_ancillary_digests());
        assert!(!writer.has_attribute_index());
        writer
            .add_sequence_collection_from_fasta(&fasta_copy, FastaImportOptions::new())
            .unwrap();
        writer.write().unwrap();
    }

    {
        let reopened = RefgetStore::open_local(root).unwrap();
        assert!(reopened.has_ancillary_digests());
        assert!(!reopened.has_attribute_index());
    }
}
/// Comparing a collection against its own level-2 export: no B digest, and
/// the three core attributes shared and in the same order.
#[test]
fn test_compare_with_level2_self_identical() {
    let mut store = RefgetStore::in_memory();
    let fasta = "../tests/data/fasta/base.fa";
    let (imported, _) = store
        .add_sequence_collection_from_fasta(fasta, FastaImportOptions::new())
        .unwrap();

    let exported = store.get_collection_level2(&imported.digest).unwrap();
    let comparison = store
        .compare_with_level2(&imported.digest, &exported)
        .unwrap();

    assert_eq!(comparison.digests.a, imported.digest);
    assert!(
        comparison.digests.b.is_none(),
        "digests.b should be None for external level-2 comparison"
    );

    let shared = &comparison.attributes.a_and_b;
    assert!(shared.contains(&"names".to_string()));
    assert!(shared.contains(&"lengths".to_string()));
    assert!(shared.contains(&"sequences".to_string()));

    let core_attrs = ["names", "lengths", "sequences"];
    for attr in &core_attrs {
        assert_eq!(
            comparison.array_elements.a_and_b_same_order[*attr],
            Some(true),
            "{} should be in same order",
            attr
        );
    }
}
/// Comparing A against B by digest and A against B's level-2 export must
/// agree on the A digest; only the by-digest comparison knows B's digest.
#[test]
fn test_compare_with_level2_cross_compare() {
    let mut store = RefgetStore::in_memory();
    let base_fasta = "../tests/data/fasta/base.fa";
    let renamed_fasta = "../tests/data/fasta/different_names.fa";
    let (meta_a, _) = store
        .add_sequence_collection_from_fasta(base_fasta, FastaImportOptions::new())
        .unwrap();
    let (meta_b, _) = store
        .add_sequence_collection_from_fasta(renamed_fasta, FastaImportOptions::new())
        .unwrap();

    let exported_b = store.get_collection_level2(&meta_b.digest).unwrap();
    let by_digest = store.compare(&meta_a.digest, &meta_b.digest).unwrap();
    let by_level2 = store
        .compare_with_level2(&meta_a.digest, &exported_b)
        .unwrap();

    assert_eq!(by_digest.digests.b, Some(meta_b.digest.clone()));
    assert!(by_level2.digests.b.is_none());
    assert_eq!(by_digest.digests.a, by_level2.digests.a);

    // Every core attribute must be classified into one of the three buckets.
    let buckets = &by_level2.attributes;
    for attr in &["names", "lengths", "sequences"] {
        let key = attr.to_string();
        let classified = buckets.a_and_b.contains(&key)
            || buckets.a_only.contains(&key)
            || buckets.b_only.contains(&key);
        assert!(classified, "attr {} must appear somewhere", attr);
    }
}
/// `compare_with_level2` must fail when the stored-side digest is unknown.
#[test]
fn test_compare_with_level2_unknown_digest_returns_error() {
    use crate::digest::CollectionLevel2;

    let store = RefgetStore::in_memory();
    let external = CollectionLevel2 {
        names: vec!["chr1".to_string()],
        lengths: vec![100],
        sequences: vec!["SQ.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa".to_string()],
    };

    let outcome = store.compare_with_level2("nonexistent_digest", &external);
    assert!(outcome.is_err(), "Expected error for unknown digest");
}
/// The stored side carries ancillary attributes and the level-2 side does
/// not, so the ancillary attributes must be reported as A-only.
#[test]
fn test_compare_with_level2_ancillary_in_a_only() {
    let mut store = RefgetStore::in_memory();
    let fasta = "../tests/data/fasta/base.fa";
    let (imported, _) = store
        .add_sequence_collection_from_fasta(fasta, FastaImportOptions::new())
        .unwrap();
    assert!(imported.name_length_pairs_digest.is_some());
    assert!(imported.sorted_name_length_pairs_digest.is_some());
    assert!(imported.sorted_sequences_digest.is_some());

    let exported = store.get_collection_level2(&imported.digest).unwrap();
    let comparison = store
        .compare_with_level2(&imported.digest, &exported)
        .unwrap();

    let a_only = &comparison.attributes.a_only;
    assert!(
        a_only.contains(&"sorted_sequences".to_string()),
        "sorted_sequences should be in a_only"
    );
    assert!(
        a_only.contains(&"name_length_pairs".to_string()),
        "name_length_pairs should be in a_only"
    );
    assert!(
        a_only.contains(&"sorted_name_length_pairs".to_string()),
        "sorted_name_length_pairs should be in a_only"
    );
    assert!(
        comparison.attributes.b_only.is_empty(),
        "b_only should be empty when level-2 has only core attributes"
    );
}
/// An unfiltered listing of two collections with a page size of two returns
/// both, ordered by digest, with matching pagination info.
#[test]
fn test_list_collections_paged_no_filters() {
    let mut store = RefgetStore::in_memory();
    let base_fasta = "../tests/data/fasta/base.fa";
    let renamed_fasta = "../tests/data/fasta/different_names.fa";
    store
        .add_sequence_collection_from_fasta(base_fasta, FastaImportOptions::new())
        .unwrap();
    store
        .add_sequence_collection_from_fasta(renamed_fasta, FastaImportOptions::new())
        .unwrap();

    let listing = store.list_collections(0, 2, &[]).unwrap();
    assert_eq!(listing.results.len(), 2);
    assert_eq!(listing.pagination.total, 2);
    assert_eq!(listing.pagination.page, 0);
    assert_eq!(listing.pagination.page_size, 2);
    assert!(listing.results[0].digest <= listing.results[1].digest);
}
/// With a page size of one, pages 0 and 1 each return a single, different
/// collection and both report a total of two.
#[test]
fn test_list_collections_paged_second_page() {
    let mut store = RefgetStore::in_memory();
    let base_fasta = "../tests/data/fasta/base.fa";
    let renamed_fasta = "../tests/data/fasta/different_names.fa";
    store
        .add_sequence_collection_from_fasta(base_fasta, FastaImportOptions::new())
        .unwrap();
    store
        .add_sequence_collection_from_fasta(renamed_fasta, FastaImportOptions::new())
        .unwrap();

    let first = store.list_collections(0, 1, &[]).unwrap();
    assert_eq!(first.results.len(), 1);
    assert_eq!(first.pagination.total, 2);

    let second = store.list_collections(1, 1, &[]).unwrap();
    assert_eq!(second.results.len(), 1);
    assert_eq!(second.pagination.total, 2);

    assert_ne!(first.results[0].digest, second.results[0].digest);
}
/// Requesting a page past the end returns no results but still reports the
/// real total.
#[test]
fn test_list_collections_paged_beyond_end() {
    let mut store = RefgetStore::in_memory();
    let fasta = "../tests/data/fasta/base.fa";
    store
        .add_sequence_collection_from_fasta(fasta, FastaImportOptions::new())
        .unwrap();

    let listing = store.list_collections(10, 100, &[]).unwrap();
    assert!(listing.results.is_empty());
    assert_eq!(listing.pagination.total, 1);
}
/// Filtering by the `names` digest of one collection must return only that
/// collection, and the total must count filtered results.
#[test]
fn test_list_collections_single_filter() {
    let mut store = RefgetStore::in_memory();
    let base_fasta = "../tests/data/fasta/base.fa";
    let renamed_fasta = "../tests/data/fasta/different_names.fa";
    let (target, _) = store
        .add_sequence_collection_from_fasta(base_fasta, FastaImportOptions::new())
        .unwrap();
    store
        .add_sequence_collection_from_fasta(renamed_fasta, FastaImportOptions::new())
        .unwrap();

    let filters = [("names", target.names_digest.as_str())];
    let listing = store.list_collections(0, 100, &filters).unwrap();
    assert_eq!(listing.results.len(), 1);
    assert_eq!(listing.results[0].digest, target.digest);
    assert_eq!(listing.pagination.total, 1);
}
/// Two filters taken from the same collection must both hold, leaving only
/// that collection in the result.
#[test]
fn test_list_collections_multi_filter_and() {
    let mut store = RefgetStore::in_memory();
    let base_fasta = "../tests/data/fasta/base.fa";
    let renamed_fasta = "../tests/data/fasta/different_names.fa";
    let (target, _) = store
        .add_sequence_collection_from_fasta(base_fasta, FastaImportOptions::new())
        .unwrap();
    store
        .add_sequence_collection_from_fasta(renamed_fasta, FastaImportOptions::new())
        .unwrap();

    let filters = [
        ("names", target.names_digest.as_str()),
        ("lengths", target.lengths_digest.as_str()),
    ];
    let listing = store.list_collections(0, 100, &filters).unwrap();
    assert_eq!(listing.results.len(), 1);
    assert_eq!(listing.results[0].digest, target.digest);
}
/// A filter that matches nothing gives an empty page and a total of zero.
#[test]
fn test_list_collections_filter_no_match() {
    let mut store = RefgetStore::in_memory();
    let fasta = "../tests/data/fasta/base.fa";
    store
        .add_sequence_collection_from_fasta(fasta, FastaImportOptions::new())
        .unwrap();

    let filters = [("names", "nonexistent_digest")];
    let listing = store.list_collections(0, 100, &filters).unwrap();
    assert!(listing.results.is_empty());
    assert_eq!(listing.pagination.total, 0);
}
/// Filtering on an unsupported attribute name must be reported as an error.
#[test]
fn test_list_collections_invalid_attribute() {
    let mut store = RefgetStore::in_memory();
    let fasta = "../tests/data/fasta/base.fa";
    store
        .add_sequence_collection_from_fasta(fasta, FastaImportOptions::new())
        .unwrap();

    let filters = [("unknown_attr", "digest")];
    let outcome = store.list_collections(0, 100, &filters);
    assert!(outcome.is_err());
}
/// Pagination applied on top of a filter: both fixtures are expected to
/// match the `lengths` filter, so each single-item page holds a different
/// collection and the total stays at two.
#[test]
fn test_list_collections_filter_with_pagination() {
    let mut store = RefgetStore::in_memory();
    let base_fasta = "../tests/data/fasta/base.fa";
    let renamed_fasta = "../tests/data/fasta/different_names.fa";
    let (meta_a, _) = store
        .add_sequence_collection_from_fasta(base_fasta, FastaImportOptions::new())
        .unwrap();
    let (_meta_b, _) = store
        .add_sequence_collection_from_fasta(renamed_fasta, FastaImportOptions::new())
        .unwrap();

    let filters = [("lengths", meta_a.lengths_digest.as_str())];

    let first = store.list_collections(0, 1, &filters).unwrap();
    assert_eq!(first.results.len(), 1);
    assert_eq!(first.pagination.total, 2);

    let second = store.list_collections(1, 1, &filters).unwrap();
    assert_eq!(second.results.len(), 1);
    assert_eq!(second.pagination.total, 2);

    assert_ne!(first.results[0].digest, second.results[0].digest);
}
/// The trait must be usable as `Arc<dyn SeqColService + Send + Sync>`, and
/// every trait method must be callable through that object.
#[test]
fn test_trait_object_safety() {
    use super::SeqColService;
    use std::sync::Arc;

    let mut store = RefgetStore::in_memory();
    let fasta = "../tests/data/fasta/base.fa";
    let (imported, _) = store
        .add_sequence_collection_from_fasta(fasta, FastaImportOptions::new())
        .unwrap();
    let service: Arc<dyn SeqColService + Send + Sync> = Arc::new(store.into_readonly());

    let level1 = service.get_collection_level1(&imported.digest).unwrap();
    assert_eq!(level1.names, imported.names_digest);

    let level2 = service.get_collection_level2(&imported.digest).unwrap();
    assert_eq!(level2.names.len(), 3);

    let self_comparison = service.compare(&imported.digest, &imported.digest).unwrap();
    assert_eq!(self_comparison.digests.a, imported.digest);

    let level2_comparison = service
        .compare_with_level2(&imported.digest, &level2)
        .unwrap();
    assert_eq!(level2_comparison.digests.a, imported.digest);

    let hits = service
        .find_collections_by_attribute("names", &imported.names_digest)
        .unwrap();
    assert_eq!(hits.len(), 1);

    let names_value = service
        .get_attribute("names", &imported.names_digest)
        .unwrap();
    assert!(names_value.is_some());

    let listing = service.list_collections(0, 10, &[]).unwrap();
    assert_eq!(listing.results.len(), 1);

    assert_eq!(service.collection_count(), 1);
}
/// Two imports of different fixtures must give a collection count of two.
#[test]
fn test_collection_count() {
    let mut store = RefgetStore::in_memory();
    let base_fasta = "../tests/data/fasta/base.fa";
    let renamed_fasta = "../tests/data/fasta/different_names.fa";
    store
        .add_sequence_collection_from_fasta(base_fasta, FastaImportOptions::new())
        .unwrap();
    store
        .add_sequence_collection_from_fasta(renamed_fasta, FastaImportOptions::new())
        .unwrap();

    assert_eq!(store.collection_count(), 2);
}
/// Calls through `&dyn SeqColService` must give the same answers as the
/// corresponding `ReadonlyRefgetStore` functions called directly.
#[test]
fn test_trait_methods_match_concrete() {
    use super::SeqColService;

    let mut store = RefgetStore::in_memory();
    let fasta = "../tests/data/fasta/base.fa";
    let (imported, _) = store
        .add_sequence_collection_from_fasta(fasta, FastaImportOptions::new())
        .unwrap();
    let readonly = store.into_readonly();
    let via_trait: &dyn SeqColService = &readonly;

    // Level-1 view.
    let direct_level1 =
        ReadonlyRefgetStore::get_collection_level1(&readonly, &imported.digest).unwrap();
    let trait_level1 = via_trait.get_collection_level1(&imported.digest).unwrap();
    assert_eq!(direct_level1.names, trait_level1.names);
    assert_eq!(direct_level1.lengths, trait_level1.lengths);
    assert_eq!(direct_level1.sequences, trait_level1.sequences);

    // Paged listing.
    let direct_listing = ReadonlyRefgetStore::list_collections(&readonly, 0, 10, &[]).unwrap();
    let trait_listing = via_trait.list_collections(0, 10, &[]).unwrap();
    assert_eq!(direct_listing.results.len(), trait_listing.results.len());
    assert_eq!(
        direct_listing.pagination.total,
        trait_listing.pagination.total
    );

    // Collection count.
    assert_eq!(
        ReadonlyRefgetStore::collection_count(&readonly),
        via_trait.collection_count()
    );
}
}