Skip to main content

refget_store/
seqcol_store.rs

1//! In-memory sequence collection store.
2
3use std::collections::HashMap;
4
5use refget_digest::digest_json;
6use refget_model::SeqCol;
7
8use crate::{ListItem, ListResult, SeqColStore};
9
10/// An in-memory store of sequence collections, indexed by Level 0 digest
11/// and by per-attribute digests.
12pub struct InMemorySeqColStore {
13    /// Map from Level 0 digest to SeqCol.
14    collections: HashMap<String, SeqCol>,
15    /// Map from (attribute_name, attribute_digest) to list of collection digests.
16    attribute_index: HashMap<(String, String), Vec<String>>,
17    /// Map from (attribute_name, attribute_digest) to the attribute JSON value.
18    attribute_values: HashMap<(String, String), serde_json::Value>,
19    /// Ordered list of all collection digests.
20    digests: Vec<String>,
21}
22
23impl InMemorySeqColStore {
24    /// Create a new empty store.
25    pub fn new() -> Self {
26        Self {
27            collections: HashMap::new(),
28            attribute_index: HashMap::new(),
29            attribute_values: HashMap::new(),
30            digests: Vec::new(),
31        }
32    }
33
34    /// Add a sequence collection to the store. Computes and indexes all attribute digests.
35    pub fn add(&mut self, col: SeqCol) {
36        let digest = col.digest();
37
38        // Index inherent attributes
39        self.index_attribute("names", &col.names, &digest);
40        self.index_attribute_u64("lengths", &col.lengths, &digest);
41        self.index_attribute("sequences", &col.sequences, &digest);
42
43        // Index computed attributes
44        let snlp = col.sorted_name_length_pairs();
45        self.index_attribute("sorted_name_length_pairs", &snlp, &digest);
46
47        let nlp = col.name_length_pairs();
48        self.index_attribute("name_length_pairs", &nlp, &digest);
49
50        self.digests.push(digest.clone());
51        self.collections.insert(digest, col);
52    }
53
54    fn index_attribute(&mut self, name: &str, values: &[String], collection_digest: &str) {
55        let json_array: Vec<serde_json::Value> =
56            values.iter().map(|v| serde_json::Value::String(v.clone())).collect();
57        let json = serde_json::Value::Array(json_array.clone());
58        let attr_digest = digest_json(&json);
59
60        let key = (name.to_string(), attr_digest.clone());
61        self.attribute_index.entry(key.clone()).or_default().push(collection_digest.to_string());
62        self.attribute_values.entry(key).or_insert(serde_json::Value::Array(json_array));
63    }
64
65    fn index_attribute_u64(&mut self, name: &str, values: &[u64], collection_digest: &str) {
66        let json_array: Vec<serde_json::Value> =
67            values.iter().map(|v| serde_json::json!(v)).collect();
68        let json = serde_json::Value::Array(json_array.clone());
69        let attr_digest = digest_json(&json);
70
71        let key = (name.to_string(), attr_digest.clone());
72        self.attribute_index.entry(key.clone()).or_default().push(collection_digest.to_string());
73        self.attribute_values.entry(key).or_insert(serde_json::Value::Array(json_array));
74    }
75}
76
77impl Default for InMemorySeqColStore {
78    fn default() -> Self {
79        Self::new()
80    }
81}
82
83impl SeqColStore for InMemorySeqColStore {
84    fn get_collection(&self, digest: &str) -> Option<&SeqCol> {
85        self.collections.get(digest)
86    }
87
88    fn list_collections(
89        &self,
90        filters: &[(String, String)],
91        page: usize,
92        page_size: usize,
93    ) -> ListResult {
94        // Start with all digests, then narrow by filters
95        let mut matching: Option<Vec<&str>> = None;
96
97        for (attr_name, attr_digest) in filters {
98            let key = (attr_name.clone(), attr_digest.clone());
99            if let Some(collection_digests) = self.attribute_index.get(&key) {
100                let set: Vec<&str> = collection_digests.iter().map(String::as_str).collect();
101                matching = Some(match matching {
102                    None => set,
103                    Some(prev) => prev.into_iter().filter(|d| set.contains(d)).collect(),
104                });
105            } else {
106                return ListResult { items: vec![], total: 0, page, page_size };
107            }
108        }
109
110        let all_digests: Vec<&str> = match matching {
111            Some(m) => m,
112            None => self.digests.iter().map(String::as_str).collect(),
113        };
114
115        let total = all_digests.len();
116        let start = page * page_size;
117        let items: Vec<ListItem> = all_digests
118            .into_iter()
119            .skip(start)
120            .take(page_size)
121            .map(|d| ListItem { digest: d.to_string() })
122            .collect();
123
124        ListResult { items, total, page, page_size }
125    }
126
127    fn get_attribute(&self, name: &str, digest: &str) -> Option<serde_json::Value> {
128        let key = (name.to_string(), digest.to_string());
129        self.attribute_values.get(&key).cloned()
130    }
131
132    fn count(&self) -> usize {
133        self.collections.len()
134    }
135}
136
#[cfg(test)]
mod tests {
    use super::*;
    use refget_model::SeqCol;

    // ---- fixtures ----------------------------------------------------------

    /// Two-sequence collection used by most tests.
    fn example_col() -> SeqCol {
        SeqCol {
            names: vec!["chr1".to_string(), "chr2".to_string()],
            lengths: vec![100, 200],
            sequences: vec!["SQ.abc".to_string(), "SQ.def".to_string()],
            sorted_name_length_pairs: None,
        }
    }

    /// Single-sequence collection whose name and sequence derive from `name`.
    fn make_col(name: &str) -> SeqCol {
        SeqCol {
            names: vec![name.to_string()],
            lengths: vec![42],
            sequences: vec![format!("SQ.{name}")],
            sorted_name_length_pairs: None,
        }
    }

    /// Fresh store containing exactly the given collections, added in order.
    fn store_with(cols: Vec<SeqCol>) -> InMemorySeqColStore {
        let mut store = InMemorySeqColStore::new();
        for col in cols {
            store.add(col);
        }
        store
    }

    // ---- add / get / count -------------------------------------------------

    #[test]
    fn test_add_and_get() {
        let col = example_col();
        let digest = col.digest();
        let store = store_with(vec![col.clone()]);

        let found = store.get_collection(&digest).unwrap();
        assert_eq!(found.names, col.names);
    }

    #[test]
    fn test_count() {
        let mut store = InMemorySeqColStore::new();
        assert_eq!(store.count(), 0);
        store.add(example_col());
        assert_eq!(store.count(), 1);
    }

    #[test]
    fn test_get_collection_non_existent_returns_none() {
        let store = InMemorySeqColStore::new();
        assert!(store.get_collection("no_such_digest").is_none());
    }

    // ---- listing -----------------------------------------------------------

    #[test]
    fn test_list_no_filters() {
        let store = store_with(vec![example_col()]);

        let listing = store.list_collections(&[], 0, 10);
        assert_eq!(listing.total, 1);
        assert_eq!(listing.items.len(), 1);
    }

    #[test]
    fn test_list_collections_pagination() {
        let second = make_col("b");
        let second_digest = second.digest();
        let store = store_with(vec![make_col("a"), second, make_col("c")]);

        // page_size=1, page=1 should return the second collection
        let listing = store.list_collections(&[], 1, 1);
        assert_eq!(listing.total, 3);
        assert_eq!(listing.items.len(), 1);
        assert_eq!(listing.items[0].digest, second_digest);
    }

    #[test]
    fn test_list_collections_filter_matches_nothing() {
        let store = store_with(vec![example_col()]);

        let filters = vec![("names".to_string(), "nonexistent_digest".to_string())];
        let listing = store.list_collections(&filters, 0, 10);
        assert_eq!(listing.total, 0);
        assert!(listing.items.is_empty());
    }

    #[test]
    fn test_list_collections_page_beyond_available() {
        let store = store_with(vec![example_col()]);

        let listing = store.list_collections(&[], 100, 10);
        assert!(listing.items.is_empty());
        assert_eq!(listing.total, 1);
    }

    // ---- attribute lookup --------------------------------------------------

    #[test]
    fn test_get_attribute() {
        let col = example_col();
        let level1 = col.to_level1();
        let store = store_with(vec![col]);

        let names = store.get_attribute("names", &level1.names);
        assert!(names.is_some());
        let names_arr = names.unwrap();
        assert!(names_arr.is_array());
        assert_eq!(names_arr.as_array().unwrap().len(), 2);
    }

    #[test]
    fn test_get_attribute_invalid_name_returns_none() {
        let col = example_col();
        let level1 = col.to_level1();
        let store = store_with(vec![col]);

        // Use a valid digest but an invalid attribute name
        let looked_up = store.get_attribute("not_a_real_attribute", &level1.names);
        assert!(looked_up.is_none());
    }

    #[test]
    fn test_get_attribute_invalid_digest_returns_none() {
        let store = store_with(vec![example_col()]);

        let looked_up = store.get_attribute("names", "bogus_digest");
        assert!(looked_up.is_none());
    }
}