1use super::*;
7use super::readonly::ReadonlyRefgetStore;
8use super::core::RefgetStore;
9
10use std::collections::HashMap;
11use std::fs::{self, File, create_dir_all};
12use std::io::{BufRead, BufReader, Write};
13use std::path::Path;
14
15use anyhow::{Context, Result};
16
17use crate::hashkeyable::{DigestKey, HashKeyable, key_to_digest_string};
18
/// Selects which alias table (and on-disk subdirectory) an operation targets.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AliasKind {
    /// Aliases that point at individual sequences.
    Sequence,
    /// Aliases that point at sequence collections.
    Collection,
}

impl AliasKind {
    /// Returns the directory name associated with this kind:
    /// `"sequences"` or `"collections"`.
    pub fn subdir(&self) -> &'static str {
        match *self {
            AliasKind::Sequence => "sequences",
            AliasKind::Collection => "collections",
        }
    }
}
39
40type AliasStore = HashMap<String, HashMap<String, DigestKey>>;
42
43#[derive(Debug)]
45pub struct AliasManager {
46 sequence_aliases: AliasStore,
47 collection_aliases: AliasStore,
48}
49
50fn alias_add(store: &mut AliasStore, namespace: &str, alias: &str, digest: DigestKey) {
55 store
56 .entry(namespace.to_string())
57 .or_default()
58 .insert(alias.to_string(), digest);
59}
60
61fn alias_resolve(store: &AliasStore, namespace: &str, alias: &str) -> Option<DigestKey> {
62 store.get(namespace).and_then(|ns| ns.get(alias)).copied()
63}
64
65fn alias_reverse_scan(store: &AliasStore, digest: &DigestKey) -> Vec<(String, String)> {
66 let mut results = Vec::new();
67 for (namespace, aliases) in store {
68 for (alias, d) in aliases {
69 if d == digest {
70 results.push((namespace.clone(), alias.clone()));
71 }
72 }
73 }
74 results
75}
76
77fn alias_namespaces(store: &AliasStore) -> Vec<String> {
78 store.keys().cloned().collect()
79}
80
81fn alias_list(store: &AliasStore, namespace: &str) -> Option<Vec<String>> {
82 store
83 .get(namespace)
84 .map(|ns| ns.keys().cloned().collect())
85}
86
87fn alias_remove(store: &mut AliasStore, namespace: &str, alias: &str) -> bool {
88 if let Some(ns) = store.get_mut(namespace) {
89 let removed = ns.remove(alias).is_some();
90 if ns.is_empty() {
91 store.remove(namespace);
92 }
93 removed
94 } else {
95 false
96 }
97}
98
99fn alias_load_tsv(store: &mut AliasStore, namespace: &str, path: &Path) -> Result<usize> {
100 let file = File::open(path)?;
101 let reader = BufReader::new(file);
102 let mut count = 0;
103 for line in reader.lines() {
104 let line = line?;
105 if line.starts_with('#') || line.trim().is_empty() {
106 continue;
107 }
108 let parts: Vec<&str> = line.splitn(2, '\t').collect();
109 if parts.len() == 2 {
110 let key = parts[1].to_key();
111 alias_add(store, namespace, parts[0], key);
112 count += 1;
113 }
114 }
115 Ok(count)
116}
117
118fn load_aliases_from_dir(store: &mut AliasStore, dir: &Path) -> Result<()> {
119 if !dir.exists() {
120 return Ok(());
121 }
122 for entry in fs::read_dir(dir)? {
123 let entry = entry?;
124 let path = entry.path();
125 if path.extension().and_then(|e| e.to_str()) == Some("tsv") {
126 let namespace = path
127 .file_stem()
128 .and_then(|s| s.to_str())
129 .context("Invalid filename")?
130 .to_string();
131 alias_load_tsv(store, &namespace, &path)?;
132 }
133 }
134 Ok(())
135}
136
137fn write_all_aliases(store: &AliasStore, dir: &Path) -> Result<()> {
138 if store.is_empty() {
139 return Ok(());
140 }
141 create_dir_all(dir)?;
142 for (namespace, aliases) in store {
143 let tsv_path = dir.join(format!("{}.tsv", namespace));
144 let mut file = File::create(&tsv_path)?;
145 for (alias, digest) in aliases {
146 writeln!(file, "{}\t{}", alias, key_to_digest_string(digest))?;
147 }
148 }
149 Ok(())
150}
151
152impl AliasManager {
157 pub fn new() -> Self {
158 AliasManager {
159 sequence_aliases: HashMap::new(),
160 collection_aliases: HashMap::new(),
161 }
162 }
163
164 pub fn is_empty(&self) -> bool {
165 self.sequence_aliases.is_empty() && self.collection_aliases.is_empty()
166 }
167
168 pub fn add_sequence(&mut self, namespace: &str, alias: &str, digest: &str) {
171 let key = digest.to_key();
172 alias_add(&mut self.sequence_aliases, namespace, alias, key);
173 }
174
175 pub fn resolve_sequence(&self, namespace: &str, alias: &str) -> Option<DigestKey> {
176 alias_resolve(&self.sequence_aliases, namespace, alias)
177 }
178
179 pub fn reverse_lookup_sequence(&self, digest: &str) -> Vec<(String, String)> {
180 let key = digest.to_key();
181 alias_reverse_scan(&self.sequence_aliases, &key)
182 }
183
184 pub fn sequence_namespaces(&self) -> Vec<String> {
185 alias_namespaces(&self.sequence_aliases)
186 }
187
188 pub fn sequence_aliases(&self, namespace: &str) -> Option<Vec<String>> {
189 alias_list(&self.sequence_aliases, namespace)
190 }
191
192 pub fn remove_sequence(&mut self, namespace: &str, alias: &str) -> bool {
193 alias_remove(&mut self.sequence_aliases, namespace, alias)
194 }
195
196 pub fn load_sequence_tsv(&mut self, namespace: &str, path: &Path) -> Result<usize> {
197 alias_load_tsv(&mut self.sequence_aliases, namespace, path)
198 }
199
200 pub fn add_collection(&mut self, namespace: &str, alias: &str, digest: &str) {
203 let key = digest.to_key();
204 alias_add(&mut self.collection_aliases, namespace, alias, key);
205 }
206
207 pub fn resolve_collection(&self, namespace: &str, alias: &str) -> Option<DigestKey> {
208 alias_resolve(&self.collection_aliases, namespace, alias)
209 }
210
211 pub fn reverse_lookup_collection(&self, digest: &str) -> Vec<(String, String)> {
212 let key = digest.to_key();
213 alias_reverse_scan(&self.collection_aliases, &key)
214 }
215
216 pub fn collection_namespaces(&self) -> Vec<String> {
217 alias_namespaces(&self.collection_aliases)
218 }
219
220 pub fn collection_aliases(&self, namespace: &str) -> Option<Vec<String>> {
221 alias_list(&self.collection_aliases, namespace)
222 }
223
224 pub fn remove_collection(&mut self, namespace: &str, alias: &str) -> bool {
225 alias_remove(&mut self.collection_aliases, namespace, alias)
226 }
227
228 pub fn load_collection_tsv(&mut self, namespace: &str, path: &Path) -> Result<usize> {
229 alias_load_tsv(&mut self.collection_aliases, namespace, path)
230 }
231
232 pub fn load_from_dir(&mut self, aliases_dir: &Path) -> Result<()> {
235 load_aliases_from_dir(&mut self.sequence_aliases, &aliases_dir.join("sequences"))?;
236 load_aliases_from_dir(&mut self.collection_aliases, &aliases_dir.join("collections"))?;
237 Ok(())
238 }
239
240 pub fn write_to_dir(&self, aliases_dir: &Path) -> Result<()> {
241 write_all_aliases(&self.sequence_aliases, &aliases_dir.join("sequences"))?;
242 write_all_aliases(&self.collection_aliases, &aliases_dir.join("collections"))?;
243 Ok(())
244 }
245
246 pub fn write_namespace(
247 &self,
248 aliases_dir: &Path,
249 kind: AliasKind,
250 namespace: &str,
251 ) -> Result<()> {
252 let dir = aliases_dir.join(kind.subdir());
253 create_dir_all(&dir)?;
254
255 let store = match kind {
256 AliasKind::Sequence => &self.sequence_aliases,
257 AliasKind::Collection => &self.collection_aliases,
258 };
259
260 let tsv_path = dir.join(format!("{}.tsv", namespace));
261 if let Some(ns) = store.get(namespace) {
262 let mut file = File::create(&tsv_path)?;
263 for (alias, digest) in ns {
264 writeln!(file, "{}\t{}", alias, key_to_digest_string(digest))?;
265 }
266 } else {
267 let _ = fs::remove_file(&tsv_path);
268 }
269 Ok(())
270 }
271}
272
273impl Default for AliasManager {
274 fn default() -> Self {
275 Self::new()
276 }
277}
278
279impl ReadonlyRefgetStore {
284 pub fn add_sequence_alias(&mut self, namespace: &str, alias: &str, digest: &str) -> Result<()> {
286 self.aliases.add_sequence(namespace, alias, digest);
287 self.persist_alias_namespace(AliasKind::Sequence, namespace)?;
288 Ok(())
289 }
290
291 pub fn get_sequence_metadata_by_alias(&self, namespace: &str, alias: &str) -> Option<&crate::digest::SequenceMetadata> {
293 let key = self.aliases.resolve_sequence(namespace, alias)?;
294 self.sequence_store.get(&key).map(|rec| rec.metadata())
295 }
296
297 pub fn get_sequence_by_alias(&self, namespace: &str, alias: &str) -> Result<&crate::digest::SequenceRecord> {
299 let key = self.aliases.resolve_sequence(namespace, alias)
300 .ok_or_else(|| anyhow::anyhow!("Sequence alias not found: {}/{}", namespace, alias))?;
301 self.sequence_store.get(&key)
302 .ok_or_else(|| anyhow::anyhow!("Sequence not found for alias {}/{}", namespace, alias))
303 }
304
305 pub fn get_aliases_for_sequence(&self, digest: &str) -> Vec<(String, String)> {
307 self.aliases.reverse_lookup_sequence(digest)
308 }
309
310 pub fn list_sequence_alias_namespaces(&self) -> Vec<String> {
312 self.aliases.sequence_namespaces()
313 }
314
315 pub fn list_sequence_aliases(&self, namespace: &str) -> Option<Vec<String>> {
317 self.aliases.sequence_aliases(namespace)
318 }
319
320 pub fn remove_sequence_alias(&mut self, namespace: &str, alias: &str) -> Result<bool> {
322 let removed = self.aliases.remove_sequence(namespace, alias);
323 if removed {
324 self.persist_alias_namespace(AliasKind::Sequence, namespace)?;
325 }
326 Ok(removed)
327 }
328
329 pub fn load_sequence_aliases(&mut self, namespace: &str, path: &str) -> Result<usize> {
331 let count = self.aliases.load_sequence_tsv(namespace, Path::new(path))?;
332 self.persist_alias_namespace(AliasKind::Sequence, namespace)?;
333 Ok(count)
334 }
335
336 pub fn add_collection_alias(&mut self, namespace: &str, alias: &str, digest: &str) -> Result<()> {
338 self.aliases.add_collection(namespace, alias, digest);
339 self.persist_alias_namespace(AliasKind::Collection, namespace)?;
340 Ok(())
341 }
342
343 pub fn get_collection_metadata_by_alias(&self, namespace: &str, alias: &str) -> Option<&crate::digest::SequenceCollectionMetadata> {
345 let key = self.aliases.resolve_collection(namespace, alias)?;
346 self.collections.get(&key).map(|rec| rec.metadata())
347 }
348
349 pub fn get_collection_by_alias(&self, namespace: &str, alias: &str) -> Result<crate::digest::SequenceCollection> {
351 let key = self.aliases.resolve_collection(namespace, alias)
352 .ok_or_else(|| anyhow::anyhow!("Collection alias not found: {}/{}", namespace, alias))?;
353 let digest_str = key_to_digest_string(&key);
354 self.get_collection(&digest_str)
355 }
356
357 pub fn get_aliases_for_collection(&self, digest: &str) -> Vec<(String, String)> {
359 self.aliases.reverse_lookup_collection(digest)
360 }
361
362 pub fn list_collection_alias_namespaces(&self) -> Vec<String> {
364 self.aliases.collection_namespaces()
365 }
366
367 pub fn list_collection_aliases(&self, namespace: &str) -> Option<Vec<String>> {
369 self.aliases.collection_aliases(namespace)
370 }
371
372 pub fn remove_collection_alias(&mut self, namespace: &str, alias: &str) -> Result<bool> {
374 let removed = self.aliases.remove_collection(namespace, alias);
375 if removed {
376 self.persist_alias_namespace(AliasKind::Collection, namespace)?;
377 }
378 Ok(removed)
379 }
380
381 pub fn load_collection_aliases(&mut self, namespace: &str, path: &str) -> Result<usize> {
383 let count = self.aliases.load_collection_tsv(namespace, Path::new(path))?;
384 self.persist_alias_namespace(AliasKind::Collection, namespace)?;
385 Ok(count)
386 }
387
388 pub(crate) fn persist_alias_namespace(&self, kind: AliasKind, namespace: &str) -> Result<()> {
390 if self.persist_to_disk {
391 if let Some(ref local_path) = self.local_path {
392 let aliases_dir = local_path.join("aliases");
393 self.aliases.write_namespace(&aliases_dir, kind, namespace)?;
394 }
395 }
396 Ok(())
397 }
398}
399
400impl RefgetStore {
405 pub fn add_sequence_alias(&mut self, namespace: &str, alias: &str, digest: &str) -> Result<()> {
407 self.inner.add_sequence_alias(namespace, alias, digest)
408 }
409
410 pub fn remove_sequence_alias(&mut self, namespace: &str, alias: &str) -> Result<bool> {
412 self.inner.remove_sequence_alias(namespace, alias)
413 }
414
415 pub fn load_sequence_aliases(&mut self, namespace: &str, path: &str) -> Result<usize> {
417 self.inner.load_sequence_aliases(namespace, path)
418 }
419
420 pub fn add_collection_alias(&mut self, namespace: &str, alias: &str, digest: &str) -> Result<()> {
422 self.inner.add_collection_alias(namespace, alias, digest)
423 }
424
425 pub fn remove_collection_alias(&mut self, namespace: &str, alias: &str) -> Result<bool> {
427 self.inner.remove_collection_alias(namespace, alias)
428 }
429
430 pub fn load_collection_aliases(&mut self, namespace: &str, path: &str) -> Result<usize> {
432 self.inner.load_collection_aliases(namespace, path)
433 }
434
435 pub fn get_collection_by_alias(
437 &mut self,
438 namespace: &str,
439 alias: &str,
440 ) -> Result<crate::digest::SequenceCollection> {
441 if let Some(meta) = self.inner.get_collection_metadata_by_alias(namespace, alias) {
442 let digest = meta.digest.clone();
443 if !self.inner.is_collection_loaded(&digest) {
444 self.inner.load_collection(&digest)?;
445 }
446 return self.inner.get_collection_by_alias(namespace, alias);
447 }
448 Err(anyhow::anyhow!("Collection alias not found: {}:{}", namespace, alias))
449 }
450
451 pub fn pull_aliases(
455 &mut self,
456 namespace: Option<&str>,
457 strategy: SyncStrategy,
458 ) -> Result<PullResult> {
459 let mut result = PullResult::default();
460
461 let seq_namespaces: Vec<String> = match namespace {
462 Some(ns) => vec![ns.to_string()],
463 None => self.inner.available_sequence_alias_namespaces.clone(),
464 };
465 let coll_namespaces: Vec<String> = match namespace {
466 Some(ns) => vec![ns.to_string()],
467 None => self.inner.available_collection_alias_namespaces.clone(),
468 };
469
470 for ns in &seq_namespaces {
471 self.pull_alias_file(ns, "sequences", &strategy, &mut result)?;
472 }
473
474 for ns in &coll_namespaces {
475 self.pull_alias_file(ns, "collections", &strategy, &mut result)?;
476 }
477
478 if result.pulled > 0 {
479 if let Some(ref local_path) = self.inner.local_path {
480 let aliases_dir = local_path.join("aliases");
481 self.inner.aliases = AliasManager::default();
482 self.inner.aliases.load_from_dir(&aliases_dir)?;
483 }
484 }
485
486 Ok(result)
487 }
488
489 fn pull_alias_file(
491 &self,
492 namespace: &str,
493 kind: &str,
494 strategy: &SyncStrategy,
495 result: &mut PullResult,
496 ) -> Result<()> {
497 let relative_path = format!("aliases/{}/{}.tsv", kind, namespace);
498
499 match strategy {
500 SyncStrategy::KeepOurs => {
501 let was_local = self
502 .inner
503 .local_path
504 .as_ref()
505 .map(|p| p.join(&relative_path).exists())
506 .unwrap_or(false);
507 match ReadonlyRefgetStore::fetch_file(
508 &self.inner.local_path,
509 &self.inner.remote_source,
510 &relative_path,
511 self.inner.persist_to_disk,
512 false,
513 ) {
514 Ok(_) => {
515 if was_local {
516 result.skipped += 1;
517 } else {
518 result.pulled += 1;
519 }
520 }
521 Err(_) => {
522 result.not_found += 1;
523 }
524 }
525 }
526 SyncStrategy::KeepTheirs => {
527 match ReadonlyRefgetStore::fetch_file(
528 &self.inner.local_path,
529 &self.inner.remote_source,
530 &relative_path,
531 self.inner.persist_to_disk,
532 true,
533 ) {
534 Ok(_) => {
535 result.pulled += 1;
536 }
537 Err(_) => {
538 result.not_found += 1;
539 }
540 }
541 }
542 SyncStrategy::Notify => {
543 let local_exists = self
544 .inner
545 .local_path
546 .as_ref()
547 .map(|p| p.join(&relative_path).exists())
548 .unwrap_or(false);
549
550 if local_exists {
551 match ReadonlyRefgetStore::fetch_file(
552 &None,
553 &self.inner.remote_source,
554 &relative_path,
555 false,
556 false,
557 ) {
558 Ok(remote_data) => {
559 let local_path = self
560 .inner
561 .local_path
562 .as_ref()
563 .unwrap()
564 .join(&relative_path);
565 let local_data = fs::read(&local_path)?;
566 if local_data != remote_data {
567 result.conflicts.push(relative_path);
568 } else {
569 result.skipped += 1;
570 }
571 }
572 Err(_) => {
573 result.not_found += 1;
574 }
575 }
576 } else {
577 result.conflicts.push(relative_path);
578 }
579 }
580 }
581
582 Ok(())
583 }
584}
585
586#[cfg(test)]
591mod tests {
592 use super::*;
593 use tempfile::tempdir;
594
595 fn start_file_server(serve_dir: std::path::PathBuf) -> (String, impl FnOnce()) {
609 use std::io::{Read as _, Write as _};
610 use std::net::TcpListener;
611 use std::sync::{Arc, atomic::{AtomicBool, Ordering}};
612
613 let listener = TcpListener::bind("127.0.0.1:0").expect("bind");
614 let port = listener.local_addr().unwrap().port();
615 let base_url = format!("http://127.0.0.1:{}", port);
616 let stop = Arc::new(AtomicBool::new(false));
617 let stop_clone = Arc::clone(&stop);
618
619 std::thread::spawn(move || {
620 listener.set_nonblocking(false).ok();
621 while !stop_clone.load(Ordering::Relaxed) {
622 match listener.accept() {
623 Ok((mut stream, _)) => {
624 let mut buf = [0u8; 4096];
625 let n = stream.read(&mut buf).unwrap_or(0);
626 let request = std::str::from_utf8(&buf[..n]).unwrap_or("");
627 let path = request
629 .lines()
630 .next()
631 .and_then(|l| l.split_whitespace().nth(1))
632 .unwrap_or("/");
633 let rel = path.trim_start_matches('/');
635 let file_path = serve_dir.join(rel);
636 if file_path.exists() && file_path.is_file() {
637 let data = std::fs::read(&file_path).unwrap_or_default();
638 let header = format!(
639 "HTTP/1.1 200 OK\r\nContent-Length: {}\r\nConnection: close\r\n\r\n",
640 data.len()
641 );
642 let _ = stream.write_all(header.as_bytes());
643 let _ = stream.write_all(&data);
644 } else {
645 let body = b"Not Found";
646 let header = format!(
647 "HTTP/1.1 404 Not Found\r\nContent-Length: {}\r\nConnection: close\r\n\r\n",
648 body.len()
649 );
650 let _ = stream.write_all(header.as_bytes());
651 let _ = stream.write_all(body);
652 }
653 }
654 Err(_) => break,
655 }
656 }
657 });
658
659 let shutdown = move || {
660 stop.store(true, Ordering::Relaxed);
661 let _ = std::net::TcpStream::connect(format!("127.0.0.1:{}", port));
663 };
664
665 (base_url, shutdown)
666 }
667
668 #[test]
671 fn test_add_and_resolve_sequence() {
672 let mut mgr = AliasManager::new();
673 assert!(mgr.is_empty());
674
675 mgr.add_sequence("ncbi", "NC_000001.11", "abc123");
676 assert!(!mgr.is_empty());
677
678 let key = mgr.resolve_sequence("ncbi", "NC_000001.11");
679 assert!(key.is_some());
680 assert_eq!(key.unwrap(), "abc123".to_key());
681
682 assert!(mgr.resolve_sequence("ncbi", "missing").is_none());
683 assert!(mgr.resolve_sequence("missing_ns", "NC_000001.11").is_none());
684 }
685
686 #[test]
687 fn test_reverse_lookup() {
688 let mut mgr = AliasManager::new();
689 mgr.add_sequence("ncbi", "NC_000001.11", "digest1");
690 mgr.add_sequence("ucsc", "chr1", "digest1");
691 mgr.add_sequence("ncbi", "NC_000002.12", "digest2");
692
693 let aliases = mgr.reverse_lookup_sequence("digest1");
694 assert_eq!(aliases.len(), 2);
695 assert!(aliases.contains(&("ncbi".to_string(), "NC_000001.11".to_string())));
696 assert!(aliases.contains(&("ucsc".to_string(), "chr1".to_string())));
697
698 let aliases2 = mgr.reverse_lookup_sequence("digest2");
699 assert_eq!(aliases2.len(), 1);
700 }
701
702 #[test]
703 fn test_namespaces_and_list() {
704 let mut mgr = AliasManager::new();
705 mgr.add_sequence("ncbi", "NC_000001.11", "d1");
706 mgr.add_sequence("ucsc", "chr1", "d1");
707
708 let ns = mgr.sequence_namespaces();
709 assert!(ns.contains(&"ncbi".to_string()));
710 assert!(ns.contains(&"ucsc".to_string()));
711
712 let aliases = mgr.sequence_aliases("ncbi").unwrap();
713 assert!(aliases.contains(&"NC_000001.11".to_string()));
714 }
715
716 #[test]
717 fn test_remove() {
718 let mut mgr = AliasManager::new();
719 mgr.add_sequence("ncbi", "NC_000001.11", "d1");
720
721 assert!(mgr.remove_sequence("ncbi", "NC_000001.11"));
722 assert!(mgr.resolve_sequence("ncbi", "NC_000001.11").is_none());
723 assert!(mgr.sequence_namespaces().is_empty());
724
725 assert!(!mgr.remove_sequence("ncbi", "NC_000001.11"));
726 }
727
728 #[test]
729 fn test_collection_aliases() {
730 let mut mgr = AliasManager::new();
731 mgr.add_collection("ucsc", "hg38", "coll_digest");
732 mgr.add_collection("gencode", "GRCh38.p14", "coll_digest");
733
734 assert!(mgr.resolve_collection("ucsc", "hg38").is_some());
735 assert_eq!(mgr.collection_namespaces().len(), 2);
736
737 let aliases = mgr.reverse_lookup_collection("coll_digest");
738 assert_eq!(aliases.len(), 2);
739
740 assert!(mgr.remove_collection("ucsc", "hg38"));
741 assert!(mgr.resolve_collection("ucsc", "hg38").is_none());
742 }
743
744 #[test]
745 fn test_persistence_roundtrip() {
746 let dir = tempdir().unwrap();
747 let aliases_dir = dir.path().join("aliases");
748
749 let mut mgr = AliasManager::new();
750 mgr.add_sequence("ncbi", "NC_000001.11", "seq_digest");
751 mgr.add_collection("ucsc", "hg38", "coll_digest");
752
753 mgr.write_to_dir(&aliases_dir).unwrap();
754
755 assert!(aliases_dir.join("sequences/ncbi.tsv").exists());
756 assert!(aliases_dir.join("collections/ucsc.tsv").exists());
757
758 let mut mgr2 = AliasManager::new();
759 mgr2.load_from_dir(&aliases_dir).unwrap();
760
761 assert!(mgr2.resolve_sequence("ncbi", "NC_000001.11").is_some());
762 assert!(mgr2.resolve_collection("ucsc", "hg38").is_some());
763 }
764
765 #[test]
766 fn test_load_from_missing_dir() {
767 let mut mgr = AliasManager::new();
768 mgr.load_from_dir(std::path::Path::new("/nonexistent/aliases")).unwrap();
769 assert!(mgr.is_empty());
770 }
771
772 #[test]
773 fn test_write_namespace_single() {
774 let dir = tempdir().unwrap();
775 let aliases_dir = dir.path().join("aliases");
776
777 let mut mgr = AliasManager::new();
778 mgr.add_sequence("ncbi", "NC_000001.11", "d1");
779 mgr.add_sequence("ucsc", "chr1", "d2");
780
781 mgr.write_namespace(&aliases_dir, AliasKind::Sequence, "ncbi").unwrap();
782
783 assert!(aliases_dir.join("sequences/ncbi.tsv").exists());
784 assert!(!aliases_dir.join("sequences/ucsc.tsv").exists());
785 }
786
787 #[test]
788 fn test_empty_write_is_noop() {
789 let dir = tempdir().unwrap();
790 let aliases_dir = dir.path().join("aliases");
791
792 let mgr = AliasManager::new();
793 mgr.write_to_dir(&aliases_dir).unwrap();
794
795 assert!(!aliases_dir.join("sequences").exists());
796 assert!(!aliases_dir.join("collections").exists());
797 }
798
799 #[test]
800 fn test_load_tsv() {
801 let dir = tempdir().unwrap();
802 let tsv_path = dir.path().join("ncbi.tsv");
803 std::fs::write(&tsv_path, "NC_000001.11\tsome_digest\n# comment\n\nNC_000002.12\tanother_digest\n").unwrap();
804
805 let mut mgr = AliasManager::new();
806 let count = mgr.load_sequence_tsv("ncbi", &tsv_path).unwrap();
807 assert_eq!(count, 2);
808 assert!(mgr.resolve_sequence("ncbi", "NC_000001.11").is_some());
809 assert!(mgr.resolve_sequence("ncbi", "NC_000002.12").is_some());
810 }
811
812 fn copy_test_fasta(temp_dir: &std::path::Path, name: &str) -> std::path::PathBuf {
815 let src = format!("../tests/data/fasta/{}", name);
816 let dst = temp_dir.join(name);
817 std::fs::copy(&src, &dst)
818 .unwrap_or_else(|e| panic!("Failed to copy {} to tempdir: {}", src, e));
819 dst
820 }
821
822 #[test]
823 fn test_store_sequence_aliases() {
824 use crate::collection::digest_sequence;
825
826 let mut store = RefgetStore::in_memory();
827 let record = digest_sequence("chr1", b"ACGT");
828 store.add_sequence_record(record.clone(), false).unwrap();
829
830 let digest = record.metadata().sha512t24u.clone();
831 store.add_sequence_alias("ncbi", "NC_000001.11", &digest).unwrap();
832 store.add_sequence_alias("ucsc", "chr1", &digest).unwrap();
833
834 let found = store.get_sequence_metadata_by_alias("ncbi", "NC_000001.11").unwrap();
835 assert_eq!(found.name, "chr1");
836
837 let aliases = store.get_aliases_for_sequence(&digest);
838 assert_eq!(aliases.len(), 2);
839 assert!(aliases.contains(&("ncbi".to_string(), "NC_000001.11".to_string())));
840 assert!(aliases.contains(&("ucsc".to_string(), "chr1".to_string())));
841
842 let ns = store.list_sequence_alias_namespaces();
843 assert!(ns.contains(&"ncbi".to_string()));
844 assert!(ns.contains(&"ucsc".to_string()));
845
846 let aliases = store.list_sequence_aliases("ncbi").unwrap();
847 assert!(aliases.contains(&"NC_000001.11".to_string()));
848 }
849
850 #[test]
851 fn test_store_collection_aliases() {
852 let temp = tempdir().unwrap();
853 let fasta_path = copy_test_fasta(temp.path(), "base.fa");
854
855 let mut store = RefgetStore::in_memory();
856 let (meta, _) = store
857 .add_sequence_collection_from_fasta(&fasta_path, FastaImportOptions::new())
858 .unwrap();
859
860 store.add_collection_alias("ucsc", "hg38", &meta.digest).unwrap();
861 store.add_collection_alias("gencode", "GRCh38.p14", &meta.digest).unwrap();
862
863 let coll = store.get_collection_metadata_by_alias("ucsc", "hg38").unwrap();
864 assert_eq!(coll.digest, meta.digest);
865
866 let aliases = store.get_aliases_for_collection(&meta.digest);
867 assert_eq!(aliases.len(), 2);
868 }
869
870 #[test]
871 fn test_store_alias_remove() {
872 use crate::collection::digest_sequence;
873
874 let mut store = RefgetStore::in_memory();
875 let record = digest_sequence("chr1", b"ACGT");
876 store.add_sequence_record(record.clone(), false).unwrap();
877 let digest = record.metadata().sha512t24u.clone();
878
879 store.add_sequence_alias("ncbi", "NC_000001.11", &digest).unwrap();
880 assert!(store.get_sequence_metadata_by_alias("ncbi", "NC_000001.11").is_some());
881
882 assert!(store.remove_sequence_alias("ncbi", "NC_000001.11").unwrap());
883 assert!(store.get_sequence_metadata_by_alias("ncbi", "NC_000001.11").is_none());
884
885 assert!(store.list_sequence_alias_namespaces().is_empty());
886 }
887
888 #[test]
889 fn test_store_alias_persistence() {
890 let dir = tempdir().unwrap();
891 let store_path = dir.path().join("store");
892
893 let fasta_temp = tempdir().unwrap();
894 let fasta_path = copy_test_fasta(fasta_temp.path(), "base.fa");
895
896 let digest: String;
897 let seq_digest: String;
898 {
899 let mut store = RefgetStore::on_disk(&store_path).unwrap();
900 let (meta, _) = store
901 .add_sequence_collection_from_fasta(&fasta_path, FastaImportOptions::new())
902 .unwrap();
903 digest = meta.digest.clone();
904 seq_digest = store.list_sequences()[0].sha512t24u.clone();
905
906 store.add_sequence_alias("ncbi", "NC_000001.11", &seq_digest).unwrap();
907 store.add_collection_alias("ucsc", "hg38", &digest).unwrap();
908 }
909
910 {
911 let store = RefgetStore::open_local(&store_path).unwrap();
912 assert!(store.get_sequence_metadata_by_alias("ncbi", "NC_000001.11").is_some());
913 assert!(store.get_collection_metadata_by_alias("ucsc", "hg38").is_some());
914
915 assert!(store_path.join("aliases/sequences/ncbi.tsv").exists());
916 assert!(store_path.join("aliases/collections/ucsc.tsv").exists());
917 }
918 }
919
920 #[test]
921 fn test_store_alias_load_tsv() {
922 use crate::collection::digest_sequence;
923
924 let dir = tempdir().unwrap();
925 let tsv_path = dir.path().join("ncbi.tsv");
926
927 let mut store = RefgetStore::in_memory();
928 let record = digest_sequence("chr1", b"ACGT");
929 store.add_sequence_record(record.clone(), false).unwrap();
930 let digest = record.metadata().sha512t24u.clone();
931
932 std::fs::write(&tsv_path, format!("NC_000001.11\t{}\n", digest)).unwrap();
933
934 let count = store.load_sequence_aliases("ncbi", tsv_path.to_str().unwrap()).unwrap();
935 assert_eq!(count, 1);
936 assert!(store.get_sequence_metadata_by_alias("ncbi", "NC_000001.11").is_some());
937 }
938
939 #[test]
940 fn test_store_alias_reverse_multiple_sequences() {
941 use crate::collection::digest_sequence;
942
943 let mut store = RefgetStore::in_memory();
944 let r1 = digest_sequence("chr1", b"ACGT");
945 let r2 = digest_sequence("chr2", b"TTTT");
946 store.add_sequence_record(r1.clone(), false).unwrap();
947 store.add_sequence_record(r2.clone(), false).unwrap();
948
949 let d1 = r1.metadata().sha512t24u.clone();
950 let d2 = r2.metadata().sha512t24u.clone();
951
952 store.add_sequence_alias("ncbi", "NC_000001.11", &d1).unwrap();
953 store.add_sequence_alias("ucsc", "chr1", &d1).unwrap();
954 store.add_sequence_alias("ncbi", "NC_000002.12", &d2).unwrap();
955
956 let aliases = store.get_aliases_for_sequence(&d1);
957 assert_eq!(aliases.len(), 2);
958
959 let aliases = store.get_aliases_for_sequence(&d2);
960 assert_eq!(aliases.len(), 1);
961 }
962
963 #[test]
964 fn test_store_alias_write_store_to_dir() {
965 use crate::collection::digest_sequence;
966
967 let dir = tempdir().unwrap();
968 let store_path = dir.path().join("store");
969
970 let mut store = RefgetStore::in_memory();
971 let record = digest_sequence("chr1", b"ACGT");
972 store.add_sequence_record(record.clone(), false).unwrap();
973 let digest = record.metadata().sha512t24u.clone();
974
975 store.add_sequence_alias("ncbi", "NC_000001.11", &digest).unwrap();
976
977 store.write_store_to_dir(&store_path, None).unwrap();
978
979 assert!(store_path.join("aliases/sequences/ncbi.tsv").exists());
980
981 let store2 = RefgetStore::open_local(&store_path).unwrap();
982 assert!(store2.get_sequence_metadata_by_alias("ncbi", "NC_000001.11").is_some());
983 }
984
985 #[test]
986 fn test_get_sequence_metadata_by_alias() {
987 use crate::collection::digest_sequence;
988
989 let mut store = RefgetStore::in_memory();
990 let record = digest_sequence("chr1", b"ACGT");
991 store.add_sequence_record(record.clone(), false).unwrap();
992 let digest = record.metadata().sha512t24u.clone();
993
994 store.add_sequence_alias("ncbi", "NC_000001.11", &digest).unwrap();
995
996 let meta = store.get_sequence_metadata_by_alias("ncbi", "NC_000001.11").unwrap();
997 assert_eq!(meta.name, "chr1");
998 assert_eq!(meta.length, 4);
999 }
1000
1001 #[test]
1002 fn test_get_sequence_by_alias_loads_data() {
1003 use crate::collection::digest_sequence;
1004
1005 let mut store = RefgetStore::in_memory();
1006 let record = digest_sequence("chr1", b"ACGT");
1007 store.add_sequence_record(record.clone(), false).unwrap();
1008 let digest = record.metadata().sha512t24u.clone();
1009
1010 store.add_sequence_alias("ncbi", "NC_000001.11", &digest).unwrap();
1011
1012 let rec = store.get_sequence_by_alias("ncbi", "NC_000001.11").unwrap();
1013 assert_eq!(rec.metadata().name, "chr1");
1014 }
1015
1016 #[test]
1017 fn test_get_collection_metadata_by_alias() {
1018 let temp = tempdir().unwrap();
1019 let fasta_path = copy_test_fasta(temp.path(), "base.fa");
1020
1021 let mut store = RefgetStore::in_memory();
1022 let (meta, _) = store.add_sequence_collection_from_fasta(&fasta_path, FastaImportOptions::new()).unwrap();
1023
1024 store.add_collection_alias("ucsc", "hg38", &meta.digest).unwrap();
1025
1026 let coll_meta = store.get_collection_metadata_by_alias("ucsc", "hg38").unwrap();
1027 assert_eq!(coll_meta.digest, meta.digest);
1028 }
1029
1030 #[test]
1031 fn test_get_collection_by_alias_loads() {
1032 let temp = tempdir().unwrap();
1033 let fasta_path = copy_test_fasta(temp.path(), "base.fa");
1034
1035 let mut store = RefgetStore::in_memory();
1036 let (meta, _) = store.add_sequence_collection_from_fasta(&fasta_path, FastaImportOptions::new()).unwrap();
1037
1038 store.add_collection_alias("ucsc", "hg38", &meta.digest).unwrap();
1039
1040 let coll = store.get_collection_by_alias("ucsc", "hg38").unwrap();
1041 assert_eq!(coll.metadata.digest, meta.digest);
1042 assert!(!coll.sequences.is_empty());
1043 }
1044
1045 #[test]
1046 fn test_get_sequence_by_alias_not_found() {
1047 let store = RefgetStore::in_memory();
1048 assert!(store.get_sequence_metadata_by_alias("ncbi", "nonexistent").is_none());
1049 }
1050
1051 #[test]
1052 fn test_get_sequence_by_alias_error_not_found() {
1053 let store = RefgetStore::in_memory();
1054 assert!(store.get_sequence_by_alias("ncbi", "nonexistent").is_err());
1055 }
1056
/// Importing a FASTA with `namespaces(&["ncbi", "refseq"])` makes the
/// `namespace:alias` tokens in each header resolvable as sequence aliases,
/// each pointing at the sequence whose header carried the token.
#[test]
fn test_fasta_load_with_namespace_aliases() {
    let dir = tempdir().unwrap();
    let fasta = dir.path().join("test.fa");
    // chr1 carries an alias in both namespaces; chr2 only in `ncbi`.
    fs::write(
        &fasta,
        ">chr1 ncbi:NC_000001.11 refseq:NC_000001.11\nACGT\n>chr2 ncbi:NC_000002.12\nTGCA\n",
    )
    .unwrap();

    let mut store = RefgetStore::in_memory();
    store
        .add_sequence_collection_from_fasta(
            &fasta,
            FastaImportOptions::new().namespaces(&["ncbi", "refseq"]),
        )
        .unwrap();

    // `expect` instead of `assert!(is_ok())` + `unwrap()`: a failed lookup then
    // reports the underlying error rather than just "assertion failed".
    let chr1_ncbi = store
        .get_sequence_by_alias("ncbi", "NC_000001.11")
        .expect("ncbi alias from the chr1 header should resolve");
    assert_eq!(chr1_ncbi.metadata().name, "chr1");

    // The refseq token sits on the same header line, so it must name the same sequence.
    let chr1_refseq = store
        .get_sequence_by_alias("refseq", "NC_000001.11")
        .expect("refseq alias from the chr1 header should resolve");
    assert_eq!(chr1_refseq.metadata().name, "chr1");

    let chr2_ncbi = store
        .get_sequence_by_alias("ncbi", "NC_000002.12")
        .expect("ncbi alias from the chr2 header should resolve");
    assert_eq!(chr2_ncbi.metadata().name, "chr2");

    // An alias that appears in no header must not resolve.
    let missing = store.get_sequence_by_alias("ncbi", "NC_999999.1");
    assert!(missing.is_err());
}
1086
/// When the import options request no namespaces, a `ncbi:`-prefixed header
/// token does not become a resolvable sequence alias.
#[test]
fn test_fasta_load_without_namespaces_no_aliases() {
    let workdir = tempdir().unwrap();
    let fasta_file = workdir.path().join("test.fa");
    fs::write(&fasta_file, ">chr1 ncbi:NC_000001.11\nACGT\n").unwrap();

    // Options are used exactly as `new()` builds them: no `namespaces(..)` call.
    let import_options = FastaImportOptions::new();
    let mut store = RefgetStore::in_memory();
    store
        .add_sequence_collection_from_fasta(&fasta_file, import_options)
        .unwrap();

    let lookup = store.get_sequence_by_alias("ncbi", "NC_000001.11");
    assert!(lookup.is_err());
}
1101
/// After a collection is removed, a collection alias that pointed at it no
/// longer resolves.
#[test]
fn test_remove_collection_cleans_up_aliases() {
    let workdir = tempdir().unwrap();
    let fasta_file = workdir.path().join("test.fa");
    fs::write(&fasta_file, ">chr1\nACGT\n").unwrap();

    let mut store = RefgetStore::in_memory();
    let (imported, _) = store
        .add_sequence_collection_from_fasta(&fasta_file, FastaImportOptions::new())
        .unwrap();
    let coll_digest = imported.digest;

    // The alias resolves while the collection is still in the store...
    store
        .add_collection_alias("ucsc", "hg38", &coll_digest)
        .unwrap();
    assert!(store
        .get_collection_metadata_by_alias("ucsc", "hg38")
        .is_some());

    // ...and stops resolving once the collection has been removed.
    store.remove_collection(&coll_digest, false).unwrap();
    assert!(store
        .get_collection_metadata_by_alias("ucsc", "hg38")
        .is_none());
}
1120
/// Alias namespaces that exist when the store is written are listed in
/// `rgstore.json` and are reported again by a store reopened from that directory.
#[test]
fn test_manifest_namespace_roundtrip() {
    let workdir = tempdir().unwrap();
    let store_dir = workdir.path().join("store");
    let fasta_file = workdir.path().join("test.fa");
    fs::write(&fasta_file, ">seq1\nACGT\n").unwrap();

    let mut store = RefgetStore::in_memory();
    store
        .add_sequence_collection_from_fasta(fasta_file.to_str().unwrap(), FastaImportOptions::new())
        .unwrap();

    // Register one sequence alias ("ncbi") and one collection alias ("ucsc"),
    // each against the first digest the store reports.
    let seq_key = store.sequence_digests().next().unwrap();
    let seq_digest = key_to_digest_string(&seq_key);
    store
        .add_sequence_alias("ncbi", "NC_000001.11", &seq_digest)
        .unwrap();
    let coll_key = *store.collections.keys().next().unwrap();
    let coll_digest = key_to_digest_string(&coll_key);
    store
        .add_collection_alias("ucsc", "hg38", &coll_digest)
        .unwrap();

    store.write_store_to_dir(&store_dir, None).unwrap();

    // The written manifest must list both namespaces under their respective keys.
    let manifest_text = fs::read_to_string(store_dir.join("rgstore.json")).unwrap();
    let manifest: serde_json::Value = serde_json::from_str(&manifest_text).unwrap();
    let seq_namespaces = manifest["sequence_alias_namespaces"].as_array().unwrap();
    assert!(seq_namespaces
        .iter()
        .any(|entry| entry.as_str() == Some("ncbi")));
    let coll_namespaces = manifest["collection_alias_namespaces"].as_array().unwrap();
    assert!(coll_namespaces
        .iter()
        .any(|entry| entry.as_str() == Some("ucsc")));

    // A store opened from the written directory reports the same namespaces.
    let reopened = RefgetStore::open_local(&store_dir).unwrap();
    let namespaces = reopened.available_alias_namespaces();
    assert!(namespaces.sequences.contains(&String::from("ncbi")));
    assert!(namespaces.collections.contains(&String::from("ucsc")));
}
1153
/// A store written without any aliases produces an `rgstore.json` that mentions
/// neither alias-namespace key.
#[test]
fn test_manifest_empty_namespaces_not_serialized() {
    let workdir = tempdir().unwrap();
    let store_dir = workdir.path().join("store");
    let fasta_file = workdir.path().join("test.fa");
    fs::write(&fasta_file, ">seq1\nACGT\n").unwrap();

    // Import a single sequence and write the store; no alias is ever added.
    let mut store = RefgetStore::in_memory();
    store
        .add_sequence_collection_from_fasta(fasta_file.to_str().unwrap(), FastaImportOptions::new())
        .unwrap();
    store.write_store_to_dir(&store_dir, None).unwrap();

    // Checked on the raw text, so the key names must not appear anywhere in the file.
    let manifest_text = fs::read_to_string(store_dir.join("rgstore.json")).unwrap();
    assert!(!manifest_text.contains("sequence_alias_namespaces"));
    assert!(!manifest_text.contains("collection_alias_namespaces"));
}
1171
/// A store written without any aliases reopens with empty sequence and
/// collection alias-namespace lists.
///
/// NOTE(review): judging by the test name, this presumably stands in for an
/// older `rgstore.json` that lacks the namespace fields; confirm.
#[test]
fn test_old_rgstore_json_without_namespaces() {
    let workdir = tempdir().unwrap();
    let store_dir = workdir.path().join("store");
    fs::create_dir_all(&store_dir).unwrap();

    let fasta_file = workdir.path().join("test.fa");
    fs::write(&fasta_file, ">seq1\nACGT\n").unwrap();

    // Import a single sequence and write the store; no alias is ever added.
    let mut store = RefgetStore::in_memory();
    store
        .add_sequence_collection_from_fasta(fasta_file.to_str().unwrap(), FastaImportOptions::new())
        .unwrap();
    store.write_store_to_dir(&store_dir, None).unwrap();

    let reopened = RefgetStore::open_local(&store_dir).unwrap();
    let namespaces = reopened.available_alias_namespaces();
    assert!(namespaces.sequences.is_empty());
    assert!(namespaces.collections.is_empty());
}
1191
/// With `SyncStrategy::KeepOurs`, the first `pull_aliases` call counts both alias
/// files as pulled, and a repeated call counts both as skipped.
#[test]
fn test_keep_ours_alias_first_pull_counts_as_pulled() {
    // Remote side: one `ncbi.tsv` per alias kind under `aliases/`.
    let remote_root = tempdir().unwrap();
    let alias_root = remote_root.path().join("aliases");
    let remote_seq_dir = alias_root.join("sequences");
    let remote_coll_dir = alias_root.join("collections");
    fs::create_dir_all(&remote_seq_dir).unwrap();
    fs::create_dir_all(&remote_coll_dir).unwrap();
    fs::write(remote_seq_dir.join("ncbi.tsv"), "NC_000001.11\tsome_digest\n").unwrap();
    fs::write(remote_coll_dir.join("ncbi.tsv"), "hg38\tcoll_digest\n").unwrap();

    // `start_file_server` is handed the remote directory; the URL it returns
    // becomes the store's remote source below.
    let (remote_url, stop_server) = start_file_server(remote_root.path().to_path_buf());

    // Local side: an on-disk store in its own, separate temp directory.
    let local_root = tempdir().unwrap();
    let local_store_dir = local_root.path().join("store");
    fs::create_dir_all(&local_store_dir).unwrap();

    let mut store = RefgetStore::on_disk(&local_store_dir).unwrap();
    store.inner.remote_source = Some(remote_url);
    // Mark "ncbi" as an available namespace for both alias kinds.
    let advertised = vec!["ncbi".to_string()];
    store.inner.available_sequence_alias_namespaces = advertised.clone();
    store.inner.available_collection_alias_namespaces = advertised;

    // First pull: both alias files are counted as pulled.
    let first = store
        .pull_aliases(Some("ncbi"), SyncStrategy::KeepOurs)
        .unwrap();
    assert_eq!(first.pulled, 2, "first pull should count both files as pulled, not skipped");
    assert_eq!(first.skipped, 0, "first pull should not be skipped");
    assert_eq!(first.not_found, 0);

    // Second pull: both files are counted as skipped and none as pulled.
    let second = store
        .pull_aliases(Some("ncbi"), SyncStrategy::KeepOurs)
        .unwrap();
    assert_eq!(second.skipped, 2, "second pull should skip both files (already local)");
    assert_eq!(second.pulled, 0, "second pull should not count any files as pulled");

    stop_server();
}
1240}