Skip to main content

refget_store/
fasta.rs

1//! FASTA-backed sequence store that loads sequences into memory.
2
3use std::collections::HashMap;
4use std::io::BufReader;
5use std::path::Path;
6
7use md5::{Digest, Md5};
8use noodles_fasta as fasta;
9use noodles_fasta::fai;
10use refget_digest::sha512t24u;
11use refget_model::{Alias, SequenceMetadata};
12use serde::{Deserialize, Serialize};
13
14use refget_model::SeqCol;
15use serde::de::DeserializeOwned;
16
17use crate::{SequenceStore, StoreError, StoreResult, extract_subsequence};
18
19/// Trait for FASTA sidecar cache files (e.g. `.refget.json`, `.seqcol.json`).
20///
21/// Provides default implementations for `cache_path_for`, `write`, and `load_if_fresh`
22/// based on the cache file extension.
23pub trait SidecarCache: Serialize + DeserializeOwned + Sized {
24    /// The sidecar file extension (e.g. `"refget.json"`, `"seqcol.json"`).
25    const EXTENSION: &str;
26
27    /// Return the standard cache path for a given FASTA file.
28    fn cache_path_for<P: AsRef<Path>>(fasta_path: P) -> std::path::PathBuf {
29        let p = fasta_path.as_ref();
30        let ext = p
31            .extension()
32            .map(|e| format!("{}.{}", e.to_string_lossy(), Self::EXTENSION))
33            .unwrap_or_else(|| Self::EXTENSION.to_string());
34        p.with_extension(ext)
35    }
36
37    /// Write the cache to its standard sidecar path.
38    fn write<P: AsRef<Path>>(&self, fasta_path: P) -> StoreResult<std::path::PathBuf> {
39        let cache_path = Self::cache_path_for(&fasta_path);
40        let json = serde_json::to_string_pretty(self)
41            .map_err(|e| StoreError::Fasta(format!("Failed to serialize cache: {e}")))?;
42        std::fs::write(&cache_path, json)?;
43        Ok(cache_path)
44    }
45
46    /// Try to load a cache file for the given FASTA, returning `None` if
47    /// the cache is missing or stale (older than the FASTA or its index).
48    fn load_if_fresh<P: AsRef<Path>>(fasta_path: P) -> Option<Self> {
49        let fasta_path = fasta_path.as_ref();
50        let cache_path = Self::cache_path_for(fasta_path);
51        let cache_meta = std::fs::metadata(&cache_path).ok()?;
52        let cache_mtime = cache_meta.modified().ok()?;
53
54        let fasta_mtime = std::fs::metadata(fasta_path).ok()?.modified().ok()?;
55        if cache_mtime < fasta_mtime {
56            return None;
57        }
58        let fai_path = fai_path_for(fasta_path);
59        if let Ok(fai_meta) = std::fs::metadata(&fai_path)
60            && let Ok(fai_mtime) = fai_meta.modified()
61            && cache_mtime < fai_mtime
62        {
63            return None;
64        }
65
66        let data = std::fs::read_to_string(&cache_path).ok()?;
67        serde_json::from_str(&data).ok()
68    }
69}
70
71/// A sequence store that holds all sequences in memory, loaded from indexed FASTA files.
72///
73/// Supports loading multiple FASTA files incrementally via [`add_fasta`](Self::add_fasta).
74/// Digest computation is skipped when a fresh `.refget.json` cache exists.
75pub struct FastaSequenceStore {
76    /// Map from digest (MD5 hex, `SQ.`-prefixed, or bare sha512t24u) to record index.
77    digest_index: HashMap<String, usize>,
78    /// Loaded records.
79    records: Vec<RecordData>,
80}
81
82struct RecordData {
83    _name: String,
84    sequence: Vec<u8>,
85    metadata: SequenceMetadata,
86}
87
88/// Summary info for each sequence loaded from a FASTA file, useful for
89/// building SeqCol objects.
90#[derive(Debug, Clone)]
91pub struct FastaSequenceSummary {
92    pub name: String,
93    pub length: u64,
94    pub md5: String,
95    pub sha512t24u: String,
96    pub circular: bool,
97}
98
99/// Pre-computed digest cache for a FASTA file.
100///
101/// Written as `{fasta_path}.refget.json`. When present and up-to-date,
102/// the server skips digest computation at startup.
103#[derive(Debug, Clone, Serialize, Deserialize)]
104pub struct DigestCache {
105    pub sequences: Vec<CachedDigest>,
106}
107
108/// A single sequence's pre-computed digests.
109#[derive(Debug, Clone, Serialize, Deserialize)]
110pub struct CachedDigest {
111    pub name: String,
112    pub length: u64,
113    pub md5: String,
114    /// GA4GH digest with `SQ.` prefix.
115    pub sha512t24u: String,
116    /// Whether this sequence is circular.
117    #[serde(default, skip_serializing_if = "std::ops::Not::not")]
118    pub circular: bool,
119}
120
121impl CachedDigest {
122    /// Build a `SequenceMetadata` from this cached digest.
123    pub fn to_metadata(&self) -> SequenceMetadata {
124        SequenceMetadata {
125            md5: self.md5.clone(),
126            sha512t24u: self.sha512t24u.clone(),
127            length: self.length,
128            aliases: vec![Alias {
129                naming_authority: "refseq".to_string(),
130                value: self.name.clone(),
131            }],
132            circular: self.circular,
133        }
134    }
135
136    /// Build a `FastaSequenceSummary` from this cached digest.
137    pub fn to_summary(&self) -> FastaSequenceSummary {
138        FastaSequenceSummary {
139            name: self.name.clone(),
140            length: self.length,
141            md5: self.md5.clone(),
142            sha512t24u: self.sha512t24u.clone(),
143            circular: self.circular,
144        }
145    }
146}
147
148impl SidecarCache for DigestCache {
149    const EXTENSION: &str = "refget.json";
150}
151
152impl DigestCache {
153    /// Compute digests for all sequences in a FASTA file.
154    pub fn from_fasta<P: AsRef<Path>>(path: P) -> StoreResult<Self> {
155        let path = path.as_ref();
156        let index = read_fai_index(path)?;
157        let index_records: &[fai::Record] = index.as_ref();
158
159        let file = std::fs::File::open(path)?;
160        let mut reader = fasta::io::Reader::new(BufReader::new(file));
161        let mut sequences = Vec::new();
162
163        for (idx, result) in reader.records().enumerate() {
164            let record = result
165                .map_err(|e| StoreError::Fasta(format!("Failed to read FASTA record: {e}")))?;
166
167            let name = std::str::from_utf8(record.name())
168                .map_err(|e| StoreError::Fasta(format!("Invalid UTF-8 in sequence name: {e}")))?
169                .to_string();
170            let seq_bytes: Vec<u8> = record
171                .sequence()
172                .as_ref()
173                .iter()
174                .copied()
175                .filter(|b| !b.is_ascii_whitespace())
176                .map(|b| b.to_ascii_uppercase())
177                .collect();
178
179            let length = seq_bytes.len() as u64;
180
181            if let Some(fai_record) = index_records.get(idx) {
182                let expected_length = fai_record.length() as usize;
183                if expected_length != seq_bytes.len() {
184                    return Err(StoreError::Fasta(format!(
185                        "Sequence length mismatch for {name}: index says {expected_length}, got {}",
186                        seq_bytes.len()
187                    )));
188                }
189            }
190
191            let md5 = format!("{:x}", Md5::digest(&seq_bytes));
192            let sha512t24u = format!("SQ.{}", sha512t24u(&seq_bytes));
193
194            sequences.push(CachedDigest { name, length, md5, sha512t24u, circular: false });
195        }
196
197        Ok(Self { sequences })
198    }
199}
200
201/// Pre-computed SeqCol cache for a FASTA file.
202///
203/// Written as `{fasta_path}.seqcol.json`. When present and up-to-date,
204/// the server skips SeqCol construction at startup.
205#[derive(Debug, Clone, Serialize, Deserialize)]
206pub struct SeqColCache {
207    pub collection: SeqCol,
208}
209
210impl SidecarCache for SeqColCache {
211    const EXTENSION: &str = "seqcol.json";
212}
213
214impl SeqColCache {
215    /// Build a `SeqColCache` from per-sequence summaries.
216    pub fn from_summaries(summaries: &[FastaSequenceSummary]) -> Self {
217        let collection = SeqCol {
218            names: summaries.iter().map(|s| s.name.clone()).collect(),
219            lengths: summaries.iter().map(|s| s.length).collect(),
220            sequences: summaries.iter().map(|s| s.sha512t24u.clone()).collect(),
221            sorted_name_length_pairs: None,
222        };
223        Self { collection }
224    }
225}
226
227/// Return the .fai index path for a given FASTA path.
228pub(crate) fn fai_path_for(path: &Path) -> std::path::PathBuf {
229    path.with_extension(
230        path.extension()
231            .map(|e| format!("{}.fai", e.to_string_lossy()))
232            .unwrap_or_else(|| "fai".to_string()),
233    )
234}
235
236/// Read and parse a `.fai` index file.
237pub(crate) fn read_fai_index(path: &Path) -> StoreResult<fai::Index> {
238    let fai_path = fai_path_for(path);
239    if !fai_path.exists() {
240        return Err(StoreError::Fasta(format!("FASTA index not found: {}", fai_path.display())));
241    }
242    let fai_reader = BufReader::new(std::fs::File::open(&fai_path)?);
243    fai::Reader::new(fai_reader)
244        .read_index()
245        .map_err(|e| StoreError::Fasta(format!("Failed to read FASTA index: {e}")))
246}
247
248/// Index a sequence by its digests (md5, SQ.-prefixed, and bare sha512t24u).
249pub(crate) fn index_digests(
250    digest_index: &mut HashMap<String, usize>,
251    cached: &CachedDigest,
252    record_idx: usize,
253) {
254    digest_index.insert(cached.md5.clone(), record_idx);
255    digest_index.insert(cached.sha512t24u.clone(), record_idx);
256    let bare_sha = cached.sha512t24u.strip_prefix("SQ.").unwrap_or(&cached.sha512t24u).to_string();
257    digest_index.insert(bare_sha, record_idx);
258}
259
260impl FastaSequenceStore {
261    /// Create an empty store.
262    pub fn new() -> Self {
263        Self { digest_index: HashMap::new(), records: Vec::new() }
264    }
265
266    /// Load a FASTA file into this store, appending to existing data.
267    ///
268    /// Uses a digest cache if fresh, otherwise computes digests on the fly.
269    /// Returns summaries for the sequences loaded from this file.
270    pub fn add_fasta<P: AsRef<Path>>(&mut self, path: P) -> StoreResult<Vec<FastaSequenceSummary>> {
271        let path = path.as_ref();
272        let cache = DigestCache::load_if_fresh(path);
273        let computed;
274        let cache = match &cache {
275            Some(c) => c,
276            None => {
277                computed = DigestCache::from_fasta(path)?;
278                &computed
279            }
280        };
281        self.add_fasta_with_cache(path, cache)
282    }
283
284    /// Load a FASTA file using pre-computed digests from a cache.
285    fn add_fasta_with_cache(
286        &mut self,
287        path: &Path,
288        cache: &DigestCache,
289    ) -> StoreResult<Vec<FastaSequenceSummary>> {
290        let file = std::fs::File::open(path)?;
291        let mut reader = fasta::io::Reader::new(BufReader::new(file));
292        let mut summaries = Vec::new();
293
294        for (idx, result) in reader.records().enumerate() {
295            let record = result
296                .map_err(|e| StoreError::Fasta(format!("Failed to read FASTA record: {e}")))?;
297
298            let seq_bytes: Vec<u8> = record
299                .sequence()
300                .as_ref()
301                .iter()
302                .copied()
303                .filter(|b| !b.is_ascii_whitespace())
304                .map(|b| b.to_ascii_uppercase())
305                .collect();
306
307            let cached = cache.sequences.get(idx).ok_or_else(|| {
308                StoreError::Fasta(format!(
309                    "Digest cache has {} entries but FASTA has more sequences",
310                    cache.sequences.len()
311                ))
312            })?;
313
314            if cached.length != seq_bytes.len() as u64 {
315                return Err(StoreError::Fasta(format!(
316                    "Cache length mismatch for {}: cache says {}, got {}",
317                    cached.name,
318                    cached.length,
319                    seq_bytes.len()
320                )));
321            }
322
323            let metadata = cached.to_metadata();
324            summaries.push(cached.to_summary());
325
326            let record_idx = self.records.len();
327            index_digests(&mut self.digest_index, cached, record_idx);
328            self.records.push(RecordData {
329                _name: cached.name.clone(),
330                sequence: seq_bytes,
331                metadata,
332            });
333        }
334
335        Ok(summaries)
336    }
337
338    /// Mark sequences with matching names as circular.
339    pub fn mark_circular(&mut self, circular_names: &[String]) {
340        for record in &mut self.records {
341            if circular_names.iter().any(|n| n == &record._name) {
342                record.metadata.circular = true;
343            }
344        }
345    }
346
347    /// Convenience: load a single FASTA file and return the store + summaries.
348    pub fn from_fasta<P: AsRef<Path>>(path: P) -> StoreResult<(Self, Vec<FastaSequenceSummary>)> {
349        let mut store = Self::new();
350        let summaries = store.add_fasta(path)?;
351        Ok((store, summaries))
352    }
353}
354
355impl Default for FastaSequenceStore {
356    fn default() -> Self {
357        Self::new()
358    }
359}
360
361impl SequenceStore for FastaSequenceStore {
362    fn get_sequence(
363        &self,
364        digest: &str,
365        start: Option<u64>,
366        end: Option<u64>,
367    ) -> StoreResult<Option<Vec<u8>>> {
368        let Some(&record_idx) = self.digest_index.get(digest) else {
369            return Ok(None);
370        };
371        let seq = &self.records[record_idx].sequence;
372        Ok(Some(extract_subsequence(seq, start, end)))
373    }
374
375    fn get_metadata(&self, digest: &str) -> StoreResult<Option<SequenceMetadata>> {
376        Ok(self.digest_index.get(digest).map(|&idx| self.records[idx].metadata.clone()))
377    }
378
379    fn get_length(&self, digest: &str) -> StoreResult<Option<u64>> {
380        Ok(self.digest_index.get(digest).map(|&idx| self.records[idx].metadata.length))
381    }
382}
383
384#[cfg(test)]
385mod tests {
386    use super::*;
387    use std::io::Write;
388    use tempfile::TempDir;
389
390    fn write_test_fasta(dir: &TempDir) -> std::path::PathBuf {
391        let fasta_path = dir.path().join("test.fa");
392        let mut f = std::fs::File::create(&fasta_path).unwrap();
393        writeln!(f, ">seq1").unwrap();
394        writeln!(f, "ACGTACGT").unwrap();
395        writeln!(f, ">seq2").unwrap();
396        writeln!(f, "NNNN").unwrap();
397
398        let fai_path = dir.path().join("test.fa.fai");
399        let mut fai = std::fs::File::create(&fai_path).unwrap();
400        writeln!(fai, "seq1\t8\t6\t8\t9").unwrap();
401        writeln!(fai, "seq2\t4\t21\t4\t5").unwrap();
402
403        fasta_path
404    }
405
406    #[test]
407    fn test_load_fasta() {
408        let dir = TempDir::new().unwrap();
409        let fasta_path = write_test_fasta(&dir);
410
411        let (store, summaries) = FastaSequenceStore::from_fasta(&fasta_path).unwrap();
412        assert_eq!(summaries.len(), 2);
413        assert_eq!(summaries[0].name, "seq1");
414        assert_eq!(summaries[0].length, 8);
415        assert_eq!(summaries[1].name, "seq2");
416        assert_eq!(summaries[1].length, 4);
417
418        let seq = store.get_sequence(&summaries[0].sha512t24u, None, None).unwrap().unwrap();
419        assert_eq!(seq, b"ACGTACGT");
420
421        let seq = store.get_sequence(&summaries[0].md5, None, None).unwrap().unwrap();
422        assert_eq!(seq, b"ACGTACGT");
423    }
424
425    #[test]
426    fn test_add_multiple_fastas() {
427        let dir = TempDir::new().unwrap();
428
429        // First FASTA
430        let fa1 = dir.path().join("a.fa");
431        let mut f = std::fs::File::create(&fa1).unwrap();
432        writeln!(f, ">seq1\nACGT").unwrap();
433        let mut fai = std::fs::File::create(dir.path().join("a.fa.fai")).unwrap();
434        writeln!(fai, "seq1\t4\t6\t4\t5").unwrap();
435
436        // Second FASTA
437        let fa2 = dir.path().join("b.fa");
438        let mut f = std::fs::File::create(&fa2).unwrap();
439        writeln!(f, ">seq2\nTTTT").unwrap();
440        let mut fai = std::fs::File::create(dir.path().join("b.fa.fai")).unwrap();
441        writeln!(fai, "seq2\t4\t6\t4\t5").unwrap();
442
443        let mut store = FastaSequenceStore::new();
444        let s1 = store.add_fasta(&fa1).unwrap();
445        let s2 = store.add_fasta(&fa2).unwrap();
446
447        assert_eq!(s1.len(), 1);
448        assert_eq!(s2.len(), 1);
449
450        let seq1 = store.get_sequence(&s1[0].sha512t24u, None, None).unwrap().unwrap();
451        assert_eq!(seq1, b"ACGT");
452        let seq2 = store.get_sequence(&s2[0].sha512t24u, None, None).unwrap().unwrap();
453        assert_eq!(seq2, b"TTTT");
454    }
455
456    #[test]
457    fn test_subsequence() {
458        let dir = TempDir::new().unwrap();
459        let fasta_path = write_test_fasta(&dir);
460
461        let (store, summaries) = FastaSequenceStore::from_fasta(&fasta_path).unwrap();
462        let seq = store.get_sequence(&summaries[0].sha512t24u, Some(2), Some(5)).unwrap().unwrap();
463        assert_eq!(seq, b"GTA");
464    }
465
466    #[test]
467    fn test_metadata() {
468        let dir = TempDir::new().unwrap();
469        let fasta_path = write_test_fasta(&dir);
470
471        let (store, summaries) = FastaSequenceStore::from_fasta(&fasta_path).unwrap();
472        let meta = store.get_metadata(&summaries[0].sha512t24u).unwrap().unwrap();
473        assert_eq!(meta.length, 8);
474        assert_eq!(meta.sha512t24u, summaries[0].sha512t24u);
475        assert!(meta.sha512t24u.starts_with("SQ."), "ga4gh digest must have SQ. prefix");
476        assert_eq!(meta.md5, summaries[0].md5);
477    }
478
479    #[test]
480    fn test_from_fasta_missing_fai_errors() {
481        let dir = TempDir::new().unwrap();
482        let fasta_path = dir.path().join("no_index.fa");
483        let mut f = std::fs::File::create(&fasta_path).unwrap();
484        writeln!(f, ">seq1").unwrap();
485        writeln!(f, "ACGT").unwrap();
486
487        let result = FastaSequenceStore::from_fasta(&fasta_path);
488        let err_msg = format!("{}", result.err().expect("Expected an error for missing .fai file"));
489        assert!(err_msg.contains("FASTA index not found"), "Unexpected error: {err_msg}");
490    }
491
492    #[test]
493    fn test_from_fasta_empty_fasta() {
494        let dir = TempDir::new().unwrap();
495        let fasta_path = dir.path().join("empty.fa");
496        std::fs::File::create(&fasta_path).unwrap();
497        std::fs::File::create(dir.path().join("empty.fa.fai")).unwrap();
498
499        let (store, summaries) = FastaSequenceStore::from_fasta(&fasta_path).unwrap();
500        assert!(summaries.is_empty());
501        assert!(store.get_sequence("anything", None, None).unwrap().is_none());
502    }
503
504    #[test]
505    fn test_get_sequence_non_existent_digest_returns_none() {
506        let dir = TempDir::new().unwrap();
507        let fasta_path = write_test_fasta(&dir);
508
509        let (store, _) = FastaSequenceStore::from_fasta(&fasta_path).unwrap();
510        assert!(store.get_sequence("no_such_digest", None, None).unwrap().is_none());
511    }
512
513    #[test]
514    fn test_get_sequence_start_beyond_length_returns_empty() {
515        let dir = TempDir::new().unwrap();
516        let fasta_path = write_test_fasta(&dir);
517
518        let (store, summaries) = FastaSequenceStore::from_fasta(&fasta_path).unwrap();
519        let seq = store.get_sequence(&summaries[0].sha512t24u, Some(100), None).unwrap().unwrap();
520        assert!(seq.is_empty());
521    }
522
523    #[test]
524    fn test_lowercase_sequences_are_uppercased() {
525        let dir = TempDir::new().unwrap();
526        let fasta_path = dir.path().join("lower.fa");
527        let mut f = std::fs::File::create(&fasta_path).unwrap();
528        writeln!(f, ">seq1").unwrap();
529        writeln!(f, "acgtACGT").unwrap();
530
531        let fai_path = dir.path().join("lower.fa.fai");
532        let mut fai = std::fs::File::create(&fai_path).unwrap();
533        writeln!(fai, "seq1\t8\t6\t8\t9").unwrap();
534
535        let (store, summaries) = FastaSequenceStore::from_fasta(&fasta_path).unwrap();
536        let seq = store.get_sequence(&summaries[0].sha512t24u, None, None).unwrap().unwrap();
537        assert_eq!(seq, b"ACGTACGT");
538    }
539
540    #[test]
541    fn test_digest_cache_write_and_load() {
542        let dir = TempDir::new().unwrap();
543        let fasta_path = write_test_fasta(&dir);
544
545        let cache = DigestCache::from_fasta(&fasta_path).unwrap();
546        assert_eq!(cache.sequences.len(), 2);
547        let cache_path = cache.write(&fasta_path).unwrap();
548        assert!(cache_path.exists());
549        assert!(cache_path.to_string_lossy().ends_with(".refget.json"));
550
551        let loaded = DigestCache::load_if_fresh(&fasta_path).unwrap();
552        assert_eq!(loaded.sequences.len(), 2);
553        assert_eq!(loaded.sequences[0].name, "seq1");
554        assert!(loaded.sequences[0].sha512t24u.starts_with("SQ."));
555    }
556
557    #[test]
558    fn test_from_fasta_with_cache_matches_without() {
559        let dir = TempDir::new().unwrap();
560        let fasta_path = write_test_fasta(&dir);
561
562        let (_, summaries_no_cache) = FastaSequenceStore::from_fasta(&fasta_path).unwrap();
563
564        let cache = DigestCache::from_fasta(&fasta_path).unwrap();
565        cache.write(&fasta_path).unwrap();
566        let (store, summaries_cached) = FastaSequenceStore::from_fasta(&fasta_path).unwrap();
567
568        assert_eq!(summaries_no_cache.len(), summaries_cached.len());
569        for (a, b) in summaries_no_cache.iter().zip(summaries_cached.iter()) {
570            assert_eq!(a.name, b.name);
571            assert_eq!(a.length, b.length);
572            assert_eq!(a.md5, b.md5);
573            assert_eq!(a.sha512t24u, b.sha512t24u);
574        }
575
576        let seq = store.get_sequence(&summaries_cached[0].sha512t24u, None, None).unwrap().unwrap();
577        assert_eq!(seq, b"ACGTACGT");
578    }
579
580    #[test]
581    fn test_stale_cache_is_ignored() {
582        let dir = TempDir::new().unwrap();
583        let fasta_path = write_test_fasta(&dir);
584
585        let cache = DigestCache::from_fasta(&fasta_path).unwrap();
586        cache.write(&fasta_path).unwrap();
587
588        std::thread::sleep(std::time::Duration::from_millis(50));
589        let mut f = std::fs::OpenOptions::new().append(true).open(&fasta_path).unwrap();
590        writeln!(f, ">seq3").unwrap();
591        writeln!(f, "TTTT").unwrap();
592
593        assert!(DigestCache::load_if_fresh(&fasta_path).is_none());
594    }
595
596    #[test]
597    fn test_seqcol_cache_write_and_load() {
598        let dir = TempDir::new().unwrap();
599        let fasta_path = write_test_fasta(&dir);
600
601        let (_, summaries) = FastaSequenceStore::from_fasta(&fasta_path).unwrap();
602        let cache = SeqColCache::from_summaries(&summaries);
603        assert_eq!(cache.collection.names.len(), 2);
604        assert_eq!(cache.collection.names[0], "seq1");
605        assert_eq!(cache.collection.lengths[0], 8);
606
607        let cache_path = cache.write(&fasta_path).unwrap();
608        assert!(cache_path.exists());
609        assert!(cache_path.to_string_lossy().ends_with(".seqcol.json"));
610
611        let loaded = SeqColCache::load_if_fresh(&fasta_path).unwrap();
612        assert_eq!(loaded.collection.names, cache.collection.names);
613        assert_eq!(loaded.collection.lengths, cache.collection.lengths);
614        assert_eq!(loaded.collection.sequences, cache.collection.sequences);
615        assert_eq!(loaded.collection.digest(), cache.collection.digest());
616    }
617
618    #[test]
619    fn test_seqcol_cache_stale_is_ignored() {
620        let dir = TempDir::new().unwrap();
621        let fasta_path = write_test_fasta(&dir);
622
623        let (_, summaries) = FastaSequenceStore::from_fasta(&fasta_path).unwrap();
624        let cache = SeqColCache::from_summaries(&summaries);
625        cache.write(&fasta_path).unwrap();
626
627        std::thread::sleep(std::time::Duration::from_millis(50));
628        let mut f = std::fs::OpenOptions::new().append(true).open(&fasta_path).unwrap();
629        writeln!(f, ">seq3").unwrap();
630        writeln!(f, "TTTT").unwrap();
631
632        assert!(SeqColCache::load_if_fresh(&fasta_path).is_none());
633    }
634
635    #[test]
636    fn test_seqcol_cache_path_for() {
637        assert_eq!(
638            SeqColCache::cache_path_for("test.fa"),
639            std::path::PathBuf::from("test.fa.seqcol.json")
640        );
641        assert_eq!(
642            SeqColCache::cache_path_for("dir/genome.fasta"),
643            std::path::PathBuf::from("dir/genome.fasta.seqcol.json")
644        );
645    }
646}