perbase_lib/
reference.rs

//! A module for handling caching requests for reference sequences.
//!
//! Borrowed from https://github.com/varlociraptor/varlociraptor/blob/master/src/reference.rs
4use std::fs;
5use std::str;
6use std::sync::Arc;
7use std::sync::{Mutex, RwLock};
8
9use anyhow::Result;
10use bio::io::fasta;
11use lru_time_cache::LruCache;
12
13/// A lazy buffer for reference sequences.
14pub struct Buffer {
15    reader: RwLock<fasta::IndexedReader<fs::File>>,
16    sequences: Mutex<LruCache<String, Arc<Vec<u8>>>>,
17}
18
19impl Buffer {
20    /// Create a new Reference Buffer. Capacity is the number of reference sequences to hold in the cache at one time.
21    pub fn new(fasta: fasta::IndexedReader<fs::File>, capacity: usize) -> Self {
22        Buffer {
23            reader: RwLock::new(fasta),
24            sequences: Mutex::new(LruCache::with_capacity(capacity)),
25        }
26    }
27
28    /// Get a Vec of all the sequences in the Reference
29    pub fn sequences(&self) -> Vec<fasta::Sequence> {
30        self.reader.read().unwrap().index.sequences()
31    }
32
33    /// Load given chromosome and return it as a slice. This is O(1) if chromosome was loaded before.
34    pub fn seq(&self, chrom: &str) -> Result<Arc<Vec<u8>>> {
35        let mut sequences = self.sequences.lock().unwrap();
36
37        if !sequences.contains_key(chrom) {
38            let mut sequence = Arc::new(Vec::new());
39            {
40                let mut reader = self.reader.write().unwrap();
41                reader.fetch_all(chrom)?;
42                reader.read(Arc::get_mut(&mut sequence).unwrap())?;
43            }
44
45            sequences.insert(chrom.to_owned(), Arc::clone(&sequence));
46            Ok(sequence)
47        } else {
48            Ok(Arc::clone(sequences.get(chrom).unwrap()))
49        }
50    }
51}