use anyhow;
use anyhow::Result;
use fxread::{FastxRead, Record};
use hashbrown::HashMap;
/// Boxed, dynamically dispatched [`FastxRead`] source whose items are [`Record`]s.
type FxReader = Box<dyn FastxRead<Item = Record>>;
/// A lookup table of byte-string keys to byte-string aliases, together with
/// the length shared by all of its keys.
///
/// When built with `from_reader`, each key is a record's sequence and each
/// value is that record's id.
pub struct Library {
/// Maps each key (a sequence when built from a reader) to its alias.
table: HashMap<Vec<u8>, Vec<u8>>,
/// Length in bytes of the keys in `table`; construction fails if key
/// lengths differ.
size: usize,
}
impl Library {
    /// Builds a library from a reader, keying each record's id by its sequence.
    ///
    /// # Errors
    ///
    /// Returns an error if the reader yields no records, if two records share
    /// the same sequence, or if the sequences do not all have the same length.
    pub fn from_reader(reader: FxReader) -> Result<Self> {
        let table = Self::table_from_reader(reader)?;
        Self::from_hashmap(table)
    }

    /// Builds a library from an existing `sequence -> alias` table.
    ///
    /// # Errors
    ///
    /// Returns an error if the table is empty or if its keys do not all have
    /// the same length.
    pub fn from_hashmap(table: HashMap<Vec<u8>, Vec<u8>>) -> Result<Self> {
        let size = Self::calculate_base_size(&table)?;
        Ok(Self { table, size })
    }

    /// Returns the alias stored for `token`, or `None` if `token` is not a key
    /// of the library.
    #[must_use]
    pub fn contains(&self, token: &[u8]) -> Option<&Vec<u8>> {
        // A single lookup answers both "is it present" and "what is its alias".
        self.alias(token)
    }

    /// Returns the alias stored for `token`, or `None` if `token` is not a key
    /// of the library.
    #[must_use]
    pub fn alias(&self, token: &[u8]) -> Option<&Vec<u8>> {
        self.table.get(token)
    }

    /// Iterates over the keys (sequences) of the library in arbitrary order.
    pub fn keys(&self) -> impl Iterator<Item = &Vec<u8>> {
        self.table.keys()
    }

    /// Iterates over the values (aliases) of the library in arbitrary order.
    pub fn values(&self) -> impl Iterator<Item = &Vec<u8>> {
        self.table.values()
    }

    /// Returns the length shared by every key of the library.
    #[must_use]
    pub fn size(&self) -> usize {
        self.size
    }

    /// Returns `true` when every key has the same length.
    ///
    /// An empty iterator is trivially uniform and yields `true`.
    fn validate_unique_size<'a>(mut keys: impl Iterator<Item = &'a Vec<u8>>) -> bool {
        match keys.next() {
            Some(first) => {
                let expected = first.len();
                keys.all(|key| key.len() == expected)
            }
            None => true,
        }
    }

    /// Returns the length of an arbitrary key, or `None` if the table is empty.
    fn get_key_size(table: &HashMap<Vec<u8>, Vec<u8>>) -> Option<usize> {
        table.keys().next().map(Vec::len)
    }

    /// Determines the common key length of `table`.
    ///
    /// # Errors
    ///
    /// Returns an error if the table is empty (there is no key length to
    /// report) or if the key lengths are inconsistent.
    fn calculate_base_size(table: &HashMap<Vec<u8>, Vec<u8>>) -> Result<usize> {
        let size =
            Self::get_key_size(table).ok_or_else(|| anyhow::anyhow!("Library is empty"))?;
        if Self::validate_unique_size(table.keys()) {
            Ok(size)
        } else {
            Err(anyhow::anyhow!("Library sequence sizes are inconsistent"))
        }
    }

    /// Drains `reader` into a `sequence -> id` table.
    ///
    /// # Errors
    ///
    /// Returns an error as soon as a sequence is seen for the second time.
    fn table_from_reader(reader: FxReader) -> Result<HashMap<Vec<u8>, Vec<u8>>> {
        let mut table = HashMap::new();
        for record in reader {
            let previous = table.insert(record.seq().to_owned(), record.id().to_owned());
            if previous.is_some() {
                // Lossy conversion so that reporting the duplicate can never
                // itself fail on non-UTF-8 sequence bytes.
                return Err(anyhow::anyhow!(
                    "Unexpected duplicate sequence in library found: {}",
                    String::from_utf8_lossy(record.seq())
                ));
            }
        }
        Ok(table)
    }
}
#[cfg(test)]
mod test {
    use super::Library;
    use fxread::{FastaReader, FastxRead, Record};

    /// Boxed reader type produced by the fixtures below.
    type BoxedReader = Box<dyn FastxRead<Item = Record>>;

    /// Wraps an in-memory FASTA payload in a boxed reader.
    fn fasta(payload: &'static [u8]) -> BoxedReader {
        Box::new(FastaReader::new(payload))
    }

    /// Fixture: a single record `seq.0` with sequence `ACTG`.
    fn reader() -> BoxedReader {
        fasta(b">seq.0\nACTG\n")
    }

    /// Fixture: two records that share the sequence `ACTG`.
    fn duplicate_reader() -> BoxedReader {
        fasta(b">seq.0\nACTG\n>seq.1\nACTG\n")
    }

    /// A one-record library reports the sequence length and a single key.
    #[test]
    fn build() {
        let lib = Library::from_reader(reader()).unwrap();
        assert_eq!(lib.size(), 4);
        assert_eq!(lib.keys().count(), 1);
    }

    /// Known sequences resolve to their id; unknown sequences resolve to `None`.
    #[test]
    fn validate_contains() {
        let lib = Library::from_reader(reader()).unwrap();
        let hit = lib.contains(b"ACTG");
        let miss = lib.contains(b"ACTT");
        assert_eq!(hit.unwrap(), b"seq.0");
        assert_eq!(miss, None);
    }

    /// Building from a reader with a repeated sequence must not succeed.
    #[test]
    #[should_panic]
    fn duplicates() {
        Library::from_reader(duplicate_reader()).unwrap();
    }
}