use crate::assembler::{run_assembly, run_assembly_with_output, AssemblerParams, AssemblyResult};
use crate::output::RunOutput;
use crate::read_holder::ReadHolder;
use crate::reads_getter::ReadPair;
#[derive(Clone)]
pub struct Assembler {
params: AssemblerParams,
}
impl Default for Assembler {
fn default() -> Self {
Self {
params: AssemblerParams::default(),
}
}
}
impl Assembler {
    /// Creates an assembler holding the default parameter set.
    pub fn new() -> Self {
        Self::default()
    }

    // Every setter below consumes `self` and returns the updated value, so a
    // call whose result is dropped has no effect at all. `#[must_use]` turns
    // that mistake into a compiler warning.

    /// Sets the `min_kmer` parameter.
    #[must_use]
    pub fn min_kmer(mut self, k: usize) -> Self {
        self.params.min_kmer = k;
        self
    }

    /// Sets the `max_kmer` parameter.
    #[must_use]
    pub fn max_kmer(mut self, k: usize) -> Self {
        self.params.max_kmer = k;
        self
    }

    /// Sets the `steps` parameter.
    #[must_use]
    pub fn steps(mut self, n: usize) -> Self {
        self.params.steps = n;
        self
    }

    /// Sets the `fraction` parameter.
    #[must_use]
    pub fn fraction(mut self, f: f64) -> Self {
        self.params.fraction = f;
        self
    }

    /// Sets the `max_snp_len` parameter.
    #[must_use]
    pub fn max_snp_len(mut self, n: usize) -> Self {
        self.params.max_snp_len = n;
        self
    }

    /// Sets the `min_count` parameter and switches `estimate_min_count` off.
    ///
    /// Order matters: a later call to [`Assembler::estimate_min_count`] with
    /// `true` switches estimation back on.
    // NOTE(review): presumably estimation would otherwise override the
    // explicit count - confirm against the assembler implementation.
    #[must_use]
    pub fn min_count(mut self, n: usize) -> Self {
        self.params.min_count = n;
        self.params.estimate_min_count = false;
        self
    }

    /// Sets the `estimate_min_count` flag.
    #[must_use]
    pub fn estimate_min_count(mut self, on: bool) -> Self {
        self.params.estimate_min_count = on;
        self
    }

    /// Sets the `max_kmer_count` parameter.
    #[must_use]
    pub fn max_kmer_count(mut self, n: usize) -> Self {
        self.params.max_kmer_count = n;
        self
    }

    /// Sets the `force_single_reads` flag.
    #[must_use]
    pub fn force_single_reads(mut self, on: bool) -> Self {
        self.params.force_single_reads = on;
        self
    }

    /// Sets the `insert_size` parameter.
    #[must_use]
    pub fn insert_size(mut self, n: usize) -> Self {
        self.params.insert_size = n;
        self
    }

    /// Sets the `allow_snps` flag.
    #[must_use]
    pub fn allow_snps(mut self, on: bool) -> Self {
        self.params.allow_snps = on;
        self
    }

    /// Sets the `ncores` parameter.
    #[must_use]
    pub fn ncores(mut self, n: usize) -> Self {
        self.params.ncores = n;
        self
    }

    /// Sets the `memory_gb` parameter.
    #[must_use]
    pub fn memory_gb(mut self, n: usize) -> Self {
        self.params.memory_gb = n;
        self
    }

    /// Borrows the parameter set configured so far.
    pub fn params(&self) -> &AssemblerParams {
        &self.params
    }

    /// Runs `run_assembly` over `reads` with the configured parameters and `seeds`.
    ///
    /// `reads` is taken by value but only lent to the callee; the vector is
    /// dropped when this method returns.
    pub fn assemble(&self, reads: Vec<ReadPair>, seeds: &[String]) -> AssemblyResult {
        run_assembly(&reads, &self.params, seeds)
    }

    /// Like [`Assembler::assemble`], but forwards to `run_assembly_with_output`
    /// and additionally passes it `output`.
    pub fn assemble_with_output(
        &self,
        reads: Vec<ReadPair>,
        seeds: &[String],
        output: &dyn RunOutput,
    ) -> AssemblyResult {
        run_assembly_with_output(&reads, &self.params, seeds, output)
    }
}
/// Accumulates reads in two [`ReadHolder`]s before they are handed to the
/// assembler as a single [`ReadPair`].
pub struct ReadSet {
    /// Holder created with `ReadHolder::new(true)`; receives both mates of each pair.
    paired: ReadHolder,
    /// Holder created with `ReadHolder::new(false)`; receives single reads.
    unpaired: ReadHolder,
}
impl Default for ReadSet {
fn default() -> Self {
Self::new()
}
}
impl ReadSet {
    /// Creates an empty set with one holder built via `ReadHolder::new(true)`
    /// for pairs and one built via `ReadHolder::new(false)` for single reads.
    pub fn new() -> Self {
        let paired = ReadHolder::new(true);
        let unpaired = ReadHolder::new(false);
        Self { paired, unpaired }
    }

    /// Appends `mate1` and then `mate2` to the paired holder.
    pub fn add_pair(&mut self, mate1: &str, mate2: &str) {
        for mate in [mate1, mate2] {
            self.paired.push_back_str(mate);
        }
    }

    /// Byte-slice variant of [`ReadSet::add_pair`].
    ///
    /// A mate that is not valid UTF-8 is replaced by the empty string; the
    /// pair is still appended.
    pub fn add_pair_bytes(&mut self, mate1: &[u8], mate2: &[u8]) {
        let first = std::str::from_utf8(mate1).unwrap_or_default();
        let second = std::str::from_utf8(mate2).unwrap_or_default();
        self.add_pair(first, second);
    }

    /// Appends `read` to the unpaired holder.
    pub fn add_single(&mut self, read: &str) {
        self.unpaired.push_back_str(read);
    }

    /// Byte-slice variant of [`ReadSet::add_single`].
    ///
    /// Input that is not valid UTF-8 is replaced by the empty string, which
    /// is still appended.
    pub fn add_single_bytes(&mut self, read: &[u8]) {
        self.add_single(std::str::from_utf8(read).unwrap_or_default());
    }

    /// Sum of `read_num()` over the paired and unpaired holders.
    pub fn read_count(&self) -> usize {
        let paired = self.paired.read_num();
        let unpaired = self.unpaired.read_num();
        paired + unpaired
    }

    /// Consumes the set and returns a one-element vector containing
    /// `[paired, unpaired]`.
    pub fn into_pairs(self) -> Vec<ReadPair> {
        let mut pairs = Vec::with_capacity(1);
        self.extend_pairs(&mut pairs);
        pairs
    }

    /// Consumes the set and pushes `[paired, unpaired]` onto `pairs`.
    pub fn extend_pairs(self, pairs: &mut Vec<ReadPair>) {
        let Self { paired, unpaired } = self;
        pairs.push([paired, unpaired]);
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a deterministic pseudo-random nucleotide string of length `n`.
    ///
    /// A 32-bit linear congruential generator is advanced from `seed`, and
    /// bits 24-25 of each new state select one of `A`, `C`, `G`, `T`.
    fn synthesize_genome(seed: u32, n: usize) -> String {
        const BASES: &[u8; 4] = b"ACGT";
        let mut state = seed;
        (0..n)
            .map(|_| {
                state = state.wrapping_mul(1664525).wrapping_add(1013904223);
                char::from(BASES[((state >> 24) & 3) as usize])
            })
            .collect()
    }

    /// End-to-end smoke test: adjacent 100-base mates sampled from a synthetic
    /// genome must yield at least one contig.
    #[test]
    fn builder_assembles_synthetic_paired_reads() {
        const READ_LEN: usize = 100;
        let genome = synthesize_genome(0xC0FFEE, 1500);
        // Highest start offset that still leaves room for both mates.
        let span = genome.len() - READ_LEN * 2;

        let mut reads = ReadSet::new();
        for start in (0..600).map(|i| (i * 41) % span) {
            let (mate1, mate2) = genome[start..start + 2 * READ_LEN].split_at(READ_LEN);
            reads.add_pair(mate1, mate2);
        }
        assert!(reads.read_count() > 0);

        let assembler = Assembler::new()
            .min_kmer(21)
            .max_kmer(21)
            .steps(1)
            .min_count(2)
            .estimate_min_count(false);
        let result = assembler.assemble(reads.into_pairs(), &[]);
        assert!(
            !result.contigs.is_empty(),
            "expected at least one contig from synthetic data"
        );
    }

    /// Each chained setter must be visible through `params()`, and
    /// `min_count` must leave `estimate_min_count` switched off.
    #[test]
    fn builder_chaining_threads_through_to_params() {
        let assembler = Assembler::new()
            .min_kmer(31)
            .max_kmer(75)
            .steps(5)
            .min_count(3)
            .ncores(4)
            .memory_gb(8)
            .allow_snps(true);
        let params = assembler.params();
        assert_eq!(
            (
                params.min_kmer,
                params.max_kmer,
                params.steps,
                params.min_count,
                params.ncores,
                params.memory_gb,
            ),
            (31, 75, 5, 3, 4, 8)
        );
        assert!(params.allow_snps);
        assert!(!params.estimate_min_count);
    }

    /// Two pairs plus one single read supplied as bytes count as five reads.
    #[test]
    fn read_set_accepts_byte_streams() {
        let mut reads = ReadSet::new();
        let pairs: [(&[u8], &[u8]); 2] = [
            (b"ACGTACGT", b"GTACGTAC"),
            (b"AAACCCGGGTTT", b"AAACCCGGGTTT"),
        ];
        for &(mate1, mate2) in pairs.iter() {
            reads.add_pair_bytes(mate1, mate2);
        }
        reads.add_single_bytes(b"GGGGCCCC");
        assert_eq!(reads.read_count(), 5);
    }
}