use std::str::FromStr;
use crate::{subsetseq::SubsetMatrix, SeqStream};
use crate::sbwt::{PrefixLookupTable, SbwtIndex};
use crate::streaming_index::LcsArray;
/// Adapter over a [SeqStream] that interleaves every sequence with its
/// reverse complement: odd calls to `stream_next` forward a sequence from
/// the inner stream, the following even call returns that same sequence
/// reverse-complemented.
#[derive(Clone, Eq, PartialEq, Debug)]
struct SeqStreamWithRevComp<SS: SeqStream + Send>{
inner: SS, // The wrapped stream of forward sequences
rc_buf: Vec<u8>, // Copy of the most recent sequence; reverse-complemented in place on even calls
parity: bool, } // True right after a forward sequence was returned; false after its reverse complement
impl<SS: SeqStream + Send> crate::SeqStream for SeqStreamWithRevComp<SS>{
// Alternates between forwarding the next inner sequence and returning the
// reverse complement of the sequence returned by the previous call.
fn stream_next(&mut self) -> Option<&[u8]> {
self.parity = !self.parity;
if self.parity {
// Forward phase: fetch the next sequence from the inner stream. The
// explicit match (with the clippy allow) is kept instead of `?` —
// presumably deliberate given the returned reference borrows
// `self.inner` while `self.rc_buf` is mutated below; TODO confirm
// whether `?` compiles here before simplifying.
#[allow(clippy::question_mark)] let new = match self.inner.stream_next() {
None => return None, Some(r) => r };
// Stash a copy so the next (even) call can reverse-complement it.
self.rc_buf.clear();
self.rc_buf.extend(new);
Some(new)
} else {
// Reverse-complement phase: transform the stashed copy in place and
// hand out a reference to it.
jseqio::reverse_complement_in_place(&mut self.rc_buf);
Some(&self.rc_buf)
}
}
}
/// A strategy for constructing an [SbwtIndex] (and optionally an [LcsArray])
/// from a stream of sequences.
pub trait SbwtConstructionAlgorithm {
/// Consumes `input` and builds the SBWT for k-mers of length `k` using
/// `n_threads` threads. If `build_lcs` is true an LCS array is built and
/// returned as the second element of the pair, otherwise it is `None`.
/// `add_all_dummy_paths` is forwarded to the underlying construction
/// routine (dummy-node handling; see the implementation for semantics).
fn run<SS: SeqStream + Send>(self, input: SS, k: usize, n_threads: usize, build_lcs: bool, add_all_dummy_paths: bool) -> (SbwtIndex<SubsetMatrix>, Option<LcsArray>);
}
/// Configuration for SBWT construction by bitpacked k-mer sorting using
/// disk-backed temporary files.
// NOTE(review): `#[derive(Default)]` yields mem_gb = 0 and an empty temp_dir,
// which differ from the defaults in `new()` (4 GB, "."); confirm
// `Default::default()` is only used where these fields are overridden.
#[derive(Default)]
#[derive(Clone, Eq, PartialEq, Debug)]
pub struct BitPackedKmerSortingDisk{
mem_gb: usize, // Working-memory budget in gigabytes
dedup_batches: bool, // Whether to deduplicate k-mer batches during sorting
temp_dir: std::path::PathBuf, // Directory for temporary files
}
/// Backward-compatibility alias kept for code written against the pre-0.4.0
/// name; new code should use [BitPackedKmerSortingDisk] directly.
#[deprecated(
since = "0.4.0",
note = "With the introduction of BitPackedKmerSortingMem, the old struct BitPackedKmerSorting has been renamed to BitPackedKmerSortingDisk."
)]
pub type BitPackedKmerSorting = BitPackedKmerSortingDisk;
impl BitPackedKmerSortingDisk {
pub fn new() -> Self {
Self{mem_gb: 4, dedup_batches: false, temp_dir: std::path::PathBuf::from_str(".").unwrap()}
}
pub fn mem_gb(mut self, mem_gb: usize) -> Self {
self.mem_gb = mem_gb;
self
}
pub fn dedup_batches(mut self, enable: bool) -> Self {
self.dedup_batches = enable;
self
}
pub fn temp_dir(mut self, temp_dir: &std::path::Path) -> Self {
self.temp_dir = temp_dir.to_path_buf();
std::fs::create_dir_all(&self.temp_dir).unwrap();
self
}
}
impl SbwtConstructionAlgorithm for BitPackedKmerSortingDisk {
fn run<SS: SeqStream + Send>(self, input: SS, k: usize, n_threads: usize, build_lcs: bool, add_all_dummy_paths: bool) -> (SbwtIndex<SubsetMatrix>, Option<LcsArray>) {
let mem_gb = self.mem_gb;
let dedup_batches = self.dedup_batches;
let mut temp_file_manager = crate::tempfile::TempFileManager::new(&self.temp_dir);
match k {
0..=32 => {
crate::bitpacked_kmer_sorting::build_with_bitpacked_kmer_sorting::<1,_,SubsetMatrix>(input, k, mem_gb, n_threads, dedup_batches, build_lcs, add_all_dummy_paths, &mut temp_file_manager)
}
33..=64 => {
crate::bitpacked_kmer_sorting::build_with_bitpacked_kmer_sorting::<2,_,SubsetMatrix>(input, k, mem_gb, n_threads, dedup_batches, build_lcs, add_all_dummy_paths, &mut temp_file_manager)
}
65..=96 => {
crate::bitpacked_kmer_sorting::build_with_bitpacked_kmer_sorting::<3,_,SubsetMatrix>(input, k, mem_gb, n_threads, dedup_batches, build_lcs, add_all_dummy_paths, &mut temp_file_manager)
}
97..=128 => {
crate::bitpacked_kmer_sorting::build_with_bitpacked_kmer_sorting::<4,_,SubsetMatrix>(input, k, mem_gb, n_threads, dedup_batches, build_lcs, add_all_dummy_paths, &mut temp_file_manager)
}
129..=256 => {
crate::bitpacked_kmer_sorting::build_with_bitpacked_kmer_sorting::<8,_,SubsetMatrix>(input, k, mem_gb, n_threads, dedup_batches, build_lcs, add_all_dummy_paths, &mut temp_file_manager)
}
_ => {
panic!("k > 256 not supported with bitpacked sorting algorithm.");
}
}
}
}
/// Configuration for SBWT construction by bitpacked k-mer sorting held
/// entirely in memory (no temporary files on disk).
// NOTE(review): `#[derive(Default)]` yields mem_gb = 0, which differs from
// the 8 GB default in `new()`; confirm `Default::default()` is only used
// where mem_gb is overridden.
#[derive(Default)]
#[derive(Clone, Eq, PartialEq, Debug)]
pub struct BitPackedKmerSortingMem{
dedup_batches: bool, // Whether to deduplicate k-mer batches during sorting
mem_gb: usize, // Memory budget in gigabytes
}
impl BitPackedKmerSortingMem {
    /// Creates a configuration with defaults: 8 GB memory budget and batch
    /// deduplication disabled.
    pub fn new() -> Self {
        Self { mem_gb: 8, dedup_batches: false }
    }

    /// Sets the memory budget in gigabytes (builder style).
    pub fn mem_gb(mut self, mem_gb: usize) -> Self {
        self.mem_gb = mem_gb;
        self
    }

    /// Enables or disables deduplication of k-mer batches during sorting.
    pub fn dedup_batches(mut self, enable: bool) -> Self {
        self.dedup_batches = enable;
        self
    }
}
impl SbwtConstructionAlgorithm for BitPackedKmerSortingMem {
fn run<SS: SeqStream + Send>(self, input: SS, k: usize, n_threads: usize, build_lcs: bool, add_all_dummy_paths: bool) -> (SbwtIndex<SubsetMatrix>, Option<LcsArray>) {
let dedup_batches = self.dedup_batches;
let mem_gb = self.mem_gb;
match k {
0..=32 => {
crate::bitpacked_kmer_sorting_mem::build_with_bitpacked_kmer_sorting::<1,_,SubsetMatrix>(input, k, n_threads, mem_gb, dedup_batches, build_lcs, add_all_dummy_paths)
}
33..=64 => {
crate::bitpacked_kmer_sorting_mem::build_with_bitpacked_kmer_sorting::<2,_,SubsetMatrix>(input, k, n_threads, mem_gb, dedup_batches, build_lcs, add_all_dummy_paths)
}
65..=96 => {
crate::bitpacked_kmer_sorting_mem::build_with_bitpacked_kmer_sorting::<3,_,SubsetMatrix>(input, k, n_threads, mem_gb, dedup_batches, build_lcs, add_all_dummy_paths)
}
97..=128 => {
crate::bitpacked_kmer_sorting_mem::build_with_bitpacked_kmer_sorting::<4,_,SubsetMatrix>(input, k, n_threads, mem_gb, dedup_batches, build_lcs, add_all_dummy_paths)
}
129..=256 => {
crate::bitpacked_kmer_sorting_mem::build_with_bitpacked_kmer_sorting::<8,_,SubsetMatrix>(input, k, n_threads, mem_gb, dedup_batches, build_lcs, add_all_dummy_paths)
}
_ => {
panic!("k > 256 not supported with bitpacked sorting algorithm.");
}
}
}
}
/// Builder-style configuration for constructing an [SbwtIndex] (and
/// optionally an [LcsArray]) with a pluggable construction algorithm `A`.
#[derive(Default)]
#[derive(Clone, Eq, PartialEq, Debug)]
pub struct SbwtIndexBuilder<A: SbwtConstructionAlgorithm + Default> {
k: usize, // k-mer length
n_threads: usize, // Number of threads used during construction
algorithm: A, // The construction algorithm to run
build_lcs: bool, // Whether to also build the LCS array
add_rev_comp: bool, // Whether to index the reverse complement of every input sequence as well
build_select_support: bool, // Whether to build select support on the finished index
precalc_length: usize, // Prefix length of the precalculated lookup table
add_all_dummy_paths: bool, // Forwarded to the construction algorithm (dummy-node handling)
}
impl<A: SbwtConstructionAlgorithm + Default> SbwtIndexBuilder<A> {
    /// Creates a builder with defaults: k = 31, 4 threads, the algorithm's
    /// `Default` configuration, no LCS array, no reverse complements, no
    /// select support, lookup-table prefix length 8, no extra dummy paths.
    // NOTE(review): these values differ from what `#[derive(Default)]` on the
    // struct would produce (k = 0 there); confirm callers use `new()`.
    pub fn new() -> Self {
        Self{k: 31, n_threads: 4, algorithm: A::default(), build_lcs: false, add_rev_comp: false, build_select_support: false, precalc_length: 8, add_all_dummy_paths: false}
    }

    /// Sets the k-mer length.
    pub fn k(mut self, k: usize) -> Self {
        self.k = k;
        self
    }

    /// Sets the construction algorithm together with its configuration.
    pub fn algorithm(mut self, alg: A) -> Self {
        self.algorithm = alg;
        self
    }

    /// If enabled, an LCS array is also built and returned from [Self::run].
    pub fn build_lcs(mut self, enable: bool) -> Self {
        self.build_lcs = enable;
        self
    }

    /// If enabled, select support is built on the finished index.
    pub fn build_select_support(mut self, enable: bool) -> Self {
        self.build_select_support = enable;
        self
    }

    /// If enabled, the reverse complement of every input sequence is fed to
    /// the construction algorithm in addition to the sequence itself.
    pub fn add_rev_comp(mut self, enable: bool) -> Self {
        self.add_rev_comp = enable;
        self
    }

    /// Sets the prefix length of the precalculated lookup table.
    pub fn precalc_length(mut self, precalc_length: usize) -> Self {
        self.precalc_length = precalc_length;
        self
    }

    /// Sets the number of threads used during construction.
    pub fn n_threads(mut self, n_threads: usize) -> Self {
        self.n_threads = n_threads;
        self
    }

    /// Forwarded to the construction algorithm; controls dummy-path handling.
    pub fn add_all_dummy_paths(mut self, enable: bool) -> Self {
        self.add_all_dummy_paths = enable;
        self
    }

    /// Runs the configured construction algorithm on `input` and returns the
    /// index, plus the LCS array if `build_lcs` was enabled.
    pub fn run<SS: SeqStream + Send>(self, input: SS) -> (SbwtIndex<SubsetMatrix>, Option<LcsArray>) {
        let add_all_dummy_paths = self.add_all_dummy_paths;
        let (mut sbwt, lcs) = if self.add_rev_comp {
            // Interleave every input sequence with its reverse complement.
            let input_with_rc = SeqStreamWithRevComp{inner: input, rc_buf: Vec::<u8>::new(), parity: false};
            self.algorithm.run(input_with_rc, self.k, self.n_threads, self.build_lcs, add_all_dummy_paths)
        } else {
            self.algorithm.run(input, self.k, self.n_threads, self.build_lcs, add_all_dummy_paths)
        };
        if self.build_select_support {
            sbwt.build_select();
        }
        // Rebuild the prefix lookup table only if the algorithm produced one
        // with a different prefix length than requested.
        if sbwt.get_lookup_table().prefix_length != self.precalc_length {
            let lut = PrefixLookupTable::new(&sbwt, self.precalc_length);
            sbwt.set_lookup_table(lut);
        }
        (sbwt, lcs)
    }

    /// Convenience wrapper: builds from a slice of byte-slice sequences.
    pub fn run_from_slices(self, input: &[&[u8]]) -> (SbwtIndex<SubsetMatrix>, Option<LcsArray>) {
        let stream = crate::util::SliceSeqStream::new(input);
        self.run(stream)
    }

    /// Convenience wrapper: builds from a slice of owned byte vectors.
    pub fn run_from_vecs(self, input: &[Vec<u8>]) -> (SbwtIndex<SubsetMatrix>, Option<LcsArray>) {
        let stream = crate::util::VecSeqStream::new(input);
        self.run(stream)
    }

    /// Builds from a FASTA (or FASTQ) reader via `DynamicFastXReader`.
    ///
    /// # Panics
    /// Panics if the input cannot be parsed by the reader.
    pub fn run_from_fasta<R: std::io::Read + Send + 'static>(self, input: R) -> (SbwtIndex<SubsetMatrix>, Option<LcsArray>) {
        let input = std::io::BufReader::new(input);
        let input = crate::JSeqIOSeqStreamWrapper{inner: jseqio::reader::DynamicFastXReader::new(input).expect("Failed to open FASTA/FASTQ input")};
        self.run(input)
    }

    /// Builds from a FASTQ reader. The original body was byte-for-byte
    /// identical to [Self::run_from_fasta] (the underlying reader handles
    /// both formats), so this now delegates to avoid the duplicated path.
    pub fn run_from_fastq<R: std::io::Read + Send + 'static>(self, input: R) -> (SbwtIndex<SubsetMatrix>, Option<LcsArray>) {
        self.run_from_fasta(input)
    }
}