use crate::BuildOpts;
use std::ffi::OsString;
use std::io::Write;
use std::ops::Range;
use std::path::PathBuf;
use sbwt::BitPackedKmerSorting;
use sbwt::BitPackedKmerSortingMem;
use sbwt::SbwtIndexBuilder;
use sbwt::SbwtIndexVariant;
/// Builds an SBWT index and its LCS array from in-memory sequences.
///
/// # Arguments
///
/// * `slices` - Input sequences, one `Vec<u8>` per sequence. Must not be empty.
/// * `build_options` - Construction settings. When `None`, `BuildOpts::default()` is used.
///
/// When `temp_dir` is set in the options, construction uses `BitPackedKmerSorting`
/// configured with that directory and `mem_gb`; otherwise it uses
/// `BitPackedKmerSortingMem`. All other builder settings are identical in both cases.
///
/// # Returns
///
/// The built index wrapped in `SbwtIndexVariant::SubsetMatrix`, and the LCS array.
///
/// # Panics
///
/// Panics if `slices` is empty, or if the builder does not return an LCS array.
pub fn build_sbwt_from_vecs(
    slices: &[Vec<u8>],
    build_options: &Option<BuildOpts>,
) -> (SbwtIndexVariant, sbwt::LcsArray) {
    assert!(!slices.is_empty());

    let build_opts = build_options.clone().unwrap_or_default();

    // Only `temp_dir` is moved out of `build_opts` here; the remaining fields
    // stay readable in both branches.
    let (sbwt, lcs) = if let Some(temp_dir) = build_opts.temp_dir {
        let temp_path = PathBuf::from(OsString::from(temp_dir));
        let algorithm = BitPackedKmerSorting::new()
            .mem_gb(build_opts.mem_gb)
            .dedup_batches(build_opts.dedup_batches)
            .temp_dir(temp_path.as_path());

        SbwtIndexBuilder::new()
            .k(build_opts.k)
            .n_threads(build_opts.num_threads)
            .add_rev_comp(build_opts.add_revcomp)
            .algorithm(algorithm)
            .build_lcs(true)
            .build_select_support(build_opts.build_select)
            .precalc_length(build_opts.prefix_precalc)
            .run_from_vecs(slices)
    } else {
        let algorithm = BitPackedKmerSortingMem::new()
            .dedup_batches(build_opts.dedup_batches);

        SbwtIndexBuilder::new()
            .k(build_opts.k)
            .n_threads(build_opts.num_threads)
            .add_rev_comp(build_opts.add_revcomp)
            .algorithm(algorithm)
            .build_lcs(true)
            .build_select_support(build_opts.build_select)
            .precalc_length(build_opts.prefix_precalc)
            .run_from_vecs(slices)
    };

    (
        SbwtIndexVariant::SubsetMatrix(sbwt),
        // Both branches configure the builder with `build_lcs(true)`.
        lcs.expect("LCS array expected because the builder was run with build_lcs(true)"),
    )
}
/// Writes an SBWT index and its LCS array to disk.
///
/// The index goes to `<outfile_prefix>.sbwt` and the LCS array to
/// `<outfile_prefix>.lcs`. The `.sbwt` file starts with a header made of the
/// length of the variant name as a little-endian `u64`, followed by the name
/// bytes (`SubsetMatrix`), and then the serialized index.
///
/// # Arguments
///
/// * `outfile_prefix` - Path prefix shared by both output files.
/// * `sbwt` - Index to serialize.
/// * `lcs` - LCS array to serialize.
///
/// # Panics
///
/// Panics if either file cannot be created, or if writing or flushing fails.
pub fn serialize_sbwt(
    outfile_prefix: &str,
    sbwt: &SbwtIndexVariant,
    lcs: &sbwt::LcsArray,
) {
    let sbwt_outfile = format!("{}.sbwt", outfile_prefix);
    let lcs_outfile = format!("{}.lcs", outfile_prefix);

    let sbwt_conn = std::fs::File::create(&sbwt_outfile)
        .unwrap_or_else(|_| panic!("Expected write access to {}", sbwt_outfile));
    let mut sbwt_out = std::io::BufWriter::new(sbwt_conn);

    sbwt_out
        .write_all(&(b"SubsetMatrix".len() as u64).to_le_bytes())
        .expect("Serialized SBWT header part 1.");
    sbwt_out
        .write_all(b"SubsetMatrix")
        .expect("Serialized SBWT header part 2.");
    match sbwt {
        SbwtIndexVariant::SubsetMatrix(index) => {
            index.serialize(&mut sbwt_out).expect("Serialized SBWT index.");
        },
    };
    // Flush explicitly: dropping a `BufWriter` ignores any error from the
    // final write, which could leave a truncated file unnoticed.
    sbwt_out.flush().expect("Flushed serialized SBWT index.");

    let lcs_conn = std::fs::File::create(&lcs_outfile)
        .unwrap_or_else(|_| panic!("Expected write access to {}", lcs_outfile));
    let mut lcs_out = std::io::BufWriter::new(lcs_conn);
    lcs.serialize(&mut lcs_out).expect("Serialized LCS array.");
    lcs_out.flush().expect("Flushed serialized LCS array.");
}
/// Reads an SBWT index and its LCS array back from disk.
///
/// The index is read from `<index_prefix>.sbwt` and the LCS array from
/// `<index_prefix>.lcs`.
///
/// # Panics
///
/// Panics if either file cannot be opened, or if loading its contents fails.
pub fn load_sbwt(
    index_prefix: &str,
) -> (SbwtIndexVariant, sbwt::LcsArray) {
    let sbwt_path = format!("{}.sbwt", index_prefix);
    let lcs_path = format!("{}.lcs", index_prefix);

    // The index is loaded first, then the LCS array.
    let index = match std::fs::File::open(&sbwt_path) {
        Ok(file) => {
            sbwt::load_sbwt_index_variant(&mut std::io::BufReader::new(file)).unwrap()
        }
        Err(_) => panic!("Expected SBWT at {}", sbwt_path),
    };

    let lcs_array = match std::fs::File::open(&lcs_path) {
        Ok(file) => sbwt::LcsArray::load(&mut std::io::BufReader::new(file)).unwrap(),
        Err(_) => panic!("Expected LCS array at {}", lcs_path),
    };

    (index, lcs_array)
}
/// Computes matching statistics for `query` against an SBWT index.
///
/// Wraps the index and `lcs` in a `sbwt::StreamingIndex` and returns the
/// output of its `matching_statistics` call unchanged.
///
/// # Arguments
///
/// * `query` - Query sequence. Must not be empty.
/// * `sbwt` - Index to query.
/// * `lcs` - LCS array belonging to `sbwt`.
///
/// # Returns
///
/// A vector of `(usize, Range<usize>)` pairs. NOTE(review): presumably the
/// match length and the matching interval in the index for each query
/// position; confirm against the `sbwt` crate documentation.
///
/// # Panics
///
/// Panics if `query` is empty.
pub fn query_sbwt(
    query: &[u8],
    sbwt: &SbwtIndexVariant,
    lcs: &sbwt::LcsArray,
) -> Vec<(usize, Range<usize>)> {
    assert!(!query.is_empty());

    match sbwt {
        SbwtIndexVariant::SubsetMatrix(index) => {
            sbwt::StreamingIndex::new(index, lcs).matching_statistics(query)
        }
    }
}
#[cfg(test)]
mod tests {
    /// Reference sequence shared by both tests. It contains a `-` byte in
    /// addition to nucleotide characters.
    const REFERENCE: &[u8] = b"AAAGAACCA-TCAGGGCG";

    /// Builds an index with `k = 3` and checks the first element of each
    /// matching-statistics entry against known values.
    #[test]
    fn build_and_query_sbwt() {
        let reference: Vec<Vec<u8>> = vec![REFERENCE.to_vec()];
        let query: Vec<u8> = b"CAAGCCACTCATTGGGTC".to_vec();

        let (index, lcs) = super::build_sbwt_from_vecs(
            &reference,
            &Some(super::BuildOpts { k: 3, ..Default::default() }),
        );

        let expected = vec![1, 2, 2, 3, 2, 2, 3, 2, 1, 2, 3, 1, 1, 1, 2, 3, 1, 2];
        let got: Vec<usize> = super::query_sbwt(&query, &index, &lcs)
            .into_iter()
            .map(|(len, _)| len)
            .collect();
        assert_eq!(got, expected);
    }

    /// Serializes an index and LCS array, loads them back, and checks that
    /// both are equal to the originals.
    #[test]
    fn build_serialize_load_sbwt() {
        let reference: Vec<Vec<u8>> = vec![REFERENCE.to_vec()];
        let (index, lcs) = super::build_sbwt_from_vecs(
            &reference,
            &Some(super::BuildOpts { k: 3, ..Default::default() }),
        );

        // Join path components instead of concatenating strings, and add the
        // process id so concurrent test runs do not overwrite each other.
        let index_prefix = std::env::temp_dir()
            .join(format!("serialized_index_test_{}", std::process::id()))
            .to_str()
            .expect("temporary directory path is valid UTF-8")
            .to_owned();

        super::serialize_sbwt(&index_prefix, &index, &lcs);
        let (index_loaded, lcs_loaded) = super::load_sbwt(&index_prefix);

        // Remove the files before asserting so a failed assertion does not
        // leave them behind; a failed removal is deliberately ignored.
        for ext in &["sbwt", "lcs"] {
            let _ = std::fs::remove_file(format!("{}.{}", index_prefix, ext));
        }

        assert_eq!(lcs, lcs_loaded);
        match (&index, &index_loaded) {
            (
                sbwt::SbwtIndexVariant::SubsetMatrix(built),
                sbwt::SbwtIndexVariant::SubsetMatrix(loaded),
            ) => {
                assert_eq!(built, loaded);
            }
        };
    }
}