#![doc = include_str!("../README.md")]
pub mod filter;
pub mod index;
pub mod minimizers;
pub use filter::{FilterSummary, run as run_filter};
#[cfg(feature = "fetch")]
pub use index::fetch as index_fetch;
pub use index::{
INDEX_FORMAT_VERSION, IndexHeader, build as index_build, diff as index_diff,
dump as index_dump, dump_minimizers, info as index_info, intersect as index_intersect,
load_minimizers, union as index_union,
};
pub use minimizers::{DEFAULT_KMER_LENGTH, DEFAULT_WINDOW_SIZE, decode_u64, decode_u128};
use anyhow::Result;
use std::collections::HashSet;
use std::hash::BuildHasher;
use std::path::{Path, PathBuf};
#[derive(Clone, Default)]
pub struct FixedRapidHasher;
impl BuildHasher for FixedRapidHasher {
type Hasher = rapidhash::fast::RapidHasher<'static>;
fn build_hasher(&self) -> Self::Hasher {
rapidhash::fast::SeedableState::fixed().build_hasher()
}
}
pub type RapidHashSet<T> = HashSet<T, FixedRapidHasher>;
pub enum MinimizerSet {
U64(RapidHashSet<u64>),
U128(RapidHashSet<u128>),
}
impl MinimizerSet {
pub fn len(&self) -> usize {
match self {
MinimizerSet::U64(set) => set.len(),
MinimizerSet::U128(set) => set.len(),
}
}
pub fn is_u64(&self) -> bool {
matches!(self, MinimizerSet::U64(_))
}
pub fn extend(&mut self, other: Self) {
match (self, other) {
(MinimizerSet::U64(self_set), MinimizerSet::U64(other_set)) => {
self_set.extend(other_set);
}
(MinimizerSet::U128(self_set), MinimizerSet::U128(other_set)) => {
self_set.extend(other_set);
}
_ => panic!("Cannot extend U64 set with U128 set or vice versa"),
}
}
pub fn remove_all(&mut self, other: &Self) {
match (self, other) {
(MinimizerSet::U64(self_set), MinimizerSet::U64(other_set)) => {
for val in other_set {
self_set.remove(val);
}
}
(MinimizerSet::U128(self_set), MinimizerSet::U128(other_set)) => {
for val in other_set {
self_set.remove(val);
}
}
_ => panic!("Cannot remove U128 minimizers from U64 set or vice versa"),
}
}
pub fn intersect(&mut self, other: &Self) {
match (self, other) {
(MinimizerSet::U64(self_set), MinimizerSet::U64(other_set)) => {
self_set.retain(|val| other_set.contains(val));
}
(MinimizerSet::U128(self_set), MinimizerSet::U128(other_set)) => {
self_set.retain(|val| other_set.contains(val));
}
_ => panic!("Cannot intersect U64 set with U128 set or vice versa"),
}
}
}
#[derive(Clone)]
pub enum MinimizerVec {
U64(Vec<u64>),
U128(Vec<u128>),
}
impl MinimizerVec {
pub fn clear(&mut self) {
match self {
MinimizerVec::U64(v) => v.clear(),
MinimizerVec::U128(v) => v.clear(),
}
}
pub fn len(&self) -> usize {
match self {
MinimizerVec::U64(v) => v.len(),
MinimizerVec::U128(v) => v.len(),
}
}
pub fn is_empty(&self) -> bool {
match self {
MinimizerVec::U64(v) => v.is_empty(),
MinimizerVec::U128(v) => v.is_empty(),
}
}
}
pub struct FilterConfig<'a> {
pub minimizers_path: &'a Path,
pub input_path: &'a str,
pub input2_path: Option<&'a str>,
pub output_path: Option<&'a Path>,
pub output2_path: Option<&'a str>,
pub abs_threshold: usize,
pub rel_threshold: f64,
pub prefix_length: usize,
pub summary_path: Option<&'a PathBuf>,
pub deplete: bool,
pub rename: bool,
pub rename_random: bool,
pub output_fasta: bool,
pub threads: u16,
pub compression_level: u8,
pub compression_threads: u16,
pub debug: bool,
pub quiet: bool,
}
impl<'a> FilterConfig<'a> {
pub fn new(minimizers_path: &'a Path) -> Self {
Self {
minimizers_path,
input_path: "-",
input2_path: None,
output_path: None,
output2_path: None,
abs_threshold: 2,
rel_threshold: 0.01,
prefix_length: 0,
summary_path: None,
deplete: false,
rename: false,
rename_random: false,
output_fasta: false,
threads: 0, compression_level: 2, compression_threads: 0, debug: false,
quiet: false,
}
}
pub fn with_input(mut self, input_path: &'a str) -> Self {
self.input_path = input_path;
self
}
pub fn with_input2(mut self, input2_path: &'a str) -> Self {
self.input2_path = Some(input2_path);
self
}
pub fn with_output(mut self, output_path: &'a Path) -> Self {
self.output_path = Some(output_path);
self
}
pub fn with_output2(mut self, output2_path: &'a str) -> Self {
self.output2_path = Some(output2_path);
self
}
pub fn with_abs_threshold(mut self, abs_threshold: usize) -> Self {
self.abs_threshold = abs_threshold;
self
}
pub fn with_rel_threshold(mut self, rel_threshold: f64) -> Self {
self.rel_threshold = rel_threshold;
self
}
pub fn with_prefix_length(mut self, prefix_length: usize) -> Self {
self.prefix_length = prefix_length;
self
}
pub fn with_summary(mut self, summary_path: &'a PathBuf) -> Self {
self.summary_path = Some(summary_path);
self
}
pub fn with_deplete(mut self, deplete: bool) -> Self {
self.deplete = deplete;
self
}
pub fn with_rename(mut self, rename: bool) -> Self {
self.rename = rename;
self
}
pub fn with_rename_random(mut self, rename_random: bool) -> Self {
self.rename_random = rename_random;
self
}
pub fn with_threads(mut self, threads: u16) -> Self {
self.threads = threads;
self
}
pub fn with_compression_level(mut self, compression_level: u8) -> Self {
self.compression_level = compression_level;
self
}
pub fn with_compression_threads(mut self, compression_threads: u16) -> Self {
self.compression_threads = compression_threads;
self
}
pub fn with_debug(mut self, debug: bool) -> Self {
self.debug = debug;
self
}
pub fn with_quiet(mut self, quiet: bool) -> Self {
self.quiet = quiet;
self
}
pub fn execute(&self) -> Result<()> {
filter::run(self)
}
}
pub struct IndexConfig {
pub input_path: PathBuf,
pub kmer_length: u8,
pub window_size: u8,
pub output_path: Option<PathBuf>,
pub threads: u16,
pub quiet: bool,
pub entropy_threshold: f32,
}
impl IndexConfig {
pub fn new(input_path: PathBuf) -> Self {
Self {
input_path: input_path,
kmer_length: DEFAULT_KMER_LENGTH,
window_size: DEFAULT_WINDOW_SIZE,
output_path: None,
threads: 8,
quiet: false,
entropy_threshold: 0.0,
}
}
pub fn validate(&self) -> Result<()> {
let k = self.kmer_length as usize;
let w = self.window_size as usize;
if k > 61 || k + w > 96 || (k + w) % 2 != 0 {
return Err(anyhow::anyhow!(
"Invalid k-w combination: k={}, w={}, k+w={} (constraints: k<=61, k+w<=96, k+w even)",
k,
w,
k + w
));
}
Ok(())
}
pub fn with_kmer_length(mut self, kmer_length: u8) -> Self {
self.kmer_length = kmer_length;
self
}
pub fn with_window_size(mut self, window_size: u8) -> Self {
self.window_size = window_size;
self
}
pub fn with_output(mut self, output_path: PathBuf) -> Self {
self.output_path = Some(output_path);
self
}
pub fn with_threads(mut self, threads: u16) -> Self {
self.threads = threads;
self
}
pub fn with_quiet(mut self, quiet: bool) -> Self {
self.quiet = quiet;
self
}
pub fn with_entropy_threshold(mut self, threshold: f32) -> Self {
self.entropy_threshold = threshold;
self
}
pub fn execute(&self) -> Result<()> {
index::build(self)
}
}