/// Default parameters compiled in when the `dna` feature is enabled.
#[cfg(feature = "dna")]
pub mod constants {
    /// Default k-mer length, in characters.
    pub const DEFAULT_KMER_LENGTH: u64 = 35;

    /// Default minimizer length, in characters.
    pub const DEFAULT_MINIMIZER_LENGTH: u8 = 31;

    /// Default value for the minimizer-spaces parameter.
    pub const DEFAULT_MINIMIZER_SPACES: u8 = 7;

    /// Number of bits used to pack one sequence character into an integer.
    pub const BITS_PER_CHAR: usize = 2;
}
/// Default parameters compiled in when the `protein` feature is enabled.
#[cfg(feature = "protein")]
pub mod constants {
    /// Default k-mer length, in characters.
    pub const DEFAULT_KMER_LENGTH: u64 = 15;

    /// Default minimizer length, in characters.
    pub const DEFAULT_MINIMIZER_LENGTH: u8 = 12;

    /// Default value for the minimizer-spaces parameter.
    pub const DEFAULT_MINIMIZER_SPACES: u8 = 0;

    /// Number of bits used to pack one sequence character into an integer.
    pub const BITS_PER_CHAR: usize = 4;
}
/// Maps one nucleotide byte to its 2-bit code.
///
/// Matching is case-insensitive: `A` -> 0, `C` -> 1, `G` -> 2, `T` -> 3.
/// Every other byte yields `None`.
#[cfg(feature = "dna")]
#[inline]
pub fn char_to_value(c: u8) -> Option<u64> {
    // Fold ASCII case once so the table only lists upper-case letters;
    // non-letter bytes pass through unchanged and fall into the `None` arm.
    let code = match c.to_ascii_uppercase() {
        b'A' => 0x00,
        b'C' => 0x01,
        b'G' => 0x02,
        b'T' => 0x03,
        _ => return None,
    };
    Some(code)
}
/// Maps one protein-sequence byte to its 4-bit code in a reduced alphabet.
///
/// Matching is case-insensitive. Several letters deliberately share a code,
/// so the mapping is many-to-one:
///
/// | code | bytes            | code | bytes  |
/// |------|------------------|------|--------|
/// | 0x0  | `*`, `U`, `O`    | 0x8  | `I`, `L` |
/// | 0x1  | `A`              | 0x9  | `K`    |
/// | 0x2  | `N`, `Q`, `S`    | 0xa  | `P`    |
/// | 0x3  | `C`              | 0xb  | `R`    |
/// | 0x4  | `D`, `E`         | 0xc  | `M`, `V` |
/// | 0x5  | `F`              | 0xd  | `T`    |
/// | 0x6  | `G`              | 0xe  | `W`    |
/// | 0x7  | `H`              | 0xf  | `Y`    |
///
/// Any other byte (including `B`, `J`, `X`, `Z`) yields `None`.
#[cfg(feature = "protein")]
#[inline]
pub fn char_to_value(c: u8) -> Option<u64> {
    match c {
        b'*' | b'U' | b'u' | b'O' | b'o' => Some(0x00),
        b'A' | b'a' => Some(0x01),
        b'N' | b'n' | b'Q' | b'q' | b'S' | b's' => Some(0x02),
        b'C' | b'c' => Some(0x03),
        b'D' | b'd' | b'E' | b'e' => Some(0x04),
        b'F' | b'f' => Some(0x05),
        b'G' | b'g' => Some(0x06),
        b'H' | b'h' => Some(0x07),
        b'I' | b'i' | b'L' | b'l' => Some(0x08),
        b'K' | b'k' => Some(0x09),
        b'P' | b'p' => Some(0x0a),
        b'R' | b'r' => Some(0x0b),
        b'M' | b'm' | b'V' | b'v' => Some(0x0c),
        b'T' | b't' => Some(0x0d),
        b'W' | b'w' => Some(0x0e),
        b'Y' | b'y' => Some(0x0f),
        _ => None,
    }
}
/// Returns the reverse complement of a 2-bit-packed k-mer of `n` characters.
///
/// The k-mer is expected in the low `2 * n` bits of `kmer`, first character
/// in the most significant pair. The complement is a bitwise NOT of each
/// 2-bit code, which pairs code 0 with 3 and code 1 with 2. Bits of `kmer`
/// above `2 * n` do not influence the result.
///
/// `n == 0` yields `0`, and `n == 32` (a k-mer filling the whole word) is
/// handled without overflowing the mask shift.
///
/// # Panics
///
/// In debug builds, panics if `n > 32`: such a k-mer cannot fit in a `u64`.
#[inline]
fn reverse_complement(mut kmer: u64, n: usize) -> u64 {
    let bits = n * 2;
    debug_assert!(bits <= 64, "a u64 holds at most 32 two-bit characters");
    if bits == 0 {
        // An empty k-mer has an empty reverse complement; returning early
        // also avoids the out-of-range `>> 64` below.
        return 0;
    }

    // Reverse the order of all 32 two-bit groups in the word by swapping
    // progressively larger neighbours: pairs, nibbles, bytes, half-words, words.
    kmer = ((kmer >> 2) & 0x3333333333333333) | ((kmer << 2) & 0xCCCCCCCCCCCCCCCC);
    kmer = ((kmer >> 4) & 0x0F0F0F0F0F0F0F0F) | ((kmer << 4) & 0xF0F0F0F0F0F0F0F0);
    kmer = ((kmer >> 8) & 0x00FF00FF00FF00FF) | ((kmer << 8) & 0xFF00FF00FF00FF00);
    kmer = ((kmer >> 16) & 0x0000FFFF0000FFFF) | ((kmer << 16) & 0xFFFF0000FFFF0000);
    kmer = (kmer >> 32) | (kmer << 32);

    // `1u64 << 64` is an overflow, so the full-width mask is special-cased.
    let mask = if bits >= 64 {
        u64::MAX
    } else {
        (1u64 << bits) - 1
    };

    // The reversed k-mer now sits in the top `bits` bits: complement it and
    // slide it back down to the low end.
    (!kmer >> (64 - bits)) & mask
}
/// Returns the canonical form of a packed k-mer of `n` characters: the
/// numerically smaller of `kmer` and its reverse complement.
#[cfg(feature = "dna")]
#[inline]
pub fn canonical_representation(kmer: u64, n: usize) -> u64 {
    kmer.min(reverse_complement(kmer, n))
}
/// Returns the canonical form of a packed protein k-mer, which is the k-mer
/// itself: no reverse complement is computed in the `protein` build.
///
/// `n` and `revcom_version` are accepted but have no effect on the result.
#[cfg(feature = "protein")]
#[inline]
pub fn canonical_representation(kmer: u64, n: usize, revcom_version: u8) -> u64 {
    // Explicitly discard the unused arguments so the build stays free of
    // `unused_variables` warnings without changing the signature.
    let _ = (n, revcom_version);
    kmer
}
/// Toggle mask that `Meros::new` falls back to when the caller passes `None`.
pub const DEFAULT_TOGGLE_MASK: u64 = 0xe37e_28c4_271b_5a2d;

/// Spaced-seed mask that `Meros::new` falls back to when the caller passes `None`.
pub const DEFAULT_SPACED_SEED_MASK: u64 = 0;

/// Current reverse-complement version number.
// NOTE(review): presumably the value callers pass as `revcom_version`; confirm against call sites.
pub const CURRENT_REVCOM_VERSION: u8 = 1;
/// First multiplier of the 64-bit finalization mix.
const M1: u64 = 0xff51_afd7_ed55_8ccd;
/// Second multiplier of the 64-bit finalization mix.
const M2: u64 = 0xc4ce_b9fe_1a85_ec53;

/// Scrambles `k` with a 64-bit finalization mix: three xor-shift-by-33
/// rounds interleaved with two wrapping multiplications.
///
/// The shifts and multipliers are those of the MurmurHash3 `fmix64`
/// finalizer. `fmix64(0)` is `0`.
pub fn fmix64(k: u64) -> u64 {
    let k = (k ^ (k >> 33)).wrapping_mul(M1);
    let k = (k ^ (k >> 33)).wrapping_mul(M2);
    k ^ (k >> 33)
}
/// Parameter set describing k-mer and minimizer (l-mer) lengths together
/// with the bit masks derived from or associated with them.
#[derive(Debug, Clone, Copy)]
pub struct Meros {
    /// K-mer length, in characters.
    pub k_mer: usize,
    /// Minimizer (l-mer) length, in characters.
    pub l_mer: usize,
    /// Mask with the low `l_mer * BITS_PER_CHAR` bits set, as built by the
    /// constructors.
    pub mask: u64,
    /// Spaced-seed mask; the constructors default it to
    /// `DEFAULT_SPACED_SEED_MASK`.
    pub spaced_seed_mask: u64,
    /// Toggle mask; the constructors store it already restricted to `mask`.
    pub toggle_mask: u64,
    /// Optional minimum clear hash value, stored as given.
    // NOTE(review): how consumers interpret this threshold is not visible here; confirm at use sites.
    pub min_clear_hash_value: Option<u64>,
}
impl Meros {
    /// Builds a parameter set for the given k-mer and minimizer lengths.
    ///
    /// * `k_mer` - k-mer length, in characters.
    /// * `l_mer` - minimizer length, in characters.
    /// * `spaced_seed_mask` - falls back to `DEFAULT_SPACED_SEED_MASK` when `None`.
    /// * `toggle_mask` - falls back to `DEFAULT_TOGGLE_MASK` when `None`; in
    ///   either case it is stored restricted to the minimizer mask.
    /// * `min_clear_hash_value` - stored unchanged.
    ///
    /// The minimizer mask has the low `l_mer * constants::BITS_PER_CHAR` bits
    /// set. A minimizer that fills the whole 64-bit word gets an all-ones
    /// mask instead of overflowing the shift.
    ///
    /// # Panics
    ///
    /// In debug builds, panics if the minimizer needs more than 64 bits.
    pub fn new(
        k_mer: usize,
        l_mer: usize,
        spaced_seed_mask: Option<u64>,
        toggle_mask: Option<u64>,
        min_clear_hash_value: Option<u64>,
    ) -> Self {
        let mask = Self::low_bits_mask(l_mer * constants::BITS_PER_CHAR);
        Self {
            k_mer,
            l_mer,
            mask,
            spaced_seed_mask: spaced_seed_mask.unwrap_or(DEFAULT_SPACED_SEED_MASK),
            toggle_mask: toggle_mask.unwrap_or(DEFAULT_TOGGLE_MASK) & mask,
            min_clear_hash_value,
        }
    }

    /// Returns `k_mer - l_mer`.
    ///
    /// # Panics
    ///
    /// The subtraction overflows if `l_mer > k_mer`: this panics in debug
    /// builds and wraps in release builds.
    pub fn window_size(&self) -> usize {
        self.k_mer - self.l_mer
    }

    /// Returns a mask with the low `bits` bits set, saturating to all ones
    /// once `bits` reaches the width of `u64`.
    fn low_bits_mask(bits: usize) -> u64 {
        debug_assert!(bits <= 64, "a minimizer must fit in 64 bits");
        if bits >= 64 {
            // `1u64 << 64` would overflow, so the full-width case is explicit.
            u64::MAX
        } else {
            (1u64 << bits) - 1
        }
    }
}
impl Default for Meros {
    /// Builds the parameter set from the feature-selected defaults in
    /// `constants`, with the default spaced-seed and toggle masks and no
    /// minimum clear hash value.
    fn default() -> Self {
        Self::new(
            constants::DEFAULT_KMER_LENGTH as usize,
            constants::DEFAULT_MINIMIZER_LENGTH as usize,
            None,
            None,
            None,
        )
    }
}