// Builder defaults and option setters for `LtFmIndexBuilder`.
use super::{
    TextType,
    BwtBlockSize,
    LtFmIndexBuilder,
    BuildError,
};

/// Width of a pointer (`usize`) on the compilation target, in bits.
///
/// Derived from `size_of::<usize>()` rather than one `#[cfg]` per supported
/// width: the value is still 32 on 32-bit targets and 64 on 64-bit targets,
/// and the constant stays defined on every other pointer width instead of
/// failing to compile.
const POINTER_WIDTH: usize = std::mem::size_of::<usize>() * 8;

/// BWT block size that a builder starts with and that
/// `bwt_block_size_is_default` restores.
const DEFAULT_BBS: BwtBlockSize = BwtBlockSize::_128;
/// Suffix array sampling ratio that a builder starts with and that
/// `set_suffix_array_sampling_ratio_to_default` restores.
const DEFAULT_SASR: u64 = 2;
impl Default for LtFmIndexBuilder {
    /// Builds the baseline configuration: no explicit text type, no explicit
    /// lookup table k-mer size, and the default BWT block size and suffix
    /// array sampling ratio.
    fn default() -> Self {
        Self {
            // `None` means "not chosen explicitly" for both optional settings.
            lookup_table_kmer_size: None,
            text_type: None,
            suffix_array_sampling_ratio: DEFAULT_SASR,
            bwt_block_size: DEFAULT_BBS,
        }
    }
}

impl LtFmIndexBuilder {
    /// Creates a builder with every option at its default value.
    pub fn new() -> Self {
        Self::default()
    }
    /// Lets the [TextType] be inferred (this is the default option).
    ///  - How does this differ from specifying the [TextType] manually?
    ///    - With ACGTY as the text, the inferred type is [TextType::NucleotideWithNoise] (ACGT, with Y as the wildcard).
    ///    - Specifying [TextType::NucleotideOnly] instead forces both T and Y to be treated as the wildcard.
    ///       - Locating AATTAA, AAYYAA, AA@@AA, AA**AA, ... then all give the same result.
    pub fn text_type_is_inferred(self) -> Self {
        Self {
            text_type: None,
            ..self
        }
    }
    /// Fixes the [TextType] to [TextType::NucleotideOnly].
    pub fn text_type_is_nucleotide_only(self) -> Self {
        Self {
            text_type: Some(TextType::NucleotideOnly),
            ..self
        }
    }
    /// Fixes the [TextType] to [TextType::NucleotideWithNoise].
    pub fn text_type_is_nucleotide_with_noise(self) -> Self {
        Self {
            text_type: Some(TextType::NucleotideWithNoise),
            ..self
        }
    }
    /// Fixes the [TextType] to [TextType::AminoAcidOnly].
    pub fn text_type_is_amino_acid_only(self) -> Self {
        Self {
            text_type: Some(TextType::AminoAcidOnly),
            ..self
        }
    }
    /// Fixes the [TextType] to [TextType::AminoAcidWithNoise].
    pub fn text_type_is_amino_acid_with_noise(self) -> Self {
        Self {
            text_type: Some(TextType::AminoAcidWithNoise),
            ..self
        }
    }
    /// Resets the BWT block size to the default one.
    pub fn bwt_block_size_is_default(self) -> Self {
        Self {
            bwt_block_size: DEFAULT_BBS,
            ..self
        }
    }
    /// Selects the 64-sized BWT block.
    pub fn bwt_block_size_is_64(self) -> Self {
        Self {
            bwt_block_size: BwtBlockSize::_64,
            ..self
        }
    }
    /// Selects the 128-sized BWT block.
    pub fn bwt_block_size_is_128(self) -> Self {
        Self {
            bwt_block_size: BwtBlockSize::_128,
            ..self
        }
    }
    /// Resets the suffix array sampling ratio to the default one.
    pub fn set_suffix_array_sampling_ratio_to_default(self) -> Self {
        Self {
            suffix_array_sampling_ratio: DEFAULT_SASR,
            ..self
        }
    }
    /// Sets the suffix array sampling ratio.
    ///
    /// # Errors
    ///
    /// Returns [BuildError::SasrBound] when `sampling_ratio` is 0.
    pub fn set_suffix_array_sampling_ratio(
        self,
        sampling_ratio: u64,
    ) -> Result<Self, BuildError> {
        // Only strictly positive ratios are accepted.
        if sampling_ratio == 0 {
            return Err(BuildError::SasrBound);
        }
        Ok(Self {
            suffix_array_sampling_ratio: sampling_ratio,
            ..self
        })
    }
    /// Drops any explicitly chosen k-mer size so that the default k-mer size
    /// is used for the lookup table (stored as `None`).
    pub fn set_lookup_table_kmer_size_to_default(self) -> Self {
        Self {
            lookup_table_kmer_size: None,
            ..self
        }
    }
    /// Sets the size of the k-mer used in the lookup table.
    ///
    /// # Errors
    ///
    /// Returns [BuildError::LtksBound] when `kmer_size` is smaller than 2 or
    /// larger than half of `POINTER_WIDTH`.
    pub fn set_lookup_table_kmer_size(self, kmer_size: usize) -> Result<Self, BuildError> {
        // Inclusive range of accepted sizes: 2 up to half the pointer width.
        let accepted_sizes = 2..=(POINTER_WIDTH / 2);

        if !accepted_sizes.contains(&kmer_size) {
            return Err(BuildError::LtksBound);
        }
        Ok(Self {
            lookup_table_kmer_size: Some(kmer_size),
            ..self
        })
    }
}