Skip to main content

sshash_lib/builder/
config.rs

1//! Build configuration for SSHash dictionary construction
2//!
3//! Mirrors the C++ `build_configuration` struct with parameters for
4//! dictionary building, minimizer computation, and resource limits.
5
6use crate::constants::DEFAULT_SEED;
7use std::path::PathBuf;
8
/// Configuration parameters for building an SSHash dictionary.
///
/// All fields are public, so a value can be assembled with struct-update
/// syntax on top of `BuildConfiguration::default()`. Constructing a value
/// does not check it; call `validate` before using a hand-built
/// configuration.
#[derive(Debug, Clone, PartialEq)]
pub struct BuildConfiguration {
    /// K-mer length (must be odd, between 3 and 63 inclusive)
    pub k: usize,

    /// Minimizer length (must be strictly less than `k`; even values are
    /// accepted by `validate`)
    pub m: usize,

    /// Seed for hash functions
    pub seed: u64,

    /// Number of threads for parallel operations (0 = all available cores)
    pub num_threads: usize,

    /// RAM limit in GiB for external sorting
    pub ram_limit_gib: usize,

    /// PTHash lambda parameter (trade-off for MPHF construction).
    /// Typically 3.5-4.0 for minimal size, higher for faster queries.
    /// `validate` accepts values in `[1.0, 100.0]`.
    pub lambda: f64,

    /// Build in canonical mode (k-mer and reverse-complement map to same entry)
    pub canonical: bool,

    /// Build weighted dictionary (with k-mer abundance/weights)
    pub weighted: bool,

    /// Verbose output during construction
    pub verbose: bool,

    /// Directory for temporary files during construction
    pub tmp_dirname: PathBuf,
}
43
44impl Default for BuildConfiguration {
45    fn default() -> Self {
46        Self {
47            k: 31,
48            m: 19,  // Must be odd, less than k
49            seed: DEFAULT_SEED,
50            num_threads: 0, // 0 = use all available cores
51            ram_limit_gib: 8,
52            lambda: 6.0,  // C++ default
53            canonical: false,
54            weighted: false,
55            verbose: true,
56            tmp_dirname: PathBuf::from("sshash_tmp"),
57        }
58    }
59}
60
61impl BuildConfiguration {
62    /// Create a new build configuration with the specified k-mer and minimizer lengths
63    pub fn new(k: usize, m: usize) -> Result<Self, String> {
64        let config = Self {
65            k,
66            m,
67            ..Self::default()
68        };
69        config.validate()?;
70        Ok(config)
71    }
72    
73    /// Validate the configuration parameters
74    pub fn validate(&self) -> Result<(), String> {
75        // Check k is odd and in valid range
76        if self.k % 2 == 0 {
77            return Err(format!("k must be odd, got k={}", self.k));
78        }
79        if self.k < 3 || self.k > 63 {
80            return Err(format!("k must be in range [3, 63], got k={}", self.k));
81        }
82        
83        // Check m is less than k
84        if self.m >= self.k {
85            return Err(format!("m must be less than k, got m={}, k={}", self.m, self.k));
86        }
87        
88        // Check lambda is reasonable
89        if self.lambda < 1.0 || self.lambda > 100.0 {
90            return Err(format!("lambda should be in range [1.0, 100.0], got {}", self.lambda));
91        }
92        
93        Ok(())
94    }
95    
96    /// Log configuration parameters via tracing
97    pub fn print(&self) {
98        tracing::info!("Build Configuration:");
99        tracing::info!("  k = {}", self.k);
100        tracing::info!("  m = {}", self.m);
101        tracing::debug!("  seed = {}", self.seed);
102        if self.num_threads == 0 {
103            tracing::info!("  num_threads = all available cores");
104        } else {
105            tracing::info!("  num_threads = {}", self.num_threads);
106        }
107        tracing::debug!("  ram_limit_gib = {}", self.ram_limit_gib);
108        tracing::debug!("  lambda = {}", self.lambda);
109        tracing::info!("  canonical = {}", self.canonical);
110        tracing::debug!("  weighted = {}", self.weighted);
111        tracing::debug!("  verbose = {}", self.verbose);
112        tracing::debug!("  tmp_dirname = {:?}", self.tmp_dirname);
113    }
114}
115
#[cfg(test)]
mod tests {
    use super::*;

    /// Default configuration with only `k` replaced.
    fn with_k(k: usize) -> BuildConfiguration {
        BuildConfiguration {
            k,
            ..BuildConfiguration::default()
        }
    }

    /// Default configuration with only `m` replaced.
    fn with_m(m: usize) -> BuildConfiguration {
        BuildConfiguration {
            m,
            ..BuildConfiguration::default()
        }
    }

    /// The defaults are k=31, m=19 and pass validation.
    #[test]
    fn test_default_config() {
        let defaults = BuildConfiguration::default();
        assert_eq!(defaults.k, 31);
        assert_eq!(defaults.m, 19);
        assert!(defaults.validate().is_ok());
    }

    /// `new` stores the requested lengths when they are valid.
    #[test]
    fn test_new_config() {
        let built = BuildConfiguration::new(21, 11).unwrap();
        assert_eq!(built.k, 21);
        assert_eq!(built.m, 11);
    }

    /// An even `k` is rejected.
    #[test]
    fn test_validate_even_k() {
        let outcome = with_k(30).validate();
        assert!(outcome.is_err());
    }

    /// An even `m` is accepted as long as it is below `k`.
    #[test]
    fn test_validate_even_m() {
        let outcome = with_m(20).validate();
        assert!(outcome.is_ok());
    }

    /// `m` equal to `k` is rejected.
    #[test]
    fn test_validate_m_ge_k() {
        let equal_lengths = BuildConfiguration {
            k: 21,
            m: 21,
            ..BuildConfiguration::default()
        };
        assert!(equal_lengths.validate().is_err());
    }

    /// `k` above and below the supported range is rejected.
    #[test]
    fn test_validate_k_out_of_range() {
        for k in [65, 1] {
            assert!(with_k(k).validate().is_err());
        }
    }
}