use anyhow::{Context, Result};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::env;
use std::fs::File;
use std::io::{Read, Write};
use std::path::{Path, PathBuf};
use std::sync::Arc;
/// File name looked up in each search location when no explicit config path is given.
pub const DEFAULT_CONFIG_FILE: &str = ".rustkmerrc";
/// Prefix of every environment variable consulted by this module; it is joined
/// to the setting name with an underscore (e.g. `RUSTKMER_THREADS`).
pub const ENV_PREFIX: &str = "RUSTKMER";
/// Owns the active [`GlobalConfig`] and remembers which file it is loaded from
/// and saved to.
#[derive(Debug)]
pub struct ConfigManager {
/// Active configuration behind a shared read/write lock.
global_config: Arc<parking_lot::RwLock<GlobalConfig>>,
/// File used by `load` and `save`; `None` when no file was given or found.
config_file: Option<PathBuf>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GlobalConfig {
pub memory: MemoryConfig,
pub kmer_counting: KmerCountingConfig,
pub database: DatabaseConfig,
pub output: OutputConfig,
pub logging: LoggingConfig,
}
/// Memory-related settings; every field is optional.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MemoryConfig {
/// Memory limit in bytes (the configuration report divides it by 1024*1024 to print MB).
pub memory_limit: Option<u64>,
/// Memory-mapping threshold in bytes (reported in MB the same way).
pub mmap_threshold: Option<u64>,
/// Page size; the unit is not visible in this file — TODO confirm with the consumer.
pub page_size: Option<usize>,
/// NOTE(review): presumably toggles adaptive memory management — confirm with the consumer.
pub adaptive: Option<bool>,
/// NOTE(review): presumably forces memory mapping regardless of the threshold — confirm.
pub force_mmap: Option<bool>,
}
/// K-mer counting settings; every field is optional.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct KmerCountingConfig {
/// Default k-mer size; validation accepts 1 through 64.
pub default_k: Option<usize>,
/// NOTE(review): presumably selects canonical k-mer counting — confirm with the consumer.
pub canonical: Option<bool>,
/// Thread count; validation rejects values above 256. The default is 0
/// (presumably "choose automatically" — TODO confirm).
pub threads: Option<usize>,
/// Hash table size; validation rejects 0.
pub hash_size: Option<usize>,
/// NOTE(review): presumably requests sorted output — confirm with the consumer.
pub sort_output: Option<bool>,
/// NOTE(review): presumably a lower count cutoff — confirm with the consumer.
pub min_count: Option<u32>,
/// NOTE(review): presumably an upper count cutoff — confirm with the consumer.
pub max_count: Option<u32>,
/// NOTE(review): presumably toggles progress reporting — confirm with the consumer.
pub progress: Option<bool>,
}
/// Database settings; every field is optional.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DatabaseConfig {
/// Database file extension; validation rejects an empty string.
pub extension: Option<String>,
/// Database format version, kept as a string.
pub format_version: Option<String>,
/// NOTE(review): presumably toggles database compression — confirm with the consumer.
pub compression: Option<bool>,
/// Indexing mode as free-form text; the accepted values are not visible in this file.
pub indexing: Option<String>,
}
/// Output settings; every field is optional.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OutputConfig {
/// Output format name as free-form text; the accepted values are not visible in this file.
pub format: Option<String>,
/// NOTE(review): presumably adds timestamps to output — confirm with the consumer.
pub timestamps: Option<bool>,
/// NOTE(review): presumably enables statistics output — confirm with the consumer.
pub statistics: Option<bool>,
/// Verbose flag.
pub verbose: Option<bool>,
/// Quiet flag; nothing in this file checks it against `verbose`.
pub quiet: Option<bool>,
}
/// Logging settings; every field is optional.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LoggingConfig {
/// Log level name as free-form text; it is not validated in this file.
pub level: Option<String>,
/// Log file path; `None` by default.
pub file_path: Option<String>,
/// NOTE(review): presumably switches to structured log output — confirm with the consumer.
pub structured: Option<bool>,
}
/// Per-operation settings. `ConfigManager::create_operation_config` fills every
/// section left as `None` with the corresponding section of the global configuration.
#[derive(Debug, Clone)]
pub struct OperationConfig {
/// Memory section override, or `None` to use the global one.
pub memory: Option<MemoryConfig>,
/// K-mer counting section override, or `None` to use the global one.
pub kmer_counting: Option<KmerCountingConfig>,
/// Output section override, or `None` to use the global one.
pub output: Option<OutputConfig>,
}
impl Default for GlobalConfig {
    /// Assembles the configuration from the default value of each section.
    fn default() -> Self {
        let memory = MemoryConfig::default();
        let kmer_counting = KmerCountingConfig::default();
        let database = DatabaseConfig::default();
        let output = OutputConfig::default();
        let logging = LoggingConfig::default();
        Self {
            memory,
            kmer_counting,
            database,
            output,
            logging,
        }
    }
}
impl Default for MemoryConfig {
    /// Default memory settings: 1 GiB limit, 100 MiB mapping threshold,
    /// page size 10000, adaptive on, forced mapping off.
    fn default() -> Self {
        const MIB: u64 = 1024 * 1024;
        Self {
            memory_limit: Some(1024 * MIB),
            mmap_threshold: Some(100 * MIB),
            page_size: Some(10_000),
            adaptive: Some(true),
            force_mmap: Some(false),
        }
    }
}
impl Default for KmerCountingConfig {
    /// Default counting settings: k = 31, canonical, 0 threads, one million
    /// hash slots, sorted output, no count limits, progress on.
    fn default() -> Self {
        Self {
            // Sizes and counts.
            default_k: Some(31),
            threads: Some(0),
            hash_size: Some(1_000_000),
            // No count limits unless configured.
            min_count: None,
            max_count: None,
            // Switches.
            canonical: Some(true),
            sort_output: Some(true),
            progress: Some(true),
        }
    }
}
impl Default for DatabaseConfig {
    /// Default database settings: `rkdb` extension, format version `2`,
    /// compression on, `auto` indexing.
    fn default() -> Self {
        let extension = String::from("rkdb");
        let format_version = String::from("2");
        let indexing = String::from("auto");
        Self {
            extension: Some(extension),
            format_version: Some(format_version),
            compression: Some(true),
            indexing: Some(indexing),
        }
    }
}
impl Default for OutputConfig {
    /// Default output settings: `text` format with timestamps and statistics,
    /// neither verbose nor quiet.
    fn default() -> Self {
        let (on, off) = (Some(true), Some(false));
        Self {
            format: Some(String::from("text")),
            timestamps: on,
            statistics: on,
            verbose: off,
            quiet: off,
        }
    }
}
impl Default for LoggingConfig {
    /// Default logging settings: `info` level, no log file, unstructured output.
    fn default() -> Self {
        let level = String::from("info");
        Self {
            level: Some(level),
            file_path: None,
            structured: Some(false),
        }
    }
}
impl ConfigManager {
/// Creates a manager holding the default configuration and bound to the first
/// config file found by `find_config_file`, if any.
///
/// The file is only located here; it is not read until `load` is called.
pub fn new() -> Self {
    let config_file = Self::find_config_file();
    let global_config = Arc::new(parking_lot::RwLock::new(GlobalConfig::default()));
    Self {
        global_config,
        config_file,
    }
}
pub fn with_config_file<P: AsRef<Path>>(path: P) -> Self {
let config_file = Some(path.as_ref().to_path_buf());
Self {
global_config: Arc::new(parking_lot::RwLock::new(GlobalConfig::default())),
config_file,
}
}
/// Returns the first existing config file among the standard locations.
///
/// Search order:
/// 1. `.rustkmerrc` in the current directory,
/// 2. `$HOME/.rustkmerrc`,
/// 3. `$XDG_CONFIG_HOME/rustkmer/.rustkmerrc`, where an unset or empty
///    `XDG_CONFIG_HOME` falls back to `$HOME/.config` as the XDG Base
///    Directory specification prescribes.
///
/// Environment values are read as OS strings, so directories whose names are
/// not valid UTF-8 still work. Only regular files qualify: a directory that
/// happens to carry the config name is skipped because it could never be loaded.
fn find_config_file() -> Option<PathBuf> {
    // Reads a directory-valued variable, treating unset and empty alike.
    fn dir_from_env(name: &str) -> Option<PathBuf> {
        env::var_os(name)
            .filter(|raw| !raw.is_empty())
            .map(PathBuf::from)
    }

    let home = dir_from_env("HOME");
    let xdg_config = dir_from_env("XDG_CONFIG_HOME")
        .or_else(|| home.as_ref().map(|dir| dir.join(".config")));

    let in_cwd = Some(PathBuf::from(DEFAULT_CONFIG_FILE));
    let in_home = home.as_ref().map(|dir| dir.join(DEFAULT_CONFIG_FILE));
    let in_xdg = xdg_config.map(|dir| dir.join("rustkmer").join(DEFAULT_CONFIG_FILE));

    // Chaining the options keeps the precedence order explicit.
    in_cwd
        .into_iter()
        .chain(in_home)
        .chain(in_xdg)
        .find(|candidate| candidate.is_file())
}
/// Rebuilds the active configuration and swaps it in.
///
/// The base is the content of the bound config file when there is one, or
/// `GlobalConfig::default()` otherwise; environment overrides are applied on top.
///
/// # Errors
/// Fails when the bound file cannot be opened, read or parsed. The previously
/// active configuration is left in place in that case.
pub fn load(&self) -> Result<()> {
    let base = match self.config_file.as_deref() {
        Some(path) => Self::load_from_file(path)
            .with_context(|| format!("Failed to load config from {}", path.display()))?,
        None => GlobalConfig::default(),
    };
    let effective = Self::apply_env_overrides(base);
    *self.global_config.write() = effective;
    Ok(())
}
/// Reads `path` and parses its content as a TOML `GlobalConfig`.
///
/// # Errors
/// Fails when the file cannot be opened or read, or when its content is not a
/// valid TOML representation of the configuration.
fn load_from_file<P: AsRef<Path>>(path: P) -> Result<GlobalConfig> {
    let path = path.as_ref();
    let mut file = File::open(path)
        .with_context(|| format!("Failed to open config file: {}", path.display()))?;
    let mut text = String::new();
    file.read_to_string(&mut text)
        .context("Failed to read config file content")?;
    toml::from_str(&text).context("Failed to parse TOML configuration")
}
pub fn save(&self) -> Result<()> {
if let Some(ref config_file) = self.config_file {
let config = self.global_config.read();
let content = toml::to_string_pretty(&*config)
.with_context(|| "Failed to serialize configuration to TOML")?;
let mut file = File::create(config_file).with_context(|| {
format!("Failed to create config file: {}", config_file.display())
})?;
file.write_all(content.as_bytes())
.with_context(|| "Failed to write config file")?;
Ok(())
} else {
Err(anyhow::anyhow!("No configuration file specified"))
}
}
/// Returns `config` with every valid `RUSTKMER_*` environment override applied.
///
/// A setting is replaced only when its variable is set, is valid Unicode and
/// parses successfully; otherwise the incoming value is kept. This holds for
/// booleans as well as numbers, so a typo in a boolean variable no longer
/// silently forces a hard-coded default over the configured value.
///
/// Booleans accept `true`/`false`, `1`/`0`, `yes`/`no` and `on`/`off`, ignoring
/// case and surrounding whitespace. Numbers ignore surrounding whitespace.
/// String settings (`FORMAT`, `LOG_LEVEL`, `LOG_FILE`) are taken verbatim.
fn apply_env_overrides(mut config: GlobalConfig) -> GlobalConfig {
    // Raw value of `<ENV_PREFIX>_<suffix>`; unset or non-Unicode counts as absent.
    fn raw(suffix: &str) -> Option<String> {
        env::var(format!("{}_{}", ENV_PREFIX, suffix)).ok()
    }
    // Value parsed through `FromStr`; unparsable input counts as absent.
    fn parsed<T: std::str::FromStr>(suffix: &str) -> Option<T> {
        raw(suffix).and_then(|text| text.trim().parse().ok())
    }
    // Boolean value in any of the accepted spellings.
    fn flag(suffix: &str) -> Option<bool> {
        let text = raw(suffix)?;
        match text.trim().to_ascii_lowercase().as_str() {
            "true" | "1" | "yes" | "on" => Some(true),
            "false" | "0" | "no" | "off" => Some(false),
            _ => None,
        }
    }
    // Overwrites `slot` only when an override is actually present.
    fn set<T>(slot: &mut Option<T>, value: Option<T>) {
        if value.is_some() {
            *slot = value;
        }
    }

    let memory = &mut config.memory;
    set(&mut memory.memory_limit, parsed("MEMORY_LIMIT"));
    set(&mut memory.mmap_threshold, parsed("MMAP_THRESHOLD"));
    set(&mut memory.page_size, parsed("PAGE_SIZE"));
    set(&mut memory.adaptive, flag("ADAPTIVE"));
    set(&mut memory.force_mmap, flag("FORCE_MMAP"));

    let counting = &mut config.kmer_counting;
    set(&mut counting.default_k, parsed("DEFAULT_K"));
    set(&mut counting.canonical, flag("CANONICAL"));
    set(&mut counting.threads, parsed("THREADS"));
    set(&mut counting.hash_size, parsed("HASH_SIZE"));
    set(&mut counting.sort_output, flag("SORT_OUTPUT"));
    set(&mut counting.min_count, parsed("MIN_COUNT"));
    set(&mut counting.max_count, parsed("MAX_COUNT"));
    set(&mut counting.progress, flag("PROGRESS"));

    let output = &mut config.output;
    set(&mut output.format, raw("FORMAT"));
    set(&mut output.timestamps, flag("TIMESTAMPS"));
    set(&mut output.statistics, flag("STATISTICS"));
    set(&mut output.verbose, flag("VERBOSE"));
    set(&mut output.quiet, flag("QUIET"));

    let logging = &mut config.logging;
    set(&mut logging.level, raw("LOG_LEVEL"));
    set(&mut logging.file_path, raw("LOG_FILE"));
    set(&mut logging.structured, flag("STRUCTURED"));

    config
}
pub fn get_config(&self) -> GlobalConfig {
self.global_config.read().clone()
}
/// Completes `overrides` with the active global configuration.
///
/// A section present in `overrides` is used as given, replacing the global
/// section wholesale (sections are not merged field by field). A missing
/// section is copied from the global configuration, so every section of the
/// result is `Some`.
pub fn create_operation_config(&self, overrides: OperationConfig) -> OperationConfig {
    let global = self.global_config.read();
    let OperationConfig {
        memory,
        kmer_counting,
        output,
    } = overrides;
    OperationConfig {
        memory: Some(memory.unwrap_or_else(|| global.memory.clone())),
        kmer_counting: Some(kmer_counting.unwrap_or_else(|| global.kmer_counting.clone())),
        output: Some(output.unwrap_or_else(|| global.output.clone())),
    }
}
/// Applies the environment overrides to `config` in place.
///
/// Only the caller's value is touched; the manager's own configuration is not.
pub fn apply_env_overrides_to_config(&self, config: &mut GlobalConfig) {
    let updated = Self::apply_env_overrides(config.clone());
    *config = updated;
}
/// Checks `config` against the value constraints and lists every violation.
///
/// Settings left as `None` are never reported. Messages come back in a fixed
/// order: memory limit, mapping threshold, page size, k-mer size, thread
/// count, hash table size, database extension.
///
/// # Errors
/// Never returns `Err` at present; violations are reported through the
/// returned list, which is empty for a valid configuration.
pub fn validate_config(config: &GlobalConfig) -> Result<Vec<String>> {
    let memory = &config.memory;
    let counting = &config.kmer_counting;

    // Each entry pairs "is this rule violated?" with the message to report.
    let rules = [
        (memory.memory_limit == Some(0), "Memory limit cannot be zero"),
        (
            memory.mmap_threshold == Some(0),
            "Memory mapping threshold cannot be zero",
        ),
        (memory.page_size == Some(0), "Page size cannot be zero"),
        (
            counting.default_k.is_some_and(|k| k == 0 || k > 64),
            "Default k-mer size must be between 1 and 64",
        ),
        (
            counting.threads.is_some_and(|count| count > 256),
            "Thread count should not exceed 256",
        ),
        (
            counting.hash_size == Some(0),
            "Hash table size cannot be zero",
        ),
        (
            config.database.extension.as_deref() == Some(""),
            "Database extension cannot be empty",
        ),
    ];

    let errors = rules
        .iter()
        .filter(|(violated, _)| *violated)
        .map(|(_, message)| message.to_string())
        .collect();
    Ok(errors)
}
/// Reads `RUSTKMER_<KEY>` from the environment and parses it as `T`.
///
/// `key` is upper-cased before the lookup. `fallback` is returned when the
/// variable is unset, is not valid Unicode, or does not parse. Despite the
/// name, only the environment is consulted, not the loaded configuration.
pub fn get_config_with_fallback<T>(&self, key: &str, fallback: T) -> T
where
    T: std::str::FromStr,
{
    let name = format!("{}_{}", ENV_PREFIX, key.to_uppercase());
    match env::var(&name) {
        Ok(raw) => raw.parse().unwrap_or(fallback),
        Err(_) => fallback,
    }
}
pub fn set_env_var(&self, key: &str, value: &str) -> Result<()> {
unsafe { env::set_var(format!("{}_{}", ENV_PREFIX, key.to_uppercase()), value) };
Ok(())
}
pub fn remove_env_var(&self, key: &str) -> Result<()> {
unsafe { env::remove_var(format!("{}_{}", ENV_PREFIX, key.to_uppercase())) };
Ok(())
}
pub fn generate_report(&self) -> ConfigReport {
let config = self.global_config.read();
ConfigReport {
config_file: self.config_file.clone(),
config: config.clone(),
environment_overrides: Self::get_all_env_overrides(),
validation_errors: Self::validate_config(&config).unwrap_or_default(),
}
}
fn get_all_env_overrides() -> HashMap<String, String> {
let mut overrides = HashMap::new();
for (key, value) in env::vars() {
if key.starts_with(ENV_PREFIX) {
let clean_key = key.strip_prefix(ENV_PREFIX).unwrap().to_string();
overrides.insert(clean_key, value);
}
}
overrides
}
}
/// Snapshot of the configuration state as produced by `ConfigManager::generate_report`.
#[derive(Debug, Clone)]
pub struct ConfigReport {
/// Config file the manager is bound to, if any.
pub config_file: Option<PathBuf>,
/// Copy of the configuration that was active when the report was generated.
pub config: GlobalConfig,
/// Environment variables whose names start with the `RUSTKMER` prefix, keyed
/// by the name with that prefix removed.
pub environment_overrides: HashMap<String, String>,
/// Messages from `ConfigManager::validate_config`; empty when the configuration is valid.
pub validation_errors: Vec<String>,
}
impl ConfigReport {
pub fn print(&self) {
println!("=== RustKmer Configuration Report ===");
if let Some(ref file) = self.config_file {
println!("Config file: {}", file.display());
} else {
println!("No configuration file found");
}
println!("\nEnvironment overrides:");
if self.environment_overrides.is_empty() {
println!(" None");
} else {
for (key, value) in &self.environment_overrides {
println!(" {}: {}", key, value);
}
}
println!("\nCurrent configuration:");
println!(
" Memory limit: {:?} MB",
self.config.memory.memory_limit.map(|x| x / 1024 / 1024)
);
println!(
" Memory mapping threshold: {:?} MB",
self.config.memory.mmap_threshold.map(|x| x / 1024 / 1024)
);
println!(" Default k: {:?}", self.config.kmer_counting.default_k);
println!(" Canonical: {:?}", self.config.kmer_counting.canonical);
println!(" Threads: {:?}", self.config.kmer_counting.threads);
println!(" Output format: {:?}", self.config.output.format);
if !self.validation_errors.is_empty() {
println!("\nValidation errors:");
for error in &self.validation_errors {
println!(" - {}", error);
}
} else {
println!("\n✅ Configuration is valid");
}
}
pub fn is_valid(&self) -> bool {
self.validation_errors.is_empty()
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use std::sync::{Mutex, MutexGuard};
    use tempfile::NamedTempFile;

    /// Serializes the tests that read or change `RUSTKMER_*` variables.
    ///
    /// The environment is process-wide and the test harness runs tests on
    /// several threads, so without this lock a test calling `load()` can see
    /// the overrides that `test_env_overrides` sets temporarily.
    static ENV_LOCK: Mutex<()> = Mutex::new(());

    /// Takes the environment lock, recovering it if an earlier test panicked
    /// while holding it.
    fn lock_env() -> MutexGuard<'static, ()> {
        ENV_LOCK
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner())
    }

    #[test]
    fn test_default_config() {
        let manager = ConfigManager::new();
        let config = manager.get_config();
        assert_eq!(config.kmer_counting.default_k, Some(31));
        assert_eq!(config.memory.memory_limit, Some(1024 * 1024 * 1024));
        assert_eq!(config.output.format, Some("text".to_string()));
    }

    #[test]
    fn test_env_overrides() {
        let _env = lock_env();
        // SAFETY: every test touching these variables holds `ENV_LOCK`.
        unsafe {
            env::set_var("RUSTKMER_DEFAULT_K", "21");
            env::set_var("RUSTKMER_THREADS", "4");
            env::set_var("RUSTKMER_VERBOSE", "true");
        }
        let manager = ConfigManager::new();
        let loaded = manager.load();
        let config = manager.get_config();
        // Clean up before asserting so a failure cannot leak the variables
        // into other tests.
        // SAFETY: every test touching these variables holds `ENV_LOCK`.
        unsafe {
            env::remove_var("RUSTKMER_DEFAULT_K");
            env::remove_var("RUSTKMER_THREADS");
            env::remove_var("RUSTKMER_VERBOSE");
        }
        loaded.unwrap();
        assert_eq!(config.kmer_counting.default_k, Some(21));
        assert_eq!(config.kmer_counting.threads, Some(4));
        assert_eq!(config.output.verbose, Some(true));
    }

    #[test]
    fn test_config_file_operations() -> Result<()> {
        let _env = lock_env();
        let mut temp_file = NamedTempFile::new()?;
        let config_content = r#"
[database]
format = "binary"
compression = false
[memory]
memory_limit = 512000000 # 512MB
mmap_threshold = 50000000 # 50MB
[kmer_counting]
default_k = 25
canonical = true
threads = 2
[output]
format = "json"
verbose = true
[logging]
level = "info"
"#;
        temp_file.write_all(config_content.as_bytes())?;
        let manager = ConfigManager::with_config_file(temp_file.path());
        manager.load()?;
        let config = manager.get_config();
        assert_eq!(config.memory.memory_limit, Some(512000000));
        assert_eq!(config.memory.mmap_threshold, Some(50000000));
        assert_eq!(config.kmer_counting.default_k, Some(25));
        assert_eq!(config.kmer_counting.canonical, Some(true));
        assert_eq!(config.kmer_counting.threads, Some(2));
        assert_eq!(config.output.format, Some("json".to_string()));
        assert_eq!(config.output.verbose, Some(true));
        manager.save()?;
        Ok(())
    }

    #[test]
    fn test_operation_config() {
        let _env = lock_env();
        let manager = ConfigManager::new();
        manager.load().unwrap();
        let overrides = OperationConfig {
            memory: Some(MemoryConfig {
                memory_limit: Some(2048 * 1024 * 1024),
                ..Default::default()
            }),
            kmer_counting: Some(KmerCountingConfig {
                default_k: Some(33),
                ..Default::default()
            }),
            output: Some(OutputConfig {
                format: Some("csv".to_string()),
                ..Default::default()
            }),
        };
        let op_config = manager.create_operation_config(overrides);
        assert_eq!(
            op_config.memory.unwrap().memory_limit,
            Some(2048 * 1024 * 1024)
        );
        assert_eq!(op_config.kmer_counting.unwrap().default_k, Some(33));
        assert_eq!(op_config.output.unwrap().format, Some("csv".to_string()));
    }

    #[test]
    fn test_config_validation() -> Result<()> {
        let mut config = GlobalConfig::default();
        config.memory.memory_limit = Some(0);
        config.kmer_counting.default_k = Some(0);
        config.memory.page_size = Some(0);
        let errors = ConfigManager::validate_config(&config).unwrap();
        assert!(!errors.is_empty());
        assert!(errors.contains(&"Memory limit cannot be zero".to_string()));
        config = GlobalConfig::default();
        let errors = ConfigManager::validate_config(&config).unwrap();
        assert!(errors.is_empty());
        Ok(())
    }
}