use crate::error::{Result, RypeError};
use serde::Deserialize;
use std::collections::HashMap;
use std::fs;
use std::path::{Path, PathBuf};
/// Top-level shape of the index-building TOML config file.
///
/// Deserialized by `parse_config`, which additionally rejects a config whose
/// `buckets` map is empty.
#[derive(Debug, Deserialize)]
pub struct ConfigFile {
/// Settings read from the `[index]` table.
pub index: IndexSettings,
/// Bucket definitions keyed by bucket name (one `[buckets.<name>]` table each).
pub buckets: HashMap<String, BucketDefinition>,
}
/// Parameters deserialized from the `[index]` table of the config file.
#[derive(Debug, Deserialize)]
pub struct IndexSettings {
/// The `k` parameter. Falls back to `default_k()` (64) when the key is
/// omitted; `parse_config` only accepts 16, 32, or 64.
#[serde(default = "default_k")]
pub k: usize,
/// The `window` parameter. Required; no range check is applied in this file.
pub window: usize,
/// The `salt` parameter. Required.
/// NOTE(review): presumably a hashing salt — confirm against the code that consumes it.
pub salt: u64,
/// Path given by the `output` key. Required.
/// NOTE(review): presumably where the built index is written; whether a
/// relative path is resolved against the config directory is not shown here.
pub output: PathBuf,
/// Optional `max_shard_size`; `None` when the key is omitted.
/// NOTE(review): units are not established in this file.
pub max_shard_size: Option<usize>,
/// Optional `invert` flag; `None` when the key is omitted, leaving the
/// effective default to whoever consumes this struct.
#[serde(default)]
pub invert: Option<bool>,
/// Optional `orient_sequences` flag; `None` when the key is omitted, leaving
/// the effective default to whoever consumes this struct.
#[serde(default)]
pub orient_sequences: Option<bool>,
}
/// Serde default for `IndexSettings::k`, used when the config omits `k`.
fn default_k() -> usize {
    const DEFAULT_K: usize = 64;
    DEFAULT_K
}
/// One bucket entry from the config (a `[buckets.<name>]` table).
#[derive(Debug, Deserialize)]
pub struct BucketDefinition {
/// Paths listed under `files`. `validate_config` requires the list to be
/// non-empty and every path (resolved against the config directory when
/// relative) to exist.
pub files: Vec<PathBuf>,
}
/// Loads the TOML file at `path` and deserializes it into a [`ConfigFile`].
///
/// After deserialization two structural checks run, in this order:
/// the `buckets` map must not be empty, and `index.k` must be 16, 32, or 64.
/// Whether the files referenced by the config exist is not checked here;
/// that is the job of [`validate_config`].
///
/// # Errors
///
/// * an I/O error (via `RypeError::io`) when the file cannot be read;
/// * a validation error when the TOML cannot be deserialized, when no bucket
///   is defined, or when `k` is not one of the accepted values.
pub fn parse_config(path: &Path) -> Result<ConfigFile> {
    let raw = match fs::read_to_string(path) {
        Ok(text) => text,
        Err(io_err) => return Err(RypeError::io(path, "read config file", io_err)),
    };

    let parsed: ConfigFile = match toml::from_str(&raw) {
        Ok(cfg) => cfg,
        Err(toml_err) => {
            return Err(RypeError::validation(format!(
                "Failed to parse TOML config: {}",
                toml_err
            )));
        }
    };

    if parsed.buckets.is_empty() {
        return Err(RypeError::validation(
            "Config must define at least one bucket",
        ));
    }

    // Copy `k` out first so the whole config can be moved into the `Ok` arm.
    let k = parsed.index.k;
    match k {
        16 | 32 | 64 => Ok(parsed),
        unsupported => Err(RypeError::validation(format!(
            "Config error: k must be 16, 32, or 64 (got {})",
            unsupported
        ))),
    }
}
/// Checks that every bucket in `config` lists at least one file and that each
/// listed file exists on disk.
///
/// Relative file paths are resolved against `config_dir` via [`resolve_path`].
/// Buckets are visited in the map's iteration order and the first problem
/// found is returned.
///
/// # Errors
///
/// * a validation error when a bucket has an empty `files` list;
/// * an I/O error of kind `NotFound` (via `RypeError::io`) carrying the
///   resolved path when a listed file does not exist.
pub fn validate_config(config: &ConfigFile, config_dir: &Path) -> Result<()> {
    for (name, definition) in config.buckets.iter() {
        if definition.files.is_empty() {
            return Err(RypeError::validation(format!(
                "Bucket '{}' has no files",
                name
            )));
        }

        // Lazily resolve each entry and stop at the first one that is missing.
        let first_missing = definition
            .files
            .iter()
            .map(|listed| resolve_path(config_dir, listed))
            .find(|candidate| !candidate.exists());

        if let Some(missing) = first_missing {
            let cause = std::io::Error::new(
                std::io::ErrorKind::NotFound,
                format!("File not found for bucket '{}'", name),
            );
            return Err(RypeError::io(&missing, "validate config", cause));
        }
    }
    Ok(())
}
/// Returns `path` unchanged when it is absolute, otherwise `base` joined
/// with `path`.
pub fn resolve_path(base: &Path, path: &Path) -> PathBuf {
    if path.is_relative() {
        return base.join(path);
    }
    path.to_path_buf()
}
/// Top-level shape of the bucket-add TOML config file.
///
/// Deserialized by `parse_bucket_add_config`.
#[derive(Debug, Deserialize)]
pub struct BucketAddConfig {
/// The `[target]` table: which index to operate on.
pub target: BucketAddTarget,
/// The `[assignment]` table: the assignment mode and its options.
pub assignment: AssignmentSettings,
/// The `[files]` table: the file paths involved.
pub files: FileList,
}
/// The `[target]` table of a bucket-add config.
#[derive(Debug, Deserialize)]
pub struct BucketAddTarget {
/// Path of the target index. `validate_bucket_add_config` resolves it
/// against the config directory when relative and requires it to exist.
pub index: PathBuf,
}
/// Assignment mode for a bucket-add config.
///
/// Deserialized as an internally tagged enum: the `mode` key selects the
/// variant (`"new_bucket"`, `"existing_bucket"`, or `"best_bin"`) and the
/// remaining keys of the same table populate that variant's fields.
#[derive(Debug, Deserialize)]
#[serde(tag = "mode")]
pub enum AssignmentSettings {
/// Selected by `mode = "new_bucket"`.
#[serde(rename = "new_bucket")]
NewBucket {
/// Optional bucket name.
/// NOTE(review): how a missing name is filled in is decided by the
/// consumer of this config — not visible in this file.
bucket_name: Option<String>,
},
/// Selected by `mode = "existing_bucket"`.
#[serde(rename = "existing_bucket")]
ExistingBucket {
/// Bucket name; required for this mode.
bucket_name: String,
},
/// Selected by `mode = "best_bin"`.
#[serde(rename = "best_bin")]
BestBin {
/// Required threshold; `parse_bucket_add_config` range-checks it
/// against 0.0 and 1.0.
threshold: f64,
/// Fallback behavior; uses `BestBinFallback::default()` (`CreateNew`)
/// when the key is omitted.
#[serde(default)]
fallback: BestBinFallback,
},
}
/// Fallback choice for the `best_bin` assignment mode.
///
/// Variants are spelled in snake_case in the config (`"create_new"`,
/// `"skip"`, `"error"`). NOTE(review): presumably this is the action taken
/// when no bucket meets the threshold — the consuming logic is not in this file.
#[derive(Debug, Deserialize, Clone, Copy, Default, PartialEq)]
#[serde(rename_all = "snake_case")]
pub enum BestBinFallback {
/// Config value `"create_new"`; this is the default variant.
#[default]
CreateNew,
/// Config value `"skip"`.
Skip,
/// Config value `"error"`.
Error,
}
/// The `[files]` table of a bucket-add config.
#[derive(Debug, Deserialize)]
pub struct FileList {
/// File paths. `parse_bucket_add_config` requires at least one entry and
/// `validate_bucket_add_config` requires each to exist (relative paths are
/// resolved against the config directory).
pub paths: Vec<PathBuf>,
/// Optional bucket name; `None` when the key is omitted.
/// NOTE(review): how this interacts with the `bucket_name` fields of
/// `AssignmentSettings` is not shown in this file — confirm with the consumer.
pub bucket_name: Option<String>,
}
/// Loads the TOML file at `path` and deserializes it into a
/// [`BucketAddConfig`].
///
/// After deserialization two checks run, in this order:
///
/// 1. for the `best_bin` assignment mode, `threshold` must lie in the closed
///    range `0.0..=1.0` (NaN is rejected too);
/// 2. `[files].paths` must contain at least one entry.
///
/// Whether the referenced index and files exist is not checked here; that is
/// the job of [`validate_bucket_add_config`].
///
/// # Errors
///
/// * an I/O error (via `RypeError::io`) when the file cannot be read;
/// * a validation error when the TOML cannot be deserialized, when the
///   threshold is out of range or NaN, or when no file path is listed.
pub fn parse_bucket_add_config(path: &Path) -> Result<BucketAddConfig> {
    let contents =
        fs::read_to_string(path).map_err(|e| RypeError::io(path, "read config file", e))?;
    let config: BucketAddConfig = toml::from_str(&contents)
        .map_err(|e| RypeError::validation(format!("Failed to parse TOML config: {}", e)))?;

    if let AssignmentSettings::BestBin { threshold, .. } = &config.assignment {
        // TOML allows `nan` as a float. Every ordered comparison with NaN is
        // false, so a `< 0.0 || > 1.0` test would let it through; a range
        // membership test is false for NaN and therefore rejects it.
        if !(0.0..=1.0).contains(threshold) {
            return Err(RypeError::validation(format!(
                "Config error: threshold must be between 0.0 and 1.0 (got {})",
                threshold
            )));
        }
    }

    if config.files.paths.is_empty() {
        return Err(RypeError::validation(
            "Config must specify at least one file in [files].paths",
        ));
    }

    Ok(config)
}
/// Checks that the target index and every listed file of a bucket-add config
/// exist on disk.
///
/// Relative paths are resolved against `config_dir` via [`resolve_path`].
/// The target index is checked first, then the files in listed order; the
/// first missing path is reported.
///
/// # Errors
///
/// Returns an I/O error of kind `NotFound` (via `RypeError::io`) carrying the
/// resolved path of the first entry that does not exist.
pub fn validate_bucket_add_config(config: &BucketAddConfig, config_dir: &Path) -> Result<()> {
    let target_index = resolve_path(config_dir, &config.target.index);
    if !target_index.exists() {
        let cause = std::io::Error::new(std::io::ErrorKind::NotFound, "Target index not found");
        return Err(RypeError::io(&target_index, "validate config", cause));
    }

    // Lazily resolve each entry and stop at the first one that is missing.
    let first_missing = config
        .files
        .paths
        .iter()
        .map(|listed| resolve_path(config_dir, listed))
        .find(|candidate| !candidate.exists());

    match first_missing {
        Some(missing) => Err(RypeError::io(
            &missing,
            "validate config",
            std::io::Error::new(std::io::ErrorKind::NotFound, "File not found"),
        )),
        None => Ok(()),
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs::File;
    use std::io::Write;
    use tempfile::{tempdir, TempDir};

    /// Writes `toml_text` to `config.toml` inside a fresh temporary directory.
    ///
    /// The directory guard is returned alongside the path so the caller keeps
    /// the directory alive for as long as the file is needed.
    fn write_config(toml_text: &str) -> (TempDir, PathBuf) {
        let dir = tempdir().unwrap();
        let config_path = dir.path().join("config.toml");
        let mut file = File::create(&config_path).unwrap();
        file.write_all(toml_text.as_bytes()).unwrap();
        (dir, config_path)
    }

    #[test]
    fn test_parse_valid_config() {
        let (_dir, config_path) = write_config(
            r#"
[index]
window = 50
salt = 0x5555555555555555
output = "test.ryidx"
[buckets.TestBucket]
files = ["test.fa"]
"#,
        );

        let result = parse_config(&config_path);
        assert!(result.is_ok());

        let config = result.unwrap();
        assert_eq!(config.index.window, 50);
        assert_eq!(config.buckets.len(), 1);
        assert!(config.buckets.contains_key("TestBucket"));
    }

    #[test]
    fn test_parse_empty_buckets() {
        let (_dir, config_path) = write_config(
            r#"
[index]
window = 50
salt = 0x5555555555555555
output = "test.ryidx"
"#,
        );

        assert!(parse_config(&config_path).is_err());
    }

    #[test]
    fn test_resolve_path() {
        let base = Path::new("/home/user");

        let relative = Path::new("file.txt");
        assert_eq!(
            resolve_path(base, relative),
            PathBuf::from("/home/user/file.txt")
        );

        let absolute = Path::new("/tmp/file.txt");
        assert_eq!(resolve_path(base, absolute), PathBuf::from("/tmp/file.txt"));
    }

    #[test]
    fn test_parse_config_with_orient_sequences() {
        let (_dir, config_path) = write_config(
            r#"
[index]
window = 50
salt = 0x5555555555555555
output = "test.ryidx"
orient_sequences = true
[buckets.TestBucket]
files = ["test.fa"]
"#,
        );

        let config = parse_config(&config_path).unwrap();
        assert_eq!(config.index.orient_sequences, Some(true));
    }

    #[test]
    fn test_parse_config_orient_sequences_defaults_none() {
        let (_dir, config_path) = write_config(
            r#"
[index]
window = 50
salt = 0x5555555555555555
output = "test.ryidx"
[buckets.TestBucket]
files = ["test.fa"]
"#,
        );

        let config = parse_config(&config_path).unwrap();
        assert_eq!(config.index.orient_sequences, None);
    }

    #[test]
    fn test_parse_config_orient_sequences_false() {
        let (_dir, config_path) = write_config(
            r#"
[index]
window = 50
salt = 0x5555555555555555
output = "test.ryidx"
orient_sequences = false
[buckets.TestBucket]
files = ["test.fa"]
"#,
        );

        let config = parse_config(&config_path).unwrap();
        assert_eq!(config.index.orient_sequences, Some(false));
    }
}