use serde::{Deserialize, Serialize};
use solo_core::{Error, Result};
use std::path::Path;
use crate::key_material::SALT_LEN;
/// Schema version stamped into new configs by `SoloConfig::new`.
/// `SoloConfig::read` rejects any file whose `schema_version` differs from this value.
pub const CONFIG_SCHEMA_VERSION: u32 = 1;
/// Top-level configuration record, serialised to and parsed from TOML by
/// `SoloConfig::write` / `SoloConfig::read`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct SoloConfig {
/// Version of the config layout; `read` requires it to equal `CONFIG_SCHEMA_VERSION`.
pub schema_version: u32,
/// Hex-encoded salt; `salt_bytes` requires it to decode to exactly `SALT_LEN` bytes.
pub salt_hex: String,
/// Embedder description. Required: it has no serde default, so parsing fails without it.
pub embedder: EmbedderConfig,
/// Identity settings; falls back to `IdentityConfig::default()` when the table is absent.
#[serde(default)]
pub identity: IdentityConfig,
/// Document settings; falls back to `DocumentConfig::default()` when the table is absent.
#[serde(default)]
pub documents: DocumentConfig,
}
/// Contents of the optional `[identity]` table.
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
pub struct IdentityConfig {
/// Alias strings for the user; empty when the key is omitted.
/// NOTE(review): how aliases are consumed is not visible in this file — confirm with callers.
#[serde(default)]
pub user_aliases: Vec<String>,
}
/// Contents of the optional `[documents]` table. Each field has its own serde
/// default, so a partially filled table is completed field by field.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct DocumentConfig {
/// Target chunk size in tokens (per the field name); defaults to 500.
#[serde(default = "default_chunk_token_target")]
pub chunk_token_target: u32,
/// Overlap in tokens (per the field name); defaults to 50.
/// NOTE(review): nothing here checks that the overlap is smaller than the target — confirm
/// whether the consumer validates it.
#[serde(default = "default_chunk_overlap_tokens")]
pub chunk_overlap_tokens: u32,
/// File extensions, written without a leading dot in the defaults.
/// NOTE(review): presumably an ingestion allow-list — confirm against the consumer.
#[serde(default = "default_allowed_extensions")]
pub allowed_extensions: Vec<String>,
}
/// Serde fallback for `DocumentConfig::chunk_token_target` when the key is omitted.
fn default_chunk_token_target() -> u32 {
    const DEFAULT_CHUNK_TOKEN_TARGET: u32 = 500;
    DEFAULT_CHUNK_TOKEN_TARGET
}
/// Serde fallback for `DocumentConfig::chunk_overlap_tokens` when the key is omitted.
fn default_chunk_overlap_tokens() -> u32 {
    const DEFAULT_CHUNK_OVERLAP_TOKENS: u32 = 50;
    DEFAULT_CHUNK_OVERLAP_TOKENS
}
/// Serde fallback for `DocumentConfig::allowed_extensions` when the key is omitted.
///
/// Returns the built-in extension list as owned strings, in declaration order.
fn default_allowed_extensions() -> Vec<String> {
    const EXTENSIONS: [&str; 12] = [
        "md", "markdown", "txt", "rs", "py", "toml", "yaml", "yml", "json", "pdf", "html", "htm",
    ];
    EXTENSIONS.iter().map(|&ext| String::from(ext)).collect()
}
/// `Default` is built from the same functions serde uses for per-field defaults, so an
/// absent `[documents]` table and an empty one produce the same value.
impl Default for DocumentConfig {
    fn default() -> Self {
        let chunk_token_target = default_chunk_token_target();
        let chunk_overlap_tokens = default_chunk_overlap_tokens();
        let allowed_extensions = default_allowed_extensions();
        Self {
            chunk_token_target,
            chunk_overlap_tokens,
            allowed_extensions,
        }
    }
}
/// Contents of the required `[embedder]` table. All four fields are mandatory
/// (none carries a serde default).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct EmbedderConfig {
/// Embedder name (the tests in this file use "bge-m3").
pub name: String,
/// Embedder version string (the tests in this file use "v1.0").
pub version: String,
/// NOTE(review): presumably the embedding vector dimension — confirm with the consumer.
pub dim: u32,
/// NOTE(review): presumably the element type of the vectors, e.g. "f32" — confirm.
pub dtype: String,
}
impl SoloConfig {
pub fn new(salt: [u8; SALT_LEN], embedder: EmbedderConfig) -> Self {
Self {
schema_version: CONFIG_SCHEMA_VERSION,
salt_hex: hex::encode(salt),
embedder,
identity: IdentityConfig::default(),
documents: DocumentConfig::default(),
}
}
pub fn salt_bytes(&self) -> Result<[u8; SALT_LEN]> {
let bytes = hex::decode(&self.salt_hex)
.map_err(|e| Error::storage(format!("config salt_hex is not valid hex: {e}")))?;
if bytes.len() != SALT_LEN {
return Err(Error::storage(format!(
"config salt_hex must decode to {} bytes, got {}",
SALT_LEN,
bytes.len()
)));
}
let mut out = [0u8; SALT_LEN];
out.copy_from_slice(&bytes);
Ok(out)
}
pub fn write(&self, path: &Path) -> Result<()> {
if path.exists() {
return Err(Error::conflict(format!(
"config already exists: {}",
path.display()
)));
}
let tmp_path = path.with_extension("toml.tmp");
let body = toml::to_string_pretty(self)
.map_err(|e| Error::storage(format!("toml serialize: {e}")))?;
{
let mut tmp_file = std::fs::OpenOptions::new()
.write(true)
.create_new(true)
.open(&tmp_path)
.map_err(|e| Error::storage(format!("open tmp {}: {e}", tmp_path.display())))?;
std::io::Write::write_all(&mut tmp_file, body.as_bytes())
.map_err(|e| Error::storage(format!("write {}: {e}", tmp_path.display())))?;
tmp_file
.sync_all()
.map_err(|e| Error::storage(format!("fsync tmp {}: {e}", tmp_path.display())))?;
}
std::fs::rename(&tmp_path, path)
.map_err(|e| Error::storage(format!("rename to {}: {e}", path.display())))?;
#[cfg(unix)]
{
if let Some(parent) = path.parent() {
if let Ok(d) = std::fs::OpenOptions::new().read(true).open(parent) {
let _ = d.sync_all();
}
}
}
Ok(())
}
pub fn read(path: &Path) -> Result<Self> {
let body = std::fs::read_to_string(path)
.map_err(|e| Error::storage(format!("read {}: {e}", path.display())))?;
let cfg: Self = toml::from_str(&body)
.map_err(|e| Error::storage(format!("toml parse {}: {e}", path.display())))?;
if cfg.schema_version != CONFIG_SCHEMA_VERSION {
return Err(Error::storage(format!(
"config schema_version mismatch: file is v{}, this binary expects v{}",
cfg.schema_version, CONFIG_SCHEMA_VERSION
)));
}
let _ = cfg.salt_bytes()?;
Ok(cfg)
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// File name used for every on-disk fixture.
    const CONFIG_FILE_NAME: &str = "solo.config.toml";

    /// Embedder block shared by all fixtures.
    fn fixture_embedder() -> EmbedderConfig {
        EmbedderConfig {
            name: String::from("bge-m3"),
            version: String::from("v1.0"),
            dim: 1024,
            dtype: String::from("f32"),
        }
    }

    /// Creates a fresh temp dir and returns it with a not-yet-existing config path inside.
    /// The `TempDir` must be kept alive by the caller for as long as the path is used.
    fn scratch_config_path() -> (TempDir, std::path::PathBuf) {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join(CONFIG_FILE_NAME);
        (dir, path)
    }

    /// Like `scratch_config_path`, but also writes `body` verbatim to the config path.
    fn write_raw_config(body: &str) -> (TempDir, std::path::PathBuf) {
        let (dir, path) = scratch_config_path();
        std::fs::write(&path, body).unwrap();
        (dir, path)
    }

    /// A config written with `write` reads back equal, salt included.
    #[test]
    fn roundtrip_via_disk() {
        let (_dir, path) = scratch_config_path();
        let salt = [7u8; SALT_LEN];
        let original = SoloConfig::new(salt, fixture_embedder());
        original.write(&path).unwrap();
        let reloaded = SoloConfig::read(&path).unwrap();
        assert_eq!(original, reloaded);
        assert_eq!(reloaded.salt_bytes().unwrap(), salt);
    }

    /// A second `write` to the same path is rejected.
    #[test]
    fn write_refuses_overwrite() {
        let (_dir, path) = scratch_config_path();
        let cfg = SoloConfig::new([0; SALT_LEN], fixture_embedder());
        cfg.write(&path).unwrap();
        let err = cfg.write(&path).unwrap_err();
        assert!(err.to_string().contains("already exists"), "got: {err}");
    }

    /// A file with an unexpected `schema_version` is rejected.
    #[test]
    fn read_rejects_wrong_schema_version() {
        let (_dir, path) = write_raw_config(
            r#"
schema_version = 99
salt_hex = "00000000000000000000000000000000"
[embedder]
name = "bge-m3"
version = "v1.0"
dim = 1024
dtype = "f32"
"#,
        );
        let err = SoloConfig::read(&path).unwrap_err();
        assert!(err.to_string().contains("schema_version mismatch"), "got: {err}");
    }

    /// A salt that is not hex is rejected at load time.
    #[test]
    fn read_rejects_non_hex_salt() {
        let (_dir, path) = write_raw_config(&format!(
            r#"
schema_version = {CONFIG_SCHEMA_VERSION}
salt_hex = "ZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ"
[embedder]
name = "bge-m3"
version = "v1.0"
dim = 1024
dtype = "f32"
"#
        ));
        let err = SoloConfig::read(&path).unwrap_err();
        assert!(err.to_string().contains("salt_hex"), "got: {err}");
    }

    /// The `[embedder]` table is mandatory.
    #[test]
    fn read_rejects_missing_embedder_block() {
        let (_dir, path) = write_raw_config(&format!(
            r#"
schema_version = {CONFIG_SCHEMA_VERSION}
salt_hex = "00000000000000000000000000000000"
"#
        ));
        let err = SoloConfig::read(&path).unwrap_err();
        let message = err.to_string();
        let mentions_embedder = message.to_lowercase().contains("embedder");
        assert!(mentions_embedder || message.contains("missing"), "got: {err}");
    }

    /// Aliases listed under `[identity]` are loaded in order.
    #[test]
    fn read_loads_user_aliases_from_identity_block() {
        let (_dir, path) = write_raw_config(&format!(
            r#"
schema_version = {CONFIG_SCHEMA_VERSION}
salt_hex = "00000000000000000000000000000000"
[embedder]
name = "bge-m3"
version = "v1.0"
dim = 1024
dtype = "f32"
[identity]
user_aliases = ["alex", "alice"]
"#
        ));
        let cfg = SoloConfig::read(&path).expect("read ok");
        let expected = vec!["alex".to_string(), "alice".to_string()];
        assert_eq!(cfg.identity.user_aliases, expected);
    }

    /// No `[identity]` table means no aliases.
    #[test]
    fn read_defaults_identity_when_block_absent() {
        let (_dir, path) = write_raw_config(&format!(
            r#"
schema_version = {CONFIG_SCHEMA_VERSION}
salt_hex = "00000000000000000000000000000000"
[embedder]
name = "bge-m3"
version = "v1.0"
dim = 1024
dtype = "f32"
"#
        ));
        let cfg = SoloConfig::read(&path).expect("read ok");
        assert!(cfg.identity.user_aliases.is_empty());
    }

    /// An empty `[identity]` table also means no aliases.
    #[test]
    fn read_defaults_user_aliases_when_identity_block_empty() {
        let (_dir, path) = write_raw_config(&format!(
            r#"
schema_version = {CONFIG_SCHEMA_VERSION}
salt_hex = "00000000000000000000000000000000"
[embedder]
name = "bge-m3"
version = "v1.0"
dim = 1024
dtype = "f32"
[identity]
"#
        ));
        let cfg = SoloConfig::read(&path).expect("read ok");
        assert!(cfg.identity.user_aliases.is_empty());
    }

    /// No `[documents]` table yields the built-in document defaults.
    #[test]
    fn read_defaults_documents_when_block_absent() {
        let (_dir, path) = write_raw_config(&format!(
            r#"
schema_version = {CONFIG_SCHEMA_VERSION}
salt_hex = "00000000000000000000000000000000"
[embedder]
name = "bge-m3"
version = "v1.0"
dim = 1024
dtype = "f32"
"#
        ));
        let cfg = SoloConfig::read(&path).expect("read ok");
        let docs = &cfg.documents;
        assert_eq!(docs.chunk_token_target, 500);
        assert_eq!(docs.chunk_overlap_tokens, 50);
        assert!(docs.allowed_extensions.contains(&"md".to_string()));
        assert!(docs.allowed_extensions.contains(&"pdf".to_string()));
    }

    /// Values given under `[documents]` override the defaults.
    #[test]
    fn read_loads_custom_documents_block() {
        let (_dir, path) = write_raw_config(&format!(
            r#"
schema_version = {CONFIG_SCHEMA_VERSION}
salt_hex = "00000000000000000000000000000000"
[embedder]
name = "bge-m3"
version = "v1.0"
dim = 1024
dtype = "f32"
[documents]
chunk_token_target = 250
chunk_overlap_tokens = 25
allowed_extensions = ["md", "txt"]
"#
        ));
        let cfg = SoloConfig::read(&path).expect("read ok");
        let docs = &cfg.documents;
        assert_eq!(docs.chunk_token_target, 250);
        assert_eq!(docs.chunk_overlap_tokens, 25);
        assert_eq!(docs.allowed_extensions, vec!["md".to_string(), "txt".to_string()]);
    }

    /// `DocumentConfig::default()` carries the expected numbers and every expected extension.
    #[test]
    fn document_config_default_matches_plan() {
        let defaults = DocumentConfig::default();
        assert_eq!(defaults.chunk_token_target, 500);
        assert_eq!(defaults.chunk_overlap_tokens, 50);
        let expected = [
            "md", "markdown", "txt", "rs", "py", "toml", "yaml", "yml", "json", "pdf", "html",
            "htm",
        ];
        for ext in &expected {
            let present = defaults.allowed_extensions.iter().any(|have| have == ext);
            assert!(present, "default allowed_extensions missing {ext}");
        }
    }

    /// A salt that is valid hex but too short is rejected at load time.
    #[test]
    fn read_rejects_short_salt_hex() {
        let (_dir, path) = write_raw_config(&format!(
            r#"
schema_version = {CONFIG_SCHEMA_VERSION}
salt_hex = "deadbeef"
[embedder]
name = "bge-m3"
version = "v1.0"
dim = 1024
dtype = "f32"
"#
        ));
        let err = SoloConfig::read(&path).unwrap_err();
        assert!(err.to_string().contains("salt_hex"), "got: {err}");
    }
}