use std::{
collections::BTreeMap,
path::{Path, PathBuf},
};
use anyhow::{Context, Result, anyhow, bail};
use figment::{
Figment,
providers::{Env, Format, Toml},
};
use serde::{Deserialize, Deserializer, Serialize, de};
use serde_json::Value;
use url::Url;
/// Parses a human-readable byte size such as `"4096"`, `"128 MiB"` or `"1.5gb"`.
///
/// The input is a non-negative decimal number optionally followed by a unit.
/// Units are case-insensitive; surrounding whitespace and whitespace between
/// the number and the unit are ignored. Decimal units (`k`/`kb`, `m`/`mb`,
/// `g`/`gb`, `t`/`tb`) are powers of 1000, binary units (`kib`, `mib`, `gib`,
/// `tib`) are powers of 1024, and no unit (or `b`) means bytes.
///
/// Fractional results are truncated toward zero.
///
/// # Errors
///
/// Returns a human-readable message when the input is empty, the number does
/// not parse, the value is negative or non-finite, the unit is unknown, or the
/// result does not fit in `usize`.
fn parse_byte_size(raw: &str) -> Result<usize, String> {
    let trimmed = raw.trim();
    if trimmed.is_empty() {
        return Err("byte-size value is empty".to_owned());
    }

    // The unit starts at the first ASCII letter; everything before it is the number.
    let split = trimmed
        .find(|c: char| c.is_ascii_alphabetic())
        .unwrap_or(trimmed.len());
    let (number, unit) = trimmed.split_at(split);

    let number: f64 = number
        .trim()
        .parse()
        .map_err(|_| format!("byte-size value {raw:?} is not a number"))?;
    if !number.is_finite() || number < 0.0 {
        return Err(format!("byte-size value {raw:?} must be non-negative"));
    }

    let multiplier: f64 = match unit.trim().to_ascii_lowercase().as_str() {
        "" | "b" => 1.0,
        "k" | "kb" => 1_000.0,
        "kib" => 1_024.0,
        "m" | "mb" => 1_000_000.0,
        "mib" => 1_048_576.0,
        "g" | "gb" => 1_000_000_000.0,
        "gib" => 1_073_741_824.0,
        // Decimal terabytes, mirroring the k/m/g pairs above.
        "t" | "tb" => 1_000_000_000_000.0,
        "tib" => 1_099_511_627_776.0,
        other => {
            return Err(format!(
                "byte-size unit {other:?} not recognized (try MiB / GiB)"
            ));
        }
    };

    let bytes = number * multiplier;
    // `usize::MAX as f64` rounds up to 2^N, which itself does not fit in a
    // `usize`. Comparing with `>=` rejects that value instead of letting the
    // cast below silently saturate.
    if !bytes.is_finite() || bytes >= usize::MAX as f64 {
        return Err(format!("byte-size value {raw:?} overflows usize"));
    }
    Ok(bytes as usize)
}
/// Deserializes an optional string field, also accepting a bare integer,
/// float or boolean and rendering it back to text with `to_string`.
///
/// The tests in this file rely on this for values arriving through the
/// environment layer (e.g. a secret consisting only of digits).
fn lenient_string<'de, D>(deserializer: D) -> Result<Option<String>, D::Error>
where
    D: Deserializer<'de>,
{
    // Untagged: serde tries the variants top to bottom and keeps the first match.
    #[derive(Deserialize)]
    #[serde(untagged)]
    enum Repr {
        Text(String),
        Int(i64),
        Float(f64),
        Bool(bool),
    }

    let Some(repr) = Option::<Repr>::deserialize(deserializer)? else {
        return Ok(None);
    };
    let rendered = match repr {
        Repr::Text(text) => text,
        Repr::Int(number) => number.to_string(),
        Repr::Float(number) => number.to_string(),
        Repr::Bool(flag) => flag.to_string(),
    };
    Ok(Some(rendered))
}
fn deserialize_byte_size_opt<'de, D>(deserializer: D) -> Result<Option<usize>, D::Error>
where
D: Deserializer<'de>,
{
#[derive(Deserialize)]
#[serde(untagged)]
enum Repr {
Bytes(u64),
Text(String),
}
let repr: Option<Repr> = Option::deserialize(deserializer)?;
match repr {
None => Ok(None),
Some(Repr::Bytes(value)) => usize::try_from(value).map(Some).map_err(de::Error::custom),
Some(Repr::Text(value)) => parse_byte_size(&value).map(Some).map_err(de::Error::custom),
}
}
/// Returns `true` when `url` uses one of the local-filesystem schemes
/// (`file` or `file+uring`).
pub fn is_local(url: &Url) -> bool {
    let scheme = url.scheme();
    scheme == "file" || scheme == "file+uring"
}
/// Converts a local-filesystem URL into a path.
///
/// Returns `None` for non-local schemes and for local URLs that cannot be
/// converted to a file path.
pub fn local_path(url: &Url) -> Option<PathBuf> {
    if !is_local(url) {
        return None;
    }
    url.to_file_path().ok()
}
/// Builds the address of `suffix` underneath `base`.
///
/// Local bases yield a plain filesystem path (not a URL); every other base
/// yields the URL text with `/suffix` appended after trimming trailing slashes.
pub fn child_uri(base: &Url, suffix: &str) -> String {
    match local_path(base) {
        Some(dir) => dir.join(suffix).display().to_string(),
        None => {
            let root = base.as_str().trim_end_matches('/');
            format!("{}/{suffix}", root)
        }
    }
}
/// Renders `url` for humans: local URLs become a path with the home directory
/// contracted to `~`, anything else is printed as the URL itself.
pub fn display(url: &Url) -> String {
    match local_path(url) {
        Some(path) => contract_home(&path).display().to_string(),
        None => url.to_string(),
    }
}
/// Converts a filesystem path into a `file://` URL, absolutizing relative
/// paths first.
///
/// # Errors
///
/// Fails when the path cannot be made absolute or cannot be expressed as a
/// file URL.
pub fn url_for_path(path: impl AsRef<Path>) -> Result<Url> {
    let given = path.as_ref();
    let absolute = if given.is_absolute() {
        given.to_path_buf()
    } else {
        std::path::absolute(given)
            .with_context(|| format!("failed to absolutize {}", given.display()))?
    };
    match Url::from_file_path(&absolute) {
        Ok(url) => Ok(url),
        Err(()) => Err(anyhow!(
            "failed to convert path {} into a file:// URL",
            absolute.display()
        )),
    }
}
/// Annotated template for pond's `config.toml`.
///
/// Every line of the template is a TOML comment, so the text documents the
/// available sections (`[sources]`, `[embeddings]`, `[search]`,
/// `[maintenance]`, `[runtime]`, `[storage]`, `[creds.<name>]`) without
/// setting any value.
pub const DEFAULT_CONFIG_TOML: &str = "\
# pond configuration.
#
# pond ships built-in defaults, so every setting here is optional - delete this
# file and pond still works. Uncomment and edit to override.
# Where pond looks for source data to import. One entry per adapter type
# (`claude-code`, `codex-cli`, ...). `pond sync` with no arguments syncs every
# entry; `pond sync <adapter>` syncs just one. With an empty `[sources]`,
# `pond sync` runs an interactive discovery against the known default paths
# and writes the picks back here.
#
# Future wrap: pond is single-namespace in v1 (spec.md#wire-namespace-resolution); `[sources]` is
# flat here. When multi-namespace pond lands, source registration becomes
# per-tenant under `[namespaces.<ns>.sources.<adapter>]`. Pre-v1 the schema
# is breakable; the rename is operationally free until a real second tenant
# exists.
#
# [sources.claude-code]
# enabled = true
# path = \"~/.claude/projects\"
#
# [sources.codex-cli]
# enabled = true
# path = \"~/.codex/sessions\"
#
# Set `enabled = false` to keep the section but skip it on `pond sync`;
# re-enable via `pond sync <adapter>`.
# Embeddings. Search runs hybrid (vector + FTS) whenever the store has any
# vectors, and FTS-only otherwise - the model loads lazily on the first hybrid
# query, so there's no cost on FTS-only corpora. `model` selects the
# HuggingFace XLM-RoBERTa model; `dim` declares its output width and is baked
# into the messages.vector schema on table creation - it must equal the
# model's hidden_size and be a multiple of 8 (IVF_PQ subspace stride).
#
# Common pairings:
# model = \"intfloat/multilingual-e5-small\" dim = 384 (default)
# model = \"intfloat/multilingual-e5-base\" dim = 768
# model = \"intfloat/multilingual-e5-large\" dim = 1024
#
# A different-dim model needs a fresh data dir; pond enforces this at the
# schema boundary.
#
# [embeddings]
# model = \"intfloat/multilingual-e5-small\"
# dim = 384
# Search tuning. Leave unset for Lance defaults; set when tuning IVF_PQ recall
# against a corpus.
#
# [search]
# nprobes = 16
# refine_factor = 2
# Storage maintenance. Tunes the compaction + cleanup pass that runs inside
# `pond sync` and `pond index optimize`.
#
# - `compaction_fragment_cap` is the per-task fragment-count backstop: a
# planned compaction task touching at least this many fragments always runs
# even when the write-amplification veto would skip it. Default 64; 0
# disables the veto and runs every task Lance plans.
# - `cleanup_older_than` is the manifest-retention window for the safe cleanup
# pass. Accepts `Ns` / `Nm` / `Nh` / `Nd` (default `1d`, floor `1h` - it is
# what protects in-flight readers). Versions older than this are reclaimed
# by Lance's OCC-coordinated GC.
# - `index_lag_threshold` is the minimum unindexed-fragment count before a
# per-intent append/rebuild runs in `pond index optimize`; the brute-force
# fallback keeps queries correct while fragments accumulate. Default 4.
#
# [maintenance]
# compaction_fragment_cap = 64
# cleanup_older_than = \"1d\"
# index_lag_threshold = 4
# Long-running process caps. Both accept either a plain byte count or a
# humansize-style suffix (\"128 MiB\", \"1 GiB\"). Both are optional - leave
# unset to let pond pick the backend-aware default:
# local FS : index_cache = 256 MiB, metadata_cache = 128 MiB
# remote : index_cache = 2 GiB, metadata_cache = 512 MiB
# Lance's library defaults (6 GiB / 1 GiB) are too generous for a per-session
# `pond mcp` process; tightening them is what keeps RSS under the 500 MiB target
# without measurable latency regressions on typical agent-history corpora.
#
# [runtime]
# index_cache_bytes = \"256 MiB\"
# metadata_cache_bytes = \"128 MiB\"
# Storage address and credentials (spec.md#storage-url-grammar).
#
# `path` is the default destination used when `--storage-path` (env
# `POND_STORAGE_PATH`) is not passed. Absent = the platform-local data dir.
# Addresses are URLs; the `s3+https` form carries the endpoint, bucket, and
# prefix in one token:
#
# /abs/path or ~/path local filesystem
# s3://bucket/prefix AWS S3 (ambient credential chain)
# s3+https://host/bucket/prefix S3-compatible endpoint (Hetzner, R2, B2, MinIO)
# gs://bucket/prefix Google Cloud Storage
# az://account/container/prefix Azure Blob
#
# Credentials live in `[creds.<name>]` sets and bind to URLs by `scope`
# prefix - longest match wins (spec.md#creds-scope-match); a set without
# `scope` matches any URL. With no matching set, the standard cloud SDK
# chain applies (AWS_* env, shared credentials file, instance metadata).
# Secrets never go in URLs or CLI flags; besides inline values,
# `access_key_id_file` / `secret_access_key_file` read a file and
# `secret_access_key_command` runs a command (e.g. `op read ...`). `extra`
# holds verbatim `object_store` options pond has not typed.
#
# Every field mirrors to env: `POND_STORAGE_PATH`, `POND_CREDS_<NAME>_<FIELD>`
# (set names are lowercase alphanumeric, so the env grammar is unambiguous).
# Precedence: CLI flag > POND_* env > this file > ambient cloud chain.
# Probe a destination end-to-end with `pond storage check`.
#
# Future wrap: pond is single-namespace in v1 (spec.md#wire-namespace-resolution);
# `[storage]` is flat here on the assumption of one bucket per pond. When
# multi-namespace pond lands this becomes `[namespaces.<ns>.storage]`.
#
# [storage]
# path = \"s3+https://nbg1.your-objectstorage.com/my-pond\"
#
# [creds.default]
# access_key_id = \"...\"
# secret_access_key = \"...\"
";
/// Top-level pond configuration, one field per TOML section.
///
/// Unknown sections are rejected; any section that is absent falls back to
/// its `Default` value.
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
#[serde(deny_unknown_fields, default)]
pub struct Config {
    /// `[embeddings]`: embedding model id and vector width.
    pub embeddings: EmbeddingsConfig,
    /// `[search]`: optional vector-search tuning knobs.
    pub search: SearchConfig,
    /// `[maintenance]`: compaction / cleanup / index-lag tuning.
    pub maintenance: MaintenanceConfig,
    /// `[runtime]`: cache size caps for long-running processes.
    pub runtime: RuntimeConfig,
    /// `[sources.<adapter>]`: raw per-adapter settings keyed by adapter name.
    pub sources: BTreeMap<String, Value>,
    /// `[storage]`: default storage destination.
    pub storage: StorageConfig,
    /// `[creds.<name>]`: named credential sets.
    pub creds: BTreeMap<String, CredsSet>,
}
/// The `[storage]` section. Any key other than `path` is rejected.
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
#[serde(deny_unknown_fields, default)]
pub struct StorageConfig {
    /// Default storage address (a local path or a URL), if configured.
    pub path: Option<String>,
}
/// One `[creds.<name>]` credential set.
///
/// The key id and the secret can each be supplied in several alternative
/// ways; `Config::validate_creds` rejects sets that use more than one way for
/// the same value.
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
#[serde(deny_unknown_fields, default)]
pub struct CredsSet {
    /// URL prefix this set applies to; a set without a scope is the catch-all.
    pub scope: Option<String>,
    /// Inline access key id (scalar values are accepted and stringified).
    #[serde(deserialize_with = "lenient_string")]
    pub access_key_id: Option<String>,
    /// File to read the access key id from.
    pub access_key_id_file: Option<PathBuf>,
    /// Inline secret access key (scalar values are accepted and stringified).
    #[serde(deserialize_with = "lenient_string")]
    pub secret_access_key: Option<String>,
    /// File to read the secret access key from.
    pub secret_access_key_file: Option<PathBuf>,
    /// Command whose output provides the secret access key.
    pub secret_access_key_command: Option<String>,
    /// Region override (scalar values are accepted and stringified).
    #[serde(deserialize_with = "lenient_string")]
    pub region: Option<String>,
    /// Explicit virtual-hosted-style addressing toggle, if set.
    pub virtual_hosted_style_request: Option<bool>,
    /// Additional string options kept verbatim.
    pub extra: BTreeMap<String, String>,
}
/// The `[runtime]` section: optional cache caps, each given either as a plain
/// byte count or as text with a size suffix such as `"256 MiB"`.
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
#[serde(deny_unknown_fields, default)]
pub struct RuntimeConfig {
    /// Index cache cap in bytes; `None` when unset.
    #[serde(deserialize_with = "deserialize_byte_size_opt")]
    pub index_cache_bytes: Option<usize>,
    /// Metadata cache cap in bytes; `None` when unset.
    #[serde(deserialize_with = "deserialize_byte_size_opt")]
    pub metadata_cache_bytes: Option<usize>,
}
/// The `[search]` section: optional vector-search tuning. Both knobs are
/// `None` unless set in the config.
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
#[serde(deny_unknown_fields, default)]
pub struct SearchConfig {
    /// `nprobes` override, if set.
    pub nprobes: Option<usize>,
    /// `refine_factor` override, if set.
    pub refine_factor: Option<u32>,
}
/// The `[maintenance]` section: optional tuning for the compaction and
/// cleanup pass. Every field is `None` unless set in the config.
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
#[serde(deny_unknown_fields, default)]
pub struct MaintenanceConfig {
    /// Per-task fragment-count backstop for compaction.
    pub compaction_fragment_cap: Option<usize>,
    /// Retention window for cleanup, kept as the raw text (e.g. `"1d"`);
    /// it is not parsed here.
    pub cleanup_older_than: Option<String>,
    /// Minimum unindexed-fragment count before an index update runs.
    pub index_lag_threshold: Option<usize>,
}
/// The `[embeddings]` section. Missing fields take the values from this
/// type's `Default` impl; unknown keys are rejected.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
#[serde(default)]
pub struct EmbeddingsConfig {
    /// HuggingFace model id of the embedding model.
    pub model: String,
    /// Embedding vector width; `validate` requires a positive multiple of 8.
    pub dim: usize,
}
impl Default for EmbeddingsConfig {
    /// Uses the crate-wide default model id and embedding dimension.
    fn default() -> Self {
        let model = crate::embed::DEFAULT_MODEL_ID.to_owned();
        let dim = crate::sessions::DEFAULT_EMBEDDING_DIM;
        Self { model, dim }
    }
}
/// Picks the default storage location and returns it as a `file://` URL.
///
/// Resolution order: `<xdg_data_home>/pond` when that directory is absolute,
/// then `<home>/.local/share/pond`, then `.pond` relative to the current
/// directory.
pub fn default_storage_path(xdg_data_home: Option<PathBuf>, home: Option<PathBuf>) -> Result<Url> {
    let location = match (xdg_data_home, home) {
        // A relative XDG directory is ignored and resolution falls through.
        (Some(xdg), _) if xdg.is_absolute() => xdg.join("pond"),
        (_, Some(home)) => home.join(".local").join("share").join("pond"),
        (_, None) => PathBuf::from(".pond"),
    };
    url_for_path(location)
}
/// Picks the default config file location.
///
/// Resolution order: `<xdg_config_home>/pond/config.toml` when that directory
/// is absolute, then `<home>/.config/pond/config.toml`, then the relative
/// `.pond.toml`.
pub fn default_config_path(xdg_config_home: Option<PathBuf>, home: Option<PathBuf>) -> PathBuf {
    match (xdg_config_home, home) {
        // A relative XDG directory is ignored and resolution falls through.
        (Some(xdg), _) if xdg.is_absolute() => xdg.join("pond").join("config.toml"),
        (_, Some(home)) => home.join(".config").join("pond").join("config.toml"),
        (_, None) => PathBuf::from(".pond.toml"),
    }
}
impl Config {
pub fn load(path: impl AsRef<Path>) -> Result<Self> {
Ok(Self::load_with_provenance(path)?.0)
}
pub fn load_str(body: &str) -> Result<Self> {
let figment = Figment::new().merge(Toml::string(body)).merge(env_mirror());
let config: Self = figment
.extract_lossy()
.map_err(|error| anyhow!("failed to load config: {error}"))?;
config.embeddings.validate()?;
config.validate_creds()?;
Ok(config)
}
pub fn load_with_provenance(path: impl AsRef<Path>) -> Result<(Self, Figment)> {
let path = path.as_ref();
let figment = Figment::new().merge(Toml::file(path)).merge(env_mirror());
let config: Self = figment.extract_lossy().map_err(|error| {
if let Some(recipe) = detect_legacy_storage(path) {
return anyhow!("{recipe}");
}
anyhow!("failed to load config {}: {error}", path.display())
})?;
config.embeddings.validate()?;
config.validate_creds()?;
config.embeddings.install_runtime();
if let Some(threshold) = config.maintenance.index_lag_threshold {
crate::substrate::init_index_lag_threshold(threshold);
}
Ok((config, figment))
}
fn validate_creds(&self) -> Result<()> {
let mut scopeless: Option<&str> = None;
let mut scopes: BTreeMap<String, &str> = BTreeMap::new();
for (name, set) in &self.creds {
let mut chars = name.chars();
let head_ok = chars.next().is_some_and(|c| c.is_ascii_lowercase());
if !head_ok
|| name.len() > 16
|| !chars.all(|c| c.is_ascii_lowercase() || c.is_ascii_digit())
{
bail!(
"creds set name {name:?} must match [a-z][a-z0-9]{{0,15}} (lowercase alphanumeric, no separators)"
);
}
if set.access_key_id.is_some() && set.access_key_id_file.is_some() {
bail!("[creds.{name}] sets both access_key_id and access_key_id_file; pick one");
}
let secret_variants = [
set.secret_access_key.is_some(),
set.secret_access_key_file.is_some(),
set.secret_access_key_command.is_some(),
]
.iter()
.filter(|present| **present)
.count();
if secret_variants > 1 {
bail!(
"[creds.{name}] sets more than one of secret_access_key / secret_access_key_file / secret_access_key_command; pick one"
);
}
match set.scope.as_deref() {
None => {
if let Some(other) = scopeless {
bail!(
"[creds.{other}] and [creds.{name}] are both scope-less; at most one catch-all set is allowed - add a `scope` to one"
);
}
scopeless = Some(name);
}
Some(scope) => {
let canonical = crate::substrate::parse_scope(scope)
.map(|url| url.as_str().trim_end_matches('/').to_owned())
.with_context(|| {
format!("[creds.{name}] scope {scope:?} is not a valid URL prefix")
})?;
if let Some(other) = scopes.insert(canonical, name) {
bail!(
"[creds.{other}] and [creds.{name}] declare the same scope {scope:?}; merge them or narrow one"
);
}
}
}
}
Ok(())
}
pub fn resolve_sources(&self, adapter: Option<&str>) -> Result<Vec<(String, Value)>> {
match adapter {
None => Ok(self
.sources
.iter()
.filter_map(|(name, blob)| take_enabled(name, blob))
.collect()),
Some(name) => {
let blob = self
.sources
.get(name)
.ok_or_else(|| anyhow!("no [sources.{name}] entry in config"))?;
take_enabled(name, blob).map(|entry| vec![entry]).ok_or_else(|| {
anyhow!(
"source [{name}] is disabled (enabled = false); run `pond sync {name}` to re-enable"
)
})
}
}
}
pub fn disabled_source_names(&self) -> Vec<&str> {
self.sources
.iter()
.filter_map(|(name, blob)| {
let enabled = blob
.get("enabled")
.and_then(Value::as_bool)
.unwrap_or(false);
if enabled { None } else { Some(name.as_str()) }
})
.collect()
}
}
/// Builds the environment layer that mirrors config fields as `POND_*`
/// variables.
///
/// Only `storage_path` and `creds_*` keys are admitted, except creds keys
/// ending in `_extra`. Admitted keys are turned into dotted config paths:
/// `storage_path` becomes `storage.path`, and `creds_<name>_<field>` becomes
/// `creds.<name>.<field>`.
fn env_mirror() -> Env {
    Env::prefixed("POND_")
        .filter(|key| {
            let lowered = key.as_str().to_ascii_lowercase();
            if lowered == "storage_path" {
                return true;
            }
            lowered.starts_with("creds_") && !lowered.ends_with("_extra")
        })
        .map(|key| {
            let lowered = key.as_str().to_ascii_lowercase();
            // Creds keys have two path separators (section, set name); the
            // remaining underscores belong to the field name.
            let separators = if lowered.starts_with("creds_") { 2 } else { 1 };
            lowered.replacen('_', ".", separators).into()
        })
}
// Key names used by the old `[storage]` passthrough map. `detect_legacy_storage`
// matches the endpoint and virtual-hosted lists case-insensitively.
// NOTE(review): the access-key and secret-key lists are not referenced in this
// part of the file - presumably consumed by callers elsewhere; confirm.

/// Old `[storage]` keys that carried the endpoint URL.
pub const LEGACY_ENDPOINT_KEYS: &[&str] = &["aws_endpoint", "endpoint"];
/// Old `[storage]` keys that carried the access key id.
pub const LEGACY_ACCESS_KEY_KEYS: &[&str] = &["aws_access_key_id", "access_key_id"];
/// Old `[storage]` keys that carried the secret access key.
pub const LEGACY_SECRET_KEY_KEYS: &[&str] = &["aws_secret_access_key", "secret_access_key"];
/// Old `[storage]` keys that toggled virtual-hosted-style addressing.
pub const LEGACY_VIRTUAL_HOSTED_KEYS: &[&str] = &[
"aws_virtual_hosted_style_request",
"virtual_hosted_style_request",
];
/// Detects a config file that still uses the old `[storage]` passthrough map
/// and, if so, returns a message explaining how to rewrite it.
///
/// Returns `None` when the file cannot be read or parsed, has no `[storage]`
/// table, or the table holds nothing but `path`. The message never echoes
/// values from the old section other than the endpoint host.
fn detect_legacy_storage(path: &Path) -> Option<String> {
    let text = std::fs::read_to_string(path).ok()?;
    let value: toml::Value = toml::from_str(&text).ok()?;
    let storage = value.get("storage")?.as_table()?;
    // `all` is true for an empty table, so this also covers "no keys at all".
    if storage.keys().all(|key| key == "path") {
        return None;
    }

    // Host part of the legacy endpoint, or a placeholder when there is no
    // endpoint key, its value is not a string, or it has no `scheme://`.
    let host = storage
        .iter()
        .find(|(key, _)| {
            LEGACY_ENDPOINT_KEYS
                .iter()
                .any(|name| key.eq_ignore_ascii_case(name))
        })
        .map(|(_, value)| value.as_str().unwrap_or_default())
        .and_then(|e| e.split("://").nth(1))
        .unwrap_or("<endpoint-host>");

    // The legacy flag was accepted as a boolean or as the text "true" / "1".
    let truthy = |value: &toml::Value| match value {
        toml::Value::Boolean(flag) => *flag,
        toml::Value::String(text) => text.eq_ignore_ascii_case("true") || text == "1",
        _ => false,
    };
    let virtual_hosted = storage.iter().any(|(key, value)| {
        LEGACY_VIRTUAL_HOSTED_KEYS
            .iter()
            .any(|name| key.eq_ignore_ascii_case(name))
            && truthy(value)
    });

    // With virtual-hosted addressing the first host label is the bucket, so
    // move it into the path, provided a multi-label host remains.
    let folded = host
        .split_once('.')
        .filter(|(_, rest)| virtual_hosted && rest.contains('.'));
    let path_recipe = if let Some((bucket, rest)) = folded {
        format!("s3+https://{rest}/{bucket}/<prefix>")
    } else {
        format!("s3+https://{host}/<bucket>/<prefix>")
    };

    let mut recipe = format!(
        "config {} uses the old [storage] passthrough map; rewrite it as:\n\n[storage]\npath = \"{path_recipe}\"\n\n[creds.default]\n",
        path.display(),
    );
    recipe += "access_key_id = \"...\" # copy from the old [storage] section\n";
    recipe += "secret_access_key = \"...\" # copy from the old [storage] section\n";
    recipe += "\n(the endpoint and bucket fold into the URL; allow_http is scheme-derived; virtual-hosted addressing defaults on; the region is autodetected - append ?region=<x> to the URL only if your store insists. `pond storage check` verifies the result end-to-end, and `pond init` can apply this rewrite for you)";
    Some(recipe)
}
/// Returns `(name, blob)` with the `enabled` key removed when the entry's
/// `enabled` key is the boolean `true`; otherwise `None`.
///
/// Entries that omit `enabled`, or set it to a non-boolean, count as disabled.
fn take_enabled(name: &str, blob: &Value) -> Option<(String, Value)> {
    if !matches!(blob.get("enabled"), Some(Value::Bool(true))) {
        return None;
    }
    let mut clean = blob.clone();
    if let Value::Object(fields) = &mut clean {
        fields.remove("enabled");
    }
    Some((name.to_owned(), clean))
}
/// Expands a leading `~` (to `home`) and `$VAR` / `${VAR}` references (from
/// the process environment) in `path`.
///
/// Paths that are not valid UTF-8 are returned unchanged. Per the tests in
/// this file, unset variables and `~user` forms are left as written.
pub fn expand_home_under(path: &Path, home: &Path) -> PathBuf {
    match path.to_str() {
        None => path.to_path_buf(),
        Some(text) => {
            let home_text = home.to_string_lossy();
            let expanded = shellexpand::full_with_context_no_errors(
                text,
                || Some(home_text.clone()),
                |var| std::env::var(var).ok(),
            );
            PathBuf::from(expanded.into_owned())
        }
    }
}
/// Replaces a leading `home` prefix of `path` with `~`.
///
/// `home` itself becomes `~`; paths outside `home` are returned unchanged.
pub fn contract_home_under(path: &Path, home: &Path) -> PathBuf {
    let Ok(rest) = path.strip_prefix(home) else {
        return path.to_path_buf();
    };
    if rest.as_os_str().is_empty() {
        PathBuf::from("~")
    } else {
        Path::new("~").join(rest)
    }
}
/// Contracts the `HOME` directory prefix of `path` to `~`.
///
/// Returns `path` unchanged when `HOME` is not set.
pub fn contract_home(path: &Path) -> PathBuf {
    std::env::var_os("HOME").map_or_else(
        || path.to_path_buf(),
        |home| contract_home_under(path, Path::new(&home)),
    )
}
impl EmbeddingsConfig {
    /// Checks that `model` is non-blank and `dim` is a positive multiple of 8.
    ///
    /// # Errors
    ///
    /// Returns an error describing the first rule that is violated.
    pub fn validate(&self) -> Result<()> {
        let model_blank = self.model.trim().is_empty();
        if model_blank {
            bail!("embeddings.model must be a non-empty HuggingFace model id");
        }
        let dim_ok = self.dim != 0 && self.dim.is_multiple_of(8);
        if !dim_ok {
            bail!(
                "embeddings.dim = {} must be a positive multiple of 8 (IVF_PQ subspace stride)",
                self.dim,
            );
        }
        Ok(())
    }

    /// Passes the model id to `crate::embed::init_model_id` and the dimension
    /// to `crate::sessions::init_embedding_dim`.
    pub fn install_runtime(&self) {
        let Self { model, dim } = self;
        crate::embed::init_model_id(model.clone());
        crate::sessions::init_embedding_dim(*dim);
    }
}
// Unit tests for config loading, validation, path helpers and the env mirror.
// Tests that touch the filesystem or environment run inside `figment::Jail`
// or a `TempDir`.
#[cfg(test)]
mod tests {
#![allow(clippy::expect_used, clippy::unwrap_used, clippy::result_large_err)]
use super::*;
use serde_json::Value;
use tempfile::TempDir;
// `validate` accepts the defaults and rejects a blank model, a dim that is
// not a multiple of 8, and a zero dim.
#[test]
fn validate_catches_empty_model_and_bad_dim() {
assert!(EmbeddingsConfig::default().validate().is_ok());
let bad_model = EmbeddingsConfig {
model: " ".to_owned(),
dim: 768,
};
assert!(bad_model.validate().is_err());
let bad_dim = EmbeddingsConfig {
model: "intfloat/multilingual-e5-base".to_owned(),
dim: 100,
};
assert!(bad_dim.validate().is_err());
let zero_dim = EmbeddingsConfig {
model: "intfloat/multilingual-e5-base".to_owned(),
dim: 0,
};
assert!(zero_dim.validate().is_err());
}
// A missing config file is not an error; the built-in defaults apply.
#[test]
fn config_load_missing_file_falls_back_to_builtin() {
let config = Config::load("/nonexistent/pond-config-xyz.toml").unwrap();
assert_eq!(config.embeddings, EmbeddingsConfig::default());
}
// The shipped template loads and leaves the embeddings defaults untouched.
#[test]
fn default_config_toml_loads_to_the_builtin_defaults() {
let dir = TempDir::new().unwrap();
let path = dir.path().join("config.toml");
std::fs::write(&path, DEFAULT_CONFIG_TOML).unwrap();
let config = Config::load(&path).unwrap();
assert_eq!(config.embeddings, EmbeddingsConfig::default());
assert_eq!(config.embeddings.model, crate::embed::DEFAULT_MODEL_ID);
assert_eq!(
config.embeddings.dim,
crate::sessions::DEFAULT_EMBEDDING_DIM
);
}
// Storage path resolution: absolute XDG dir, then home, then relative `.pond`.
#[test]
fn default_storage_path_follows_xdg_then_home() {
let resolved =
default_storage_path(Some(PathBuf::from("/xdg")), Some(PathBuf::from("/home")))
.unwrap();
assert!(is_local(&resolved));
assert_eq!(local_path(&resolved).unwrap(), PathBuf::from("/xdg/pond"));
let resolved = default_storage_path(
Some(PathBuf::from("relative")),
Some(PathBuf::from("/home")),
)
.unwrap();
assert_eq!(
local_path(&resolved).unwrap(),
PathBuf::from("/home/.local/share/pond"),
);
let resolved = default_storage_path(None, None).unwrap();
assert!(is_local(&resolved));
assert!(
local_path(&resolved).unwrap().ends_with(".pond"),
"fallback path should end with .pond: {resolved}",
);
}
// `~` and `~/...` expand to the given home; absolute paths and `~user` do not change.
#[test]
fn expand_home_under_handles_tilde_forms() {
let home = Path::new("/srv/me");
assert_eq!(
expand_home_under(Path::new("~"), home),
PathBuf::from("/srv/me")
);
assert_eq!(
expand_home_under(Path::new("~/.codex/sessions"), home),
PathBuf::from("/srv/me/.codex/sessions"),
);
assert_eq!(
expand_home_under(Path::new("/etc/passwd"), home),
PathBuf::from("/etc/passwd"),
);
assert_eq!(
expand_home_under(Path::new("~user/elsewhere"), home),
PathBuf::from("~user/elsewhere"),
);
}
// `$VAR` and `${VAR}` expand from the environment; unset variables stay literal.
#[test]
fn expand_home_under_handles_env_vars() {
figment::Jail::expect_with(|jail| {
jail.set_env("POND_TEST_EXPAND_DIR", "/srv/data");
let home = Path::new("/srv/me");
assert_eq!(
expand_home_under(Path::new("$POND_TEST_EXPAND_DIR/pond"), home),
PathBuf::from("/srv/data/pond"),
);
assert_eq!(
expand_home_under(Path::new("${POND_TEST_EXPAND_DIR}/pond"), home),
PathBuf::from("/srv/data/pond"),
);
assert_eq!(
expand_home_under(Path::new("$POND_TEST_UNSET_VAR/x"), home),
PathBuf::from("$POND_TEST_UNSET_VAR/x"),
);
Ok(())
});
}
// Contraction replaces the home prefix with `~` and leaves other paths alone.
#[test]
fn contract_home_under_inverts_expansion() {
let home = Path::new("/srv/me");
assert_eq!(
contract_home_under(Path::new("/srv/me/.local/share/pond"), home),
PathBuf::from("~/.local/share/pond"),
);
assert_eq!(
contract_home_under(Path::new("/srv/me"), home),
PathBuf::from("~")
);
assert_eq!(
contract_home_under(Path::new("/etc/passwd"), home),
PathBuf::from("/etc/passwd"),
);
}
// `resolve_sources` returns all enabled entries, one named entry, or an
// error for disabled / unknown adapters; `enabled` is stripped from results.
#[test]
fn resolve_sources_returns_one_or_all_or_errors() {
let temp = TempDir::new().unwrap();
let body = "\
[sources.claude-code]
enabled = true
path = \"/srv/claude\"
[sources.codex-cli]
enabled = true
path = \"/srv/codex\"
[sources.opencode]
enabled = false
";
let path = temp.path().join("config.toml");
std::fs::write(&path, body).expect("write config");
let config = Config::load(&path).unwrap();
let all = config.resolve_sources(None).unwrap();
assert_eq!(all.len(), 2);
let names: Vec<_> = all.iter().map(|(n, _)| n.as_str()).collect();
assert!(names.contains(&"claude-code"));
assert!(names.contains(&"codex-cli"));
for (_, blob) in &all {
assert!(blob.get("enabled").is_none(), "enabled should be stripped");
}
let one = config.resolve_sources(Some("codex-cli")).unwrap();
assert_eq!(one.len(), 1);
assert_eq!(one[0].0, "codex-cli");
assert_eq!(
one[0].1.get("path").and_then(Value::as_str),
Some("/srv/codex"),
);
let disabled = config.resolve_sources(Some("opencode"));
let err = disabled
.expect_err("disabled adapter must error")
.to_string();
assert!(err.contains("enabled = false"), "got: {err}");
assert!(err.contains("pond sync opencode"), "got: {err}");
assert!(config.resolve_sources(Some("nope")).is_err());
assert_eq!(config.disabled_source_names(), vec!["opencode"]);
}
// Non-file schemes such as `memory://` are not treated as local.
#[test]
fn memory_uri_is_classified_as_remote() {
let url = Url::parse("memory:///pond-remote-test").expect("memory uri parses");
assert!(
!is_local(&url),
"memory:// is not a local-filesystem URL: {url}",
);
assert!(
local_path(&url).is_none(),
"local_path must return None for non-file schemes",
);
}
// `[storage]` and `[creds.*]` sections deserialize into the typed structs.
#[test]
fn storage_and_creds_round_trip() {
figment::Jail::expect_with(|jail| {
jail.create_file(
"config.toml",
r#"
[storage]
path = "s3+https://nbg1.example.com/my-pond"
[creds.default]
access_key_id = "AKIA123"
secret_access_key = "shh"
[creds.work]
scope = "s3+https://fsn1.example.com/work-pond/"
access_key_id = "AKIA456"
secret_access_key_command = "op read op://vault/pond/secret"
region = "fsn1"
virtual_hosted_style_request = false
extra = { request_timeout = "60 seconds" }
"#,
)?;
let config = Config::load("config.toml").expect("config loads");
assert_eq!(
config.storage.path.as_deref(),
Some("s3+https://nbg1.example.com/my-pond"),
);
assert_eq!(config.creds.len(), 2);
let work = &config.creds["work"];
assert_eq!(
work.secret_access_key_command.as_deref(),
Some("op read op://vault/pond/secret"),
);
assert_eq!(work.virtual_hosted_style_request, Some(false));
assert_eq!(work.extra["request_timeout"], "60 seconds");
Ok(())
});
}
// Each case pairs an invalid creds config with a fragment expected in the error.
#[test]
fn creds_validators_reject_bad_shapes() {
let cases: &[(&str, &str)] = &[
("[creds.a]\nacces_key_id = \"x\"\n", "acces_key_id"),
("[creds.my_set]\naccess_key_id = \"x\"\n", "[a-z][a-z0-9]"),
("[creds.A1]\naccess_key_id = \"x\"\n", "[a-z][a-z0-9]"),
(
"[creds.a]\nsecret_access_key = \"x\"\nsecret_access_key_command = \"cat\"\n",
"more than one",
),
(
"[creds.a]\naccess_key_id = \"x\"\naccess_key_id_file = \"/k\"\n",
"pick one",
),
(
"[creds.a]\naccess_key_id = \"x\"\n[creds.b]\naccess_key_id = \"y\"\n",
"scope-less",
),
(
"[creds.a]\nscope = \"s3+https://h:443/b/\"\naccess_key_id = \"x\"\n[creds.b]\nscope = \"s3+https://h/b\"\naccess_key_id = \"y\"\n",
"same scope",
),
];
figment::Jail::expect_with(|jail| {
for (body, needle) in cases {
jail.create_file("config.toml", body)?;
let err = Config::load("config.toml").expect_err(body).to_string();
assert!(
err.contains(needle),
"want {needle:?} in error for {body:?}, got: {err}",
);
}
Ok(())
});
}
// An old-style `[storage]` map fails to load with the rewrite recipe, which
// must not leak the key id or secret from the old section.
#[test]
fn legacy_storage_map_errors_with_the_rewrite_recipe() {
figment::Jail::expect_with(|jail| {
jail.create_file(
"config.toml",
r#"
[storage]
AWS_ACCESS_KEY_ID = "AKIA123"
AWS_SECRET_ACCESS_KEY = "shh"
AWS_REGION = "nbg1"
AWS_ENDPOINT = "https://ttq.nbg1.your-objectstorage.com"
aws_virtual_hosted_style_request = "true"
"#,
)?;
let err = Config::load("config.toml")
.expect_err("legacy map must error")
.to_string();
assert!(err.contains("old [storage] passthrough map"), "got: {err}");
assert!(
err.contains("s3+https://nbg1.your-objectstorage.com/ttq/<prefix>"),
"recipe must de-fold the virtual-hosted endpoint, got: {err}",
);
assert!(!err.contains("AKIA123"), "got: {err}");
assert!(!err.contains("\"shh\""), "got: {err}");
assert!(err.contains("access_key_id = \"...\""), "got: {err}");
assert!(!err.contains("region ="), "got: {err}");
assert!(err.contains("?region="), "got: {err}");
assert!(err.contains("pond storage check"), "got: {err}");
jail.create_file(
"config.toml",
r#"
[storage]
AWS_ACCESS_KEY_ID = "AKIA123"
AWS_ENDPOINT = "https://ttq.nbg1.your-objectstorage.com"
"#,
)?;
let err = Config::load("config.toml")
.expect_err("legacy map must error")
.to_string();
assert!(
err.contains("s3+https://ttq.nbg1.your-objectstorage.com/<bucket>/<prefix>"),
"got: {err}",
);
Ok(())
});
}
// `POND_*` variables override file values, can add a new creds set, and
// an all-digit secret still arrives as a string.
#[test]
fn env_mirror_layers_over_file() {
figment::Jail::expect_with(|jail| {
jail.create_file(
"config.toml",
r#"
[storage]
path = "/from-file"
[creds.work]
scope = "s3://file-bucket/"
access_key_id = "from-file"
region = "file-region"
"#,
)?;
jail.set_env("POND_STORAGE_PATH", "/from-env");
jail.set_env("POND_CREDS_WORK_ACCESS_KEY_ID", "from-env");
jail.set_env("POND_CREDS_WORK_SECRET_ACCESS_KEY", "12345");
jail.set_env("POND_CREDS_CI_ACCESS_KEY_ID", "ci-key");
let config = Config::load("config.toml").expect("env+file config loads");
assert_eq!(config.storage.path.as_deref(), Some("/from-env"));
let work = &config.creds["work"];
assert_eq!(work.access_key_id.as_deref(), Some("from-env"));
assert_eq!(work.secret_access_key.as_deref(), Some("12345"));
assert_eq!(work.region.as_deref(), Some("file-region"));
assert_eq!(work.scope.as_deref(), Some("s3://file-bucket/"));
assert_eq!(config.creds["ci"].access_key_id.as_deref(), Some("ci-key"));
Ok(())
});
}
}