1use std::{
11 collections::BTreeMap,
12 path::{Path, PathBuf},
13};
14
15use anyhow::{Context, Result, anyhow, bail};
16use figment::{
17 Figment,
18 providers::{Env, Format, Toml},
19};
20use serde::{Deserialize, Deserializer, Serialize, de};
21use serde_json::Value;
22use url::Url;
23
/// Parses a human-friendly byte size such as `"128 MiB"`, `"1.5kb"` or `"4096"`.
///
/// The input is a non-negative decimal number optionally followed by a unit.
/// Units are case-insensitive and surrounding whitespace is ignored:
///
/// * no unit or `b` - bytes
/// * `k`/`kb`, `m`/`mb`, `g`/`gb`, `t`/`tb` - powers of 1000
/// * `kib`, `mib`, `gib`, `tib` - powers of 1024
///
/// Fractional results are truncated toward zero.
///
/// # Errors
///
/// Returns a human-readable message when the input is empty, the numeric part
/// does not parse, the value is negative or non-finite, the unit is unknown,
/// or the result does not fit in `usize`.
fn parse_byte_size(raw: &str) -> Result<usize, String> {
    let trimmed = raw.trim();
    if trimmed.is_empty() {
        return Err("byte-size value is empty".to_owned());
    }
    // The unit starts at the first ASCII letter; everything before it is the number.
    let split = trimmed
        .find(|c: char| c.is_ascii_alphabetic())
        .unwrap_or(trimmed.len());
    let (number, unit) = trimmed.split_at(split);
    let number: f64 = number
        .trim()
        .parse()
        .map_err(|_| format!("byte-size value {raw:?} is not a number"))?;
    if !number.is_finite() || number < 0.0 {
        return Err(format!("byte-size value {raw:?} must be non-negative"));
    }
    let multiplier: f64 = match unit.trim().to_ascii_lowercase().as_str() {
        "" | "b" => 1.0,
        "k" | "kb" => 1_000.0,
        "kib" => 1_024.0,
        "m" | "mb" => 1_000_000.0,
        "mib" => 1_048_576.0,
        "g" | "gb" => 1_000_000_000.0,
        "gib" => 1_073_741_824.0,
        "t" | "tb" => 1_000_000_000_000.0,
        "tib" => 1_099_511_627_776.0,
        other => {
            return Err(format!(
                "byte-size unit {other:?} not recognized (try MiB / GiB)"
            ));
        }
    };
    let bytes = number * multiplier;
    // The first value that does NOT fit is 2^BITS, which is exactly representable
    // as f64. Comparing against `usize::MAX as f64` with `>` is off by one on
    // 64-bit targets (the cast rounds up to 2^64), which let 2^64 through and
    // made the float-to-int cast below saturate silently.
    // `usize::BITS` is at most 128, so the cast to i32 is lossless.
    let limit = 2f64.powi(usize::BITS as i32);
    if !bytes.is_finite() || bytes >= limit {
        return Err(format!("byte-size value {raw:?} overflows usize"));
    }
    // In range and non-negative: the cast only drops the fractional part.
    Ok(bytes as usize)
}
65
66fn lenient_string<'de, D>(deserializer: D) -> Result<Option<String>, D::Error>
70where
71 D: Deserializer<'de>,
72{
73 #[derive(Deserialize)]
74 #[serde(untagged)]
75 enum Repr {
76 Text(String),
77 Int(i64),
78 Float(f64),
79 Bool(bool),
80 }
81 Ok(
82 Option::<Repr>::deserialize(deserializer)?.map(|repr| match repr {
83 Repr::Text(value) => value,
84 Repr::Int(value) => value.to_string(),
85 Repr::Float(value) => value.to_string(),
86 Repr::Bool(value) => value.to_string(),
87 }),
88 )
89}
90
91fn deserialize_byte_size_opt<'de, D>(deserializer: D) -> Result<Option<usize>, D::Error>
92where
93 D: Deserializer<'de>,
94{
95 #[derive(Deserialize)]
96 #[serde(untagged)]
97 enum Repr {
98 Bytes(u64),
99 Text(String),
100 }
101 let repr: Option<Repr> = Option::deserialize(deserializer)?;
102 match repr {
103 None => Ok(None),
104 Some(Repr::Bytes(value)) => usize::try_from(value).map(Some).map_err(de::Error::custom),
105 Some(Repr::Text(value)) => parse_byte_size(&value).map(Some).map_err(de::Error::custom),
106 }
107}
108
109pub fn is_local(url: &Url) -> bool {
113 matches!(url.scheme(), "file" | "file+uring")
114}
115
116pub fn local_path(url: &Url) -> Option<PathBuf> {
118 if is_local(url) {
119 url.to_file_path().ok()
120 } else {
121 None
122 }
123}
124
125pub fn child_uri(base: &Url, suffix: &str) -> String {
131 if let Some(path) = local_path(base) {
135 return path.join(suffix).display().to_string();
136 }
137 format!("{}/{suffix}", base.as_str().trim_end_matches('/'))
138}
139
140pub fn display(url: &Url) -> String {
144 if let Some(path) = local_path(url) {
145 contract_home(&path).display().to_string()
146 } else {
147 url.to_string()
148 }
149}
150
151pub fn url_for_path(path: impl AsRef<Path>) -> Result<Url> {
156 let path = path.as_ref();
157 let absolute = if path.is_absolute() {
158 path.to_path_buf()
159 } else {
160 std::path::absolute(path)
161 .with_context(|| format!("failed to absolutize {}", path.display()))?
162 };
163 Url::from_file_path(&absolute).map_err(|()| {
164 anyhow!(
165 "failed to convert path {} into a file:// URL",
166 absolute.display()
167 )
168 })
169}
170
/// Commented template for a fresh pond config file.
///
/// Every line of the body is either blank or a `#` comment, so loading it
/// verbatim yields the built-in defaults. The text is user-facing: it
/// documents each section (`[sources]`, `[embeddings]`, `[search]`,
/// `[maintenance]`, `[runtime]`, `[storage]`, `[creds.<name>]`) next to a
/// commented-out example.
pub const DEFAULT_CONFIG_TOML: &str = "\
# pond configuration.
#
# pond ships built-in defaults, so every setting here is optional - delete this
# file and pond still works. Uncomment and edit to override.

# Where pond looks for source data to import. One entry per adapter type
# (`claude-code`, `codex-cli`, ...). `pond sync` with no arguments syncs every
# entry; `pond sync <adapter>` syncs just one. With an empty `[sources]`,
# `pond sync` runs an interactive discovery against the known default paths
# and writes the picks back here.
#
# Future wrap: pond is single-namespace in v1 (spec.md#wire-namespace-resolution); `[sources]` is
# flat here. When multi-namespace pond lands, source registration becomes
# per-tenant under `[namespaces.<ns>.sources.<adapter>]`. Pre-v1 the schema
# is breakable; the rename is operationally free until a real second tenant
# exists.
#
# [sources.claude-code]
# enabled = true
# path = \"~/.claude/projects\"
#
# [sources.codex-cli]
# enabled = true
# path = \"~/.codex/sessions\"
#
# Set `enabled = false` to keep the section but skip it on `pond sync`;
# re-enable via `pond sync <adapter>`.

# Embeddings. Search runs hybrid (vector + FTS) whenever the store has any
# vectors, and FTS-only otherwise - the model loads lazily on the first hybrid
# query, so there's no cost on FTS-only corpora. `model` selects the
# HuggingFace XLM-RoBERTa model; `dim` declares its output width and is baked
# into the messages.vector schema on table creation - it must equal the
# model's hidden_size and be a multiple of 8 (IVF_PQ subspace stride).
#
# Common pairings:
# model = \"intfloat/multilingual-e5-small\" dim = 384 (default)
# model = \"intfloat/multilingual-e5-base\" dim = 768
# model = \"intfloat/multilingual-e5-large\" dim = 1024
#
# A different-dim model needs a fresh data dir; pond enforces this at the
# schema boundary.
#
# [embeddings]
# model = \"intfloat/multilingual-e5-small\"
# dim = 384

# Search tuning. Leave unset for Lance defaults; set when tuning IVF_PQ recall
# against a corpus.
#
# [search]
# nprobes = 16
# refine_factor = 2

# Storage maintenance. Tunes the compaction + cleanup pass that runs inside
# `pond sync` and `pond index optimize`.
#
# - `compaction_fragment_cap` is the per-task fragment-count backstop: a
# planned compaction task touching at least this many fragments always runs
# even when the write-amplification veto would skip it. Default 64; 0
# disables the veto and runs every task Lance plans.
# - `cleanup_older_than` is the manifest-retention window for the safe cleanup
# pass. Accepts `Ns` / `Nm` / `Nh` / `Nd` (default `1d`, floor `1h` - it is
# what protects in-flight readers). Versions older than this are reclaimed
# by Lance's OCC-coordinated GC.
# - `index_lag_threshold` is the minimum unindexed-fragment count before a
# per-intent append/rebuild runs in `pond index optimize`; the brute-force
# fallback keeps queries correct while fragments accumulate. Default 4.
#
# [maintenance]
# compaction_fragment_cap = 64
# cleanup_older_than = \"1d\"
# index_lag_threshold = 4

# Long-running process caps. Both accept either a plain byte count or a
# humansize-style suffix (\"128 MiB\", \"1 GiB\"). Both are optional - leave
# unset to let pond pick the backend-aware default:
# local FS : index_cache = 256 MiB, metadata_cache = 128 MiB
# remote : index_cache = 2 GiB, metadata_cache = 512 MiB
# Lance's library defaults (6 GiB / 1 GiB) are too generous for a per-session
# `pond mcp` process; tightening them is what keeps RSS under the 500 MiB target
# without measurable latency regressions on typical agent-history corpora.
#
# [runtime]
# index_cache_bytes = \"256 MiB\"
# metadata_cache_bytes = \"128 MiB\"

# Storage address and credentials (spec.md#storage-url-grammar).
#
# `path` is the default destination used when `--storage-path` (env
# `POND_STORAGE_PATH`) is not passed. Absent = the platform-local data dir.
# Addresses are URLs; the `s3+https` form carries the endpoint, bucket, and
# prefix in one token:
#
# /abs/path or ~/path local filesystem
# s3://bucket/prefix AWS S3 (ambient credential chain)
# s3+https://host/bucket/prefix S3-compatible endpoint (Hetzner, R2, B2, MinIO)
# gs://bucket/prefix Google Cloud Storage
# az://account/container/prefix Azure Blob
#
# Credentials live in `[creds.<name>]` sets and bind to URLs by `scope`
# prefix - longest match wins (spec.md#creds-scope-match); a set without
# `scope` matches any URL. With no matching set, the standard cloud SDK
# chain applies (AWS_* env, shared credentials file, instance metadata).
# Secrets never go in URLs or CLI flags; besides inline values,
# `access_key_id_file` / `secret_access_key_file` read a file and
# `secret_access_key_command` runs a command (e.g. `op read ...`). `extra`
# holds verbatim `object_store` options pond has not typed.
#
# Every field mirrors to env: `POND_STORAGE_PATH`, `POND_CREDS_<NAME>_<FIELD>`
# (set names are lowercase alphanumeric, so the env grammar is unambiguous).
# Precedence: CLI flag > POND_* env > this file > ambient cloud chain.
# Probe a destination end-to-end with `pond storage check`.
#
# Future wrap: pond is single-namespace in v1 (spec.md#wire-namespace-resolution);
# `[storage]` is flat here on the assumption of one bucket per pond. When
# multi-namespace pond lands this becomes `[namespaces.<ns>.storage]`.
#
# [storage]
# path = \"s3+https://nbg1.your-objectstorage.com/my-pond\"
#
# [creds.default]
# access_key_id = \"...\"
# secret_access_key = \"...\"
";
300
/// Root of the pond configuration as deserialized from the config file (with
/// the `POND_*` environment mirror layered on top by the `load*` functions).
///
/// Every section is optional and falls back to its `Default`; unknown
/// top-level keys are rejected.
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct Config {
    /// `[embeddings]`: embedding model id and vector width.
    #[serde(default)]
    pub embeddings: EmbeddingsConfig,
    /// `[search]`: optional vector-search tuning knobs.
    #[serde(default)]
    pub search: SearchConfig,
    /// `[maintenance]`: optional compaction / cleanup / index tuning.
    #[serde(default)]
    pub maintenance: MaintenanceConfig,
    /// `[runtime]`: optional cache size caps.
    #[serde(default)]
    pub runtime: RuntimeConfig,
    /// `[sources.<adapter>]` entries keyed by adapter name. Each value is kept
    /// as an untyped JSON blob; only its `enabled` flag is interpreted here
    /// (see `resolve_sources`).
    #[serde(default)]
    pub sources: BTreeMap<String, Value>,
    /// `[storage]`: default storage destination.
    #[serde(default)]
    pub storage: StorageConfig,
    /// `[creds.<name>]` credential sets keyed by set name; cross-checked by
    /// `validate_creds` after loading.
    #[serde(default)]
    pub creds: BTreeMap<String, CredsSet>,
}
330
/// The `[storage]` section. Any key other than `path` is rejected, which is
/// what routes old-style passthrough maps to `detect_legacy_storage`.
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct StorageConfig {
    /// Default storage destination, kept as the raw string from the config
    /// (a filesystem path or a storage URL); `None` when unset.
    #[serde(default)]
    pub path: Option<String>,
}
340
/// One `[creds.<name>]` credential set.
///
/// All fields are optional at the type level; `Config::validate_creds`
/// enforces that at most one access-key-id source and at most one secret
/// source is given per set. Unknown keys are rejected.
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct CredsSet {
    /// URL prefix this set applies to; a set without a scope is the single
    /// allowed catch-all.
    #[serde(default)]
    pub scope: Option<String>,
    /// Inline access key id. Deserialized leniently so a purely numeric or
    /// boolean-looking value still arrives as a string.
    #[serde(default, deserialize_with = "lenient_string")]
    pub access_key_id: Option<String>,
    /// File to read the access key id from (alternative to the inline value).
    #[serde(default)]
    pub access_key_id_file: Option<PathBuf>,
    /// Inline secret access key (leniently deserialized, see `access_key_id`).
    #[serde(default, deserialize_with = "lenient_string")]
    pub secret_access_key: Option<String>,
    /// File to read the secret access key from.
    #[serde(default)]
    pub secret_access_key_file: Option<PathBuf>,
    /// Command whose output supplies the secret access key.
    #[serde(default)]
    pub secret_access_key_command: Option<String>,
    /// Region name (leniently deserialized, see `access_key_id`).
    #[serde(default, deserialize_with = "lenient_string")]
    pub region: Option<String>,
    /// Explicit virtual-hosted-style addressing toggle; `None` leaves the
    /// choice to the consumer of this set.
    #[serde(default)]
    pub virtual_hosted_style_request: Option<bool>,
    /// Extra string options passed through untyped. Not settable through the
    /// environment mirror (`env_mirror` filters `*_extra` keys).
    #[serde(default)]
    pub extra: BTreeMap<String, String>,
}
371
/// The `[runtime]` section: optional cache caps in bytes.
///
/// Each field accepts either an integer byte count or a size string such as
/// `"256 MiB"` (see `deserialize_byte_size_opt`); `None` means "not set".
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
#[serde(deny_unknown_fields, default)]
pub struct RuntimeConfig {
    /// Cap for the index cache, in bytes.
    #[serde(default, deserialize_with = "deserialize_byte_size_opt")]
    pub index_cache_bytes: Option<usize>,
    /// Cap for the metadata cache, in bytes.
    #[serde(default, deserialize_with = "deserialize_byte_size_opt")]
    pub metadata_cache_bytes: Option<usize>,
}
384
/// The `[search]` section: optional vector-search tuning. `None` leaves the
/// corresponding knob at whatever default the search backend uses.
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct SearchConfig {
    /// `nprobes` override for the vector index query.
    #[serde(default)]
    pub nprobes: Option<usize>,
    /// `refine_factor` override for the vector index query.
    #[serde(default)]
    pub refine_factor: Option<u32>,
}
394
/// The `[maintenance]` section: optional storage-maintenance tuning. `None`
/// leaves the built-in default in place (the defaults are described in
/// `DEFAULT_CONFIG_TOML`).
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct MaintenanceConfig {
    /// Per-task fragment-count backstop for compaction.
    #[serde(default)]
    pub compaction_fragment_cap: Option<usize>,
    /// Retention window for the cleanup pass, kept as the raw duration string
    /// (e.g. `"1d"`); it is not parsed or validated in this struct.
    #[serde(default)]
    pub cleanup_older_than: Option<String>,
    /// Minimum unindexed-fragment count before an index update runs. When set,
    /// `Config::load_with_provenance` installs it process-wide.
    #[serde(default)]
    pub index_lag_threshold: Option<usize>,
}
422
/// The `[embeddings]` section. Missing fields fall back to the values of the
/// hand-written `Default` impl; see `EmbeddingsConfig::validate` for the
/// constraints enforced after loading.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(deny_unknown_fields, default)]
pub struct EmbeddingsConfig {
    /// Embedding model id; must be non-empty.
    pub model: String,
    /// Embedding vector width; must be a positive multiple of 8.
    pub dim: usize,
}
442
443impl Default for EmbeddingsConfig {
444 fn default() -> Self {
445 Self {
446 model: crate::embed::DEFAULT_MODEL_ID.to_owned(),
447 dim: crate::sessions::DEFAULT_EMBEDDING_DIM,
448 }
449 }
450}
451
452pub fn default_storage_path(xdg_data_home: Option<PathBuf>, home: Option<PathBuf>) -> Result<Url> {
458 if let Some(xdg) = xdg_data_home.filter(|path| path.is_absolute()) {
459 return url_for_path(xdg.join("pond"));
460 }
461 if let Some(home) = home {
462 return url_for_path(home.join(".local").join("share").join("pond"));
463 }
464 url_for_path(PathBuf::from(".pond"))
466}
467
/// Picks the default config file location.
///
/// Preference order: `<xdg_config_home>/pond/config.toml` when that value is
/// an absolute path, then `<home>/.config/pond/config.toml`, then the relative
/// `.pond.toml`.
pub fn default_config_path(xdg_config_home: Option<PathBuf>, home: Option<PathBuf>) -> PathBuf {
    // A relative XDG value is ignored, exactly as if it were unset.
    let xdg = xdg_config_home.filter(|dir| dir.is_absolute());
    match (xdg, home) {
        (Some(xdg), _) => xdg.join("pond").join("config.toml"),
        (None, Some(home)) => home.join(".config").join("pond").join("config.toml"),
        (None, None) => PathBuf::from(".pond.toml"),
    }
}
481
482impl Config {
483 pub fn load(path: impl AsRef<Path>) -> Result<Self> {
491 Ok(Self::load_with_provenance(path)?.0)
492 }
493
494 pub fn load_str(body: &str) -> Result<Self> {
499 let figment = Figment::new().merge(Toml::string(body)).merge(env_mirror());
500 let config: Self = figment
501 .extract_lossy()
502 .map_err(|error| anyhow!("failed to load config: {error}"))?;
503 config.embeddings.validate()?;
504 config.validate_creds()?;
505 Ok(config)
506 }
507
508 pub fn load_with_provenance(path: impl AsRef<Path>) -> Result<(Self, Figment)> {
511 let path = path.as_ref();
512 let figment = Figment::new().merge(Toml::file(path)).merge(env_mirror());
513 let config: Self = figment.extract_lossy().map_err(|error| {
517 if let Some(recipe) = detect_legacy_storage(path) {
518 return anyhow!("{recipe}");
519 }
520 anyhow!("failed to load config {}: {error}", path.display())
523 })?;
524 config.embeddings.validate()?;
525 config.validate_creds()?;
526 config.embeddings.install_runtime();
527 if let Some(threshold) = config.maintenance.index_lag_threshold {
528 crate::substrate::init_index_lag_threshold(threshold);
529 }
530 Ok((config, figment))
534 }
535
536 fn validate_creds(&self) -> Result<()> {
541 let mut scopeless: Option<&str> = None;
542 let mut scopes: BTreeMap<String, &str> = BTreeMap::new();
543 for (name, set) in &self.creds {
544 let mut chars = name.chars();
548 let head_ok = chars.next().is_some_and(|c| c.is_ascii_lowercase());
549 if !head_ok
550 || name.len() > 16
551 || !chars.all(|c| c.is_ascii_lowercase() || c.is_ascii_digit())
552 {
553 bail!(
554 "creds set name {name:?} must match [a-z][a-z0-9]{{0,15}} (lowercase alphanumeric, no separators)"
555 );
556 }
557 if set.access_key_id.is_some() && set.access_key_id_file.is_some() {
558 bail!("[creds.{name}] sets both access_key_id and access_key_id_file; pick one");
559 }
560 let secret_variants = [
561 set.secret_access_key.is_some(),
562 set.secret_access_key_file.is_some(),
563 set.secret_access_key_command.is_some(),
564 ]
565 .iter()
566 .filter(|present| **present)
567 .count();
568 if secret_variants > 1 {
569 bail!(
570 "[creds.{name}] sets more than one of secret_access_key / secret_access_key_file / secret_access_key_command; pick one"
571 );
572 }
573 match set.scope.as_deref() {
574 None => {
575 if let Some(other) = scopeless {
576 bail!(
577 "[creds.{other}] and [creds.{name}] are both scope-less; at most one catch-all set is allowed - add a `scope` to one"
578 );
579 }
580 scopeless = Some(name);
581 }
582 Some(scope) => {
583 let canonical = crate::substrate::parse_scope(scope)
588 .map(|url| url.as_str().trim_end_matches('/').to_owned())
589 .with_context(|| {
590 format!("[creds.{name}] scope {scope:?} is not a valid URL prefix")
591 })?;
592 if let Some(other) = scopes.insert(canonical, name) {
593 bail!(
594 "[creds.{other}] and [creds.{name}] declare the same scope {scope:?}; merge them or narrow one"
595 );
596 }
597 }
598 }
599 }
600 Ok(())
601 }
602
603 pub fn resolve_sources(&self, adapter: Option<&str>) -> Result<Vec<(String, Value)>> {
612 match adapter {
613 None => Ok(self
614 .sources
615 .iter()
616 .filter_map(|(name, blob)| take_enabled(name, blob))
617 .collect()),
618 Some(name) => {
619 let blob = self
620 .sources
621 .get(name)
622 .ok_or_else(|| anyhow!("no [sources.{name}] entry in config"))?;
623 take_enabled(name, blob).map(|entry| vec![entry]).ok_or_else(|| {
624 anyhow!(
625 "source [{name}] is disabled (enabled = false); run `pond sync {name}` to re-enable"
626 )
627 })
628 }
629 }
630 }
631
632 pub fn disabled_source_names(&self) -> Vec<&str> {
637 self.sources
638 .iter()
639 .filter_map(|(name, blob)| {
640 let enabled = blob
641 .get("enabled")
642 .and_then(Value::as_bool)
643 .unwrap_or(false);
644 if enabled { None } else { Some(name.as_str()) }
645 })
646 .collect()
647 }
648}
649
650fn env_mirror() -> Env {
656 Env::prefixed("POND_")
659 .filter(|key| {
660 let key = key.as_str().to_ascii_lowercase();
661 key == "storage_path" || (key.starts_with("creds_") && !key.ends_with("_extra"))
665 })
666 .map(|key| {
667 let key = key.as_str().to_ascii_lowercase();
671 let dots = if key.starts_with("creds_") { 2 } else { 1 };
672 key.replacen('_', ".", dots).into()
673 })
674}
675
/// Key spellings that named the endpoint in an old-style `[storage]` map.
/// `detect_legacy_storage` matches them case-insensitively.
pub const LEGACY_ENDPOINT_KEYS: &[&str] = &["aws_endpoint", "endpoint"];
/// Key spellings that named the access key id in an old-style `[storage]` map.
/// NOTE(review): not referenced in the visible part of this file; presumably
/// consumed by migration code elsewhere - confirm before changing.
pub const LEGACY_ACCESS_KEY_KEYS: &[&str] = &["aws_access_key_id", "access_key_id"];
/// Key spellings that named the secret access key in an old-style `[storage]`
/// map. NOTE(review): same caveat as `LEGACY_ACCESS_KEY_KEYS`.
pub const LEGACY_SECRET_KEY_KEYS: &[&str] = &["aws_secret_access_key", "secret_access_key"];
684
685fn detect_legacy_storage(path: &Path) -> Option<String> {
690 let text = std::fs::read_to_string(path).ok()?;
691 let value: toml::Value = toml::from_str(&text).ok()?;
692 let storage = value.get("storage")?.as_table()?;
693 if storage.is_empty() || storage.keys().all(|key| key == "path") {
694 return None;
695 }
696 let get = |names: &[&str]| {
697 storage.iter().find_map(|(key, value)| {
698 names
699 .iter()
700 .any(|name| key.eq_ignore_ascii_case(name))
701 .then(|| value.as_str().unwrap_or_default().to_owned())
702 })
703 };
704 let endpoint = get(LEGACY_ENDPOINT_KEYS);
705 let host = endpoint
706 .as_deref()
707 .and_then(|e| e.split("://").nth(1))
708 .unwrap_or("<endpoint-host>");
709 let mut recipe = format!(
712 "config {} uses the old [storage] passthrough map; rewrite it as:\n\n[storage]\npath = \"s3+https://{host}/<bucket>/<prefix>\"\n\n[creds.default]\n",
713 path.display(),
714 );
715 recipe.push_str("access_key_id = \"...\" # copy from the old [storage] section\n");
716 recipe.push_str("secret_access_key = \"...\" # copy from the old [storage] section\n");
717 recipe.push_str(
718 "\n(the endpoint and bucket fold into the URL; allow_http is scheme-derived; virtual-hosted addressing defaults on; the region is autodetected - append ?region=<x> to the URL only if your store insists. `pond storage check` verifies the result end-to-end, and `pond init` can apply this rewrite for you)",
719 );
720 Some(recipe)
721}
722
723fn take_enabled(name: &str, blob: &Value) -> Option<(String, Value)> {
727 let enabled = blob
728 .get("enabled")
729 .and_then(Value::as_bool)
730 .unwrap_or(false);
731 if !enabled {
732 return None;
733 }
734 let mut clean = blob.clone();
735 if let Some(obj) = clean.as_object_mut() {
736 obj.remove("enabled");
737 }
738 Some((name.to_owned(), clean))
739}
740
741pub fn expand_home_under(path: &Path, home: &Path) -> PathBuf {
748 let Some(text) = path.to_str() else {
749 return path.to_path_buf();
750 };
751 let home_text = home.to_string_lossy();
752 let expanded = shellexpand::full_with_context_no_errors(
753 text,
754 || Some(home_text.clone()),
755 |var| std::env::var(var).ok(),
756 );
757 PathBuf::from(expanded.as_ref())
758}
759
/// Rewrites `path` with a leading `home` replaced by `~`.
///
/// * `path == home` becomes `~`
/// * `path` below `home` becomes `~/<rest>`
/// * anything else is returned unchanged
///
/// An empty `home` never matches: `Path::strip_prefix` treats the empty path
/// as a prefix of every path, which would otherwise turn a relative `foo`
/// into `~/foo`.
pub fn contract_home_under(path: &Path, home: &Path) -> PathBuf {
    if home.as_os_str().is_empty() {
        return path.to_path_buf();
    }
    match path.strip_prefix(home) {
        Ok(rest) if rest.as_os_str().is_empty() => PathBuf::from("~"),
        Ok(rest) => Path::new("~").join(rest),
        Err(_) => path.to_path_buf(),
    }
}
771
772pub fn contract_home(path: &Path) -> PathBuf {
776 match std::env::var_os("HOME") {
777 Some(home) => contract_home_under(path, Path::new(&home)),
778 None => path.to_path_buf(),
779 }
780}
781
782impl EmbeddingsConfig {
783 pub fn validate(&self) -> Result<()> {
788 if self.model.trim().is_empty() {
789 bail!("embeddings.model must be a non-empty HuggingFace model id");
790 }
791 if self.dim == 0 || !self.dim.is_multiple_of(8) {
792 bail!(
793 "embeddings.dim = {} must be a positive multiple of 8 (IVF_PQ subspace stride)",
794 self.dim,
795 );
796 }
797 Ok(())
798 }
799
800 pub fn install_runtime(&self) {
804 crate::embed::init_model_id(self.model.clone());
805 crate::sessions::init_embedding_dim(self.dim);
806 }
807}
808
/// Unit tests for config loading, validation, and the path/URL helpers.
#[cfg(test)]
mod tests {
    // Tests unwrap/expect freely. NOTE(review): `result_large_err` is
    // presumably needed for the `figment::Jail` closures - confirm.
    #![allow(clippy::expect_used, clippy::unwrap_used, clippy::result_large_err)]

    use super::*;
    use serde_json::Value;
    use tempfile::TempDir;

    /// The defaults validate; a blank model, a dim that is not a multiple of
    /// 8, and a zero dim are each rejected.
    #[test]
    fn validate_catches_empty_model_and_bad_dim() {
        assert!(EmbeddingsConfig::default().validate().is_ok());
        // Whitespace-only model id.
        let bad_model = EmbeddingsConfig {
            model: " ".to_owned(),
            dim: 768,
        };
        assert!(bad_model.validate().is_err());
        // 100 is not a multiple of 8.
        let bad_dim = EmbeddingsConfig {
            model: "intfloat/multilingual-e5-base".to_owned(),
            dim: 100,
        };
        assert!(bad_dim.validate().is_err());
        // Zero is a multiple of 8 but must still be rejected.
        let zero_dim = EmbeddingsConfig {
            model: "intfloat/multilingual-e5-base".to_owned(),
            dim: 0,
        };
        assert!(zero_dim.validate().is_err());
    }

    /// A config path that does not exist is not an error: loading yields the
    /// built-in defaults.
    #[test]
    fn config_load_missing_file_falls_back_to_builtin() {
        let config = Config::load("/nonexistent/pond-config-xyz.toml").unwrap();
        assert_eq!(config.embeddings, EmbeddingsConfig::default());
    }

    /// The shipped template is fully commented out, so loading it must give
    /// the same embedding settings as having no file at all.
    #[test]
    fn default_config_toml_loads_to_the_builtin_defaults() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("config.toml");
        std::fs::write(&path, DEFAULT_CONFIG_TOML).unwrap();
        let config = Config::load(&path).unwrap();
        assert_eq!(config.embeddings, EmbeddingsConfig::default());
        assert_eq!(config.embeddings.model, crate::embed::DEFAULT_MODEL_ID);
        assert_eq!(
            config.embeddings.dim,
            crate::sessions::DEFAULT_EMBEDDING_DIM
        );
    }

    /// Storage path resolution: absolute XDG dir wins, a relative XDG dir is
    /// ignored in favor of home, and with neither the fallback ends in `.pond`.
    #[test]
    fn default_storage_path_follows_xdg_then_home() {
        // Absolute XDG data home takes precedence over home.
        let resolved =
            default_storage_path(Some(PathBuf::from("/xdg")), Some(PathBuf::from("/home")))
                .unwrap();
        assert!(is_local(&resolved));
        assert_eq!(local_path(&resolved).unwrap(), PathBuf::from("/xdg/pond"));

        // A relative XDG value is skipped.
        let resolved = default_storage_path(
            Some(PathBuf::from("relative")),
            Some(PathBuf::from("/home")),
        )
        .unwrap();
        assert_eq!(
            local_path(&resolved).unwrap(),
            PathBuf::from("/home/.local/share/pond"),
        );

        // Neither set: relative `.pond`, absolutized against the current dir.
        let resolved = default_storage_path(None, None).unwrap();
        assert!(is_local(&resolved));
        assert!(
            local_path(&resolved).unwrap().ends_with(".pond"),
            "fallback path should end with .pond: {resolved}",
        );
    }

    /// `~` and `~/...` expand to the given home; absolute paths and
    /// `~user/...` forms are left alone.
    #[test]
    fn expand_home_under_handles_tilde_forms() {
        let home = Path::new("/srv/me");
        assert_eq!(
            expand_home_under(Path::new("~"), home),
            PathBuf::from("/srv/me")
        );
        assert_eq!(
            expand_home_under(Path::new("~/.codex/sessions"), home),
            PathBuf::from("/srv/me/.codex/sessions"),
        );
        // No tilde: unchanged.
        assert_eq!(
            expand_home_under(Path::new("/etc/passwd"), home),
            PathBuf::from("/etc/passwd"),
        );
        // `~user` is not the current user's home: unchanged.
        assert_eq!(
            expand_home_under(Path::new("~user/elsewhere"), home),
            PathBuf::from("~user/elsewhere"),
        );
    }

    /// `$VAR` and `${VAR}` expand from the environment; an unset variable is
    /// left verbatim.
    #[test]
    fn expand_home_under_handles_env_vars() {
        figment::Jail::expect_with(|jail| {
            // The jail scopes the env change to this test.
            jail.set_env("POND_TEST_EXPAND_DIR", "/srv/data");
            let home = Path::new("/srv/me");
            assert_eq!(
                expand_home_under(Path::new("$POND_TEST_EXPAND_DIR/pond"), home),
                PathBuf::from("/srv/data/pond"),
            );
            assert_eq!(
                expand_home_under(Path::new("${POND_TEST_EXPAND_DIR}/pond"), home),
                PathBuf::from("/srv/data/pond"),
            );
            // Unset variable: the reference survives untouched.
            assert_eq!(
                expand_home_under(Path::new("$POND_TEST_UNSET_VAR/x"), home),
                PathBuf::from("$POND_TEST_UNSET_VAR/x"),
            );
            Ok(())
        });
    }

    /// Contraction is the inverse of tilde expansion for paths under home and
    /// a no-op for paths outside it.
    #[test]
    fn contract_home_under_inverts_expansion() {
        let home = Path::new("/srv/me");
        assert_eq!(
            contract_home_under(Path::new("/srv/me/.local/share/pond"), home),
            PathBuf::from("~/.local/share/pond"),
        );
        assert_eq!(
            contract_home_under(Path::new("/srv/me"), home),
            PathBuf::from("~")
        );
        // Outside home: unchanged.
        assert_eq!(
            contract_home_under(Path::new("/etc/passwd"), home),
            PathBuf::from("/etc/passwd"),
        );
    }

    /// `resolve_sources` returns every enabled source, a single named source,
    /// or an error for a disabled / unknown adapter.
    #[test]
    fn resolve_sources_returns_one_or_all_or_errors() {
        let temp = TempDir::new().unwrap();
        let body = "\
[sources.claude-code]
enabled = true
path = \"/srv/claude\"

[sources.codex-cli]
enabled = true
path = \"/srv/codex\"

[sources.opencode]
enabled = false
";
        let path = temp.path().join("config.toml");
        std::fs::write(&path, body).expect("write config");
        let config = Config::load(&path).unwrap();

        // No adapter: both enabled entries, with `enabled` stripped.
        let all = config.resolve_sources(None).unwrap();
        assert_eq!(all.len(), 2);
        let names: Vec<_> = all.iter().map(|(n, _)| n.as_str()).collect();
        assert!(names.contains(&"claude-code"));
        assert!(names.contains(&"codex-cli"));
        for (_, blob) in &all {
            assert!(blob.get("enabled").is_none(), "enabled should be stripped");
        }

        // Named adapter: exactly that entry.
        let one = config.resolve_sources(Some("codex-cli")).unwrap();
        assert_eq!(one.len(), 1);
        assert_eq!(one[0].0, "codex-cli");
        assert_eq!(
            one[0].1.get("path").and_then(Value::as_str),
            Some("/srv/codex"),
        );

        // Disabled adapter: error that names the flag and the re-enable command.
        let disabled = config.resolve_sources(Some("opencode"));
        let err = disabled
            .expect_err("disabled adapter must error")
            .to_string();
        assert!(err.contains("enabled = false"), "got: {err}");
        assert!(err.contains("pond sync opencode"), "got: {err}");

        // Unknown adapter: error.
        assert!(config.resolve_sources(Some("nope")).is_err());

        assert_eq!(config.disabled_source_names(), vec!["opencode"]);
    }

    /// A `memory://` URL is not local and has no filesystem path.
    #[test]
    fn memory_uri_is_classified_as_remote() {
        let url = Url::parse("memory:///pond-remote-test").expect("memory uri parses");
        assert!(
            !is_local(&url),
            "memory:// is not a local-filesystem URL: {url}",
        );
        assert!(
            local_path(&url).is_none(),
            "local_path must return None for non-file schemes",
        );
    }

    /// `[storage]` and `[creds.*]` sections load with every typed field intact.
    #[test]
    fn storage_and_creds_round_trip() {
        figment::Jail::expect_with(|jail| {
            jail.create_file(
                "config.toml",
                r#"
[storage]
path = "s3+https://nbg1.example.com/my-pond"

[creds.default]
access_key_id = "AKIA123"
secret_access_key = "shh"

[creds.work]
scope = "s3+https://fsn1.example.com/work-pond/"
access_key_id = "AKIA456"
secret_access_key_command = "op read op://vault/pond/secret"
region = "fsn1"
virtual_hosted_style_request = false
extra = { request_timeout = "60 seconds" }
"#,
            )?;
            let config = Config::load("config.toml").expect("config loads");
            assert_eq!(
                config.storage.path.as_deref(),
                Some("s3+https://nbg1.example.com/my-pond"),
            );
            assert_eq!(config.creds.len(), 2);
            let work = &config.creds["work"];
            assert_eq!(
                work.secret_access_key_command.as_deref(),
                Some("op read op://vault/pond/secret"),
            );
            assert_eq!(work.virtual_hosted_style_request, Some(false));
            assert_eq!(work.extra["request_timeout"], "60 seconds");
            Ok(())
        });
    }

    /// Each malformed `[creds]` shape fails to load with an error that
    /// contains the paired needle.
    #[test]
    fn creds_validators_reject_bad_shapes() {
        let cases: &[(&str, &str)] = &[
            // Misspelled field name: the error must name the unknown key.
            ("[creds.a]\nacces_key_id = \"x\"\n", "acces_key_id"),
            // Set name with a separator.
            ("[creds.my_set]\naccess_key_id = \"x\"\n", "[a-z][a-z0-9]"),
            // Set name with an uppercase letter.
            ("[creds.A1]\naccess_key_id = \"x\"\n", "[a-z][a-z0-9]"),
            (
                // Two secret sources in one set.
                "[creds.a]\nsecret_access_key = \"x\"\nsecret_access_key_command = \"cat\"\n",
                "more than one",
            ),
            (
                // Inline and file access key id together.
                "[creds.a]\naccess_key_id = \"x\"\naccess_key_id_file = \"/k\"\n",
                "pick one",
            ),
            (
                // Two sets without a scope.
                "[creds.a]\naccess_key_id = \"x\"\n[creds.b]\naccess_key_id = \"y\"\n",
                "scope-less",
            ),
            (
                // Two scopes that differ only by explicit `:443` and a trailing
                // slash are expected to collide after canonicalization.
                "[creds.a]\nscope = \"s3+https://h:443/b/\"\naccess_key_id = \"x\"\n[creds.b]\nscope = \"s3+https://h/b\"\naccess_key_id = \"y\"\n",
                "same scope",
            ),
        ];
        figment::Jail::expect_with(|jail| {
            for (body, needle) in cases {
                jail.create_file("config.toml", body)?;
                let err = Config::load("config.toml").expect_err(body).to_string();
                assert!(
                    err.contains(needle),
                    "want {needle:?} in error for {body:?}, got: {err}",
                );
            }
            Ok(())
        });
    }

    /// An old-style `[storage]` passthrough map fails to load with the
    /// rewrite recipe, which must not echo any credential from the file.
    #[test]
    fn legacy_storage_map_errors_with_the_rewrite_recipe() {
        figment::Jail::expect_with(|jail| {
            jail.create_file(
                "config.toml",
                r#"
[storage]
AWS_ACCESS_KEY_ID = "AKIA123"
AWS_SECRET_ACCESS_KEY = "shh"
AWS_REGION = "nbg1"
AWS_ENDPOINT = "https://ttq.nbg1.your-objectstorage.com"
aws_virtual_hosted_style_request = "true"
"#,
            )?;
            let err = Config::load("config.toml")
                .expect_err("legacy map must error")
                .to_string();
            assert!(err.contains("old [storage] passthrough map"), "got: {err}");
            // The endpoint host is folded into the suggested URL.
            assert!(
                err.contains("s3+https://ttq.nbg1.your-objectstorage.com/"),
                "endpoint host must fold into the URL recipe, got: {err}",
            );
            // Secrets from the file must never appear in the error text.
            assert!(!err.contains("AKIA123"), "got: {err}");
            assert!(!err.contains("\"shh\""), "got: {err}");
            assert!(err.contains("access_key_id = \"...\""), "got: {err}");
            // The region is only mentioned as an optional URL query, not as a key.
            assert!(!err.contains("region ="), "got: {err}");
            assert!(err.contains("?region="), "got: {err}");
            assert!(err.contains("pond storage check"), "got: {err}");
            Ok(())
        });
    }

    /// `POND_*` variables override file values field by field and can create
    /// credential sets that the file does not mention.
    #[test]
    fn env_mirror_layers_over_file() {
        figment::Jail::expect_with(|jail| {
            jail.create_file(
                "config.toml",
                r#"
[storage]
path = "/from-file"

[creds.work]
scope = "s3://file-bucket/"
access_key_id = "from-file"
region = "file-region"
"#,
            )?;
            jail.set_env("POND_STORAGE_PATH", "/from-env");
            // Overrides one field of an existing set.
            jail.set_env("POND_CREDS_WORK_ACCESS_KEY_ID", "from-env");
            // All-digit value: must still arrive as a string.
            jail.set_env("POND_CREDS_WORK_SECRET_ACCESS_KEY", "12345");
            // Set that exists only in the environment.
            jail.set_env("POND_CREDS_CI_ACCESS_KEY_ID", "ci-key");
            let config = Config::load("config.toml").expect("env+file config loads");
            assert_eq!(config.storage.path.as_deref(), Some("/from-env"));
            let work = &config.creds["work"];
            assert_eq!(work.access_key_id.as_deref(), Some("from-env"));
            assert_eq!(work.secret_access_key.as_deref(), Some("12345"));
            // Fields not set in the environment keep their file values.
            assert_eq!(work.region.as_deref(), Some("file-region"));
            assert_eq!(work.scope.as_deref(), Some("s3://file-bucket/"));
            assert_eq!(config.creds["ci"].access_key_id.as_deref(), Some("ci-key"));
            Ok(())
        });
    }
}