1use std::{
11 collections::BTreeMap,
12 path::{Path, PathBuf},
13};
14
15use anyhow::{Context, Result, anyhow, bail};
16use figment::{
17 Figment,
18 providers::{Env, Format, Toml},
19};
20use serde::{Deserialize, Deserializer, Serialize, de};
21use serde_json::Value;
22use url::Url;
23
/// Parses a human-friendly byte size such as `"4096"`, `"128 MiB"` or `"1.5GB"`.
///
/// The value is a non-negative decimal number optionally followed by a unit.
/// Surrounding whitespace and whitespace between number and unit are ignored,
/// and units are matched case-insensitively:
///
/// * no unit or `b` - bytes
/// * `k`/`kb`, `m`/`mb`, `g`/`gb`, `t`/`tb` - decimal multiples (powers of 1000)
/// * `kib`, `mib`, `gib`, `tib` - binary multiples (powers of 1024)
///
/// Fractional results are truncated towards zero.
///
/// # Errors
///
/// Returns a human-readable message when the input is empty, the numeric part
/// does not parse, the number is negative or non-finite, the unit is unknown,
/// or the resulting byte count does not fit in `usize`.
fn parse_byte_size(raw: &str) -> Result<usize, String> {
    let trimmed = raw.trim();
    if trimmed.is_empty() {
        return Err("byte-size value is empty".to_owned());
    }
    // The unit starts at the first ASCII letter; everything before it is the number.
    let split = trimmed
        .find(|c: char| c.is_ascii_alphabetic())
        .unwrap_or(trimmed.len());
    let (number, unit) = trimmed.split_at(split);
    let number: f64 = number
        .trim()
        .parse()
        .map_err(|_| format!("byte-size value {raw:?} is not a number"))?;
    if !number.is_finite() || number < 0.0 {
        return Err(format!("byte-size value {raw:?} must be non-negative"));
    }
    let multiplier: f64 = match unit.trim().to_ascii_lowercase().as_str() {
        "" | "b" => 1.0,
        "k" | "kb" => 1_000.0,
        "kib" => 1_024.0,
        "m" | "mb" => 1_000_000.0,
        "mib" => 1_048_576.0,
        "g" | "gb" => 1_000_000_000.0,
        "gib" => 1_073_741_824.0,
        "t" | "tb" => 1_000_000_000_000.0,
        "tib" => 1_099_511_627_776.0,
        other => {
            return Err(format!(
                "byte-size unit {other:?} not recognized (try MiB / GiB)"
            ));
        }
    };
    let bytes = number * multiplier;
    if !bytes.is_finite() {
        return Err(format!("byte-size value {raw:?} overflows usize"));
    }
    // Comparing against `usize::MAX as f64` is off by one on 64-bit targets
    // (the cast rounds up to 2^64), so go through u128 instead: the float cast
    // truncates and saturates, and the integer narrowing is checked exactly.
    usize::try_from(bytes as u128).map_err(|_| format!("byte-size value {raw:?} overflows usize"))
}
65
66fn lenient_string<'de, D>(deserializer: D) -> Result<Option<String>, D::Error>
70where
71 D: Deserializer<'de>,
72{
73 #[derive(Deserialize)]
74 #[serde(untagged)]
75 enum Repr {
76 Text(String),
77 Int(i64),
78 Float(f64),
79 Bool(bool),
80 }
81 Ok(
82 Option::<Repr>::deserialize(deserializer)?.map(|repr| match repr {
83 Repr::Text(value) => value,
84 Repr::Int(value) => value.to_string(),
85 Repr::Float(value) => value.to_string(),
86 Repr::Bool(value) => value.to_string(),
87 }),
88 )
89}
90
91fn deserialize_byte_size_opt<'de, D>(deserializer: D) -> Result<Option<usize>, D::Error>
92where
93 D: Deserializer<'de>,
94{
95 #[derive(Deserialize)]
96 #[serde(untagged)]
97 enum Repr {
98 Bytes(u64),
99 Text(String),
100 }
101 let repr: Option<Repr> = Option::deserialize(deserializer)?;
102 match repr {
103 None => Ok(None),
104 Some(Repr::Bytes(value)) => usize::try_from(value).map(Some).map_err(de::Error::custom),
105 Some(Repr::Text(value)) => parse_byte_size(&value).map(Some).map_err(de::Error::custom),
106 }
107}
108
109pub fn is_local(url: &Url) -> bool {
113 matches!(url.scheme(), "file" | "file+uring")
114}
115
116pub fn local_path(url: &Url) -> Option<PathBuf> {
118 if is_local(url) {
119 url.to_file_path().ok()
120 } else {
121 None
122 }
123}
124
125pub fn child_uri(base: &Url, suffix: &str) -> String {
131 if let Some(path) = local_path(base) {
135 return path.join(suffix).display().to_string();
136 }
137 format!("{}/{suffix}", base.as_str().trim_end_matches('/'))
138}
139
140pub fn display(url: &Url) -> String {
144 if let Some(path) = local_path(url) {
145 contract_home(&path).display().to_string()
146 } else {
147 url.to_string()
148 }
149}
150
151pub fn url_for_path(path: impl AsRef<Path>) -> Result<Url> {
156 let path = path.as_ref();
157 let absolute = if path.is_absolute() {
158 path.to_path_buf()
159 } else {
160 std::path::absolute(path)
161 .with_context(|| format!("failed to absolutize {}", path.display()))?
162 };
163 Url::from_file_path(&absolute).map_err(|()| {
164 anyhow!(
165 "failed to convert path {} into a file:// URL",
166 absolute.display()
167 )
168 })
169}
170
/// Fully commented-out template for a pond `config.toml`.
///
/// Every setting in the template is a TOML comment, so loading it verbatim
/// produces the same [`Config`] as having no file at all. The text documents
/// each section (`[sources]`, `[embeddings]`, `[search]`, `[maintenance]`,
/// `[runtime]`, `[storage]`, `[creds.<name>]`) for users who want to override
/// a default.
pub const DEFAULT_CONFIG_TOML: &str = "\
# pond configuration.
#
# pond ships built-in defaults, so every setting here is optional - delete this
# file and pond still works. Uncomment and edit to override.

# Where pond looks for source data to import. One entry per adapter type
# (`claude-code`, `codex-cli`, ...). `pond sync` with no arguments syncs every
# entry; `pond sync <adapter>` syncs just one. With an empty `[sources]`,
# `pond sync` runs an interactive discovery against the known default paths
# and writes the picks back here.
#
# Future wrap: pond is single-namespace in v1 (spec.md#wire-namespace-resolution); `[sources]` is
# flat here. When multi-namespace pond lands, source registration becomes
# per-tenant under `[namespaces.<ns>.sources.<adapter>]`. Pre-v1 the schema
# is breakable; the rename is operationally free until a real second tenant
# exists.
#
# [sources.claude-code]
# enabled = true
# path = \"~/.claude/projects\"
#
# [sources.codex-cli]
# enabled = true
# path = \"~/.codex/sessions\"
#
# Set `enabled = false` to keep the section but skip it on `pond sync`;
# re-enable via `pond sync <adapter>`.

# Embeddings. Search runs hybrid (vector + FTS) whenever the store has any
# vectors, and FTS-only otherwise - the model loads lazily on the first hybrid
# query, so there's no cost on FTS-only corpora. `model` selects the
# HuggingFace XLM-RoBERTa model; `dim` declares its output width and is baked
# into the messages.vector schema on table creation - it must equal the
# model's hidden_size and be a multiple of 8 (IVF_PQ subspace stride).
#
# Common pairings:
# model = \"intfloat/multilingual-e5-small\" dim = 384 (default)
# model = \"intfloat/multilingual-e5-base\" dim = 768
# model = \"intfloat/multilingual-e5-large\" dim = 1024
#
# A different-dim model needs a fresh data dir; pond enforces this at the
# schema boundary.
#
# [embeddings]
# model = \"intfloat/multilingual-e5-small\"
# dim = 384

# Search tuning. Leave unset for Lance defaults; set when tuning IVF_PQ recall
# against a corpus.
#
# [search]
# nprobes = 16
# refine_factor = 2

# Storage maintenance. Tunes the compaction + cleanup pass that runs inside
# `pond sync` and `pond index optimize`.
#
# - `compaction_fragment_cap` is the per-task fragment-count backstop: a
# planned compaction task touching at least this many fragments always runs
# even when the write-amplification veto would skip it. Default 64; 0
# disables the veto and runs every task Lance plans.
# - `cleanup_older_than` is the manifest-retention window for the safe cleanup
# pass. Accepts `Ns` / `Nm` / `Nh` / `Nd` (default `1d`, floor `1h` - it is
# what protects in-flight readers). Versions older than this are reclaimed
# by Lance's OCC-coordinated GC.
# - `index_lag_threshold` is the minimum unindexed-fragment count before a
# per-intent append/rebuild runs in `pond index optimize`; the brute-force
# fallback keeps queries correct while fragments accumulate. Default 4.
#
# [maintenance]
# compaction_fragment_cap = 64
# cleanup_older_than = \"1d\"
# index_lag_threshold = 4

# Long-running process caps. Both accept either a plain byte count or a
# humansize-style suffix (\"128 MiB\", \"1 GiB\"). Both are optional - leave
# unset to let pond pick the backend-aware default:
# local FS : index_cache = 256 MiB, metadata_cache = 128 MiB
# remote : index_cache = 2 GiB, metadata_cache = 512 MiB
# Lance's library defaults (6 GiB / 1 GiB) are too generous for a per-session
# `pond mcp` process; tightening them is what keeps RSS under the 500 MiB target
# without measurable latency regressions on typical agent-history corpora.
#
# [runtime]
# index_cache_bytes = \"256 MiB\"
# metadata_cache_bytes = \"128 MiB\"

# Storage address and credentials (spec.md#storage-url-grammar).
#
# `path` is the default destination used when `--storage-path` (env
# `POND_STORAGE_PATH`) is not passed. Absent = the platform-local data dir.
# Addresses are URLs; the `s3+https` form carries the endpoint, bucket, and
# prefix in one token:
#
# /abs/path or ~/path local filesystem
# s3://bucket/prefix AWS S3 (ambient credential chain)
# s3+https://host/bucket/prefix S3-compatible endpoint (Hetzner, R2, B2, MinIO)
# gs://bucket/prefix Google Cloud Storage
# az://account/container/prefix Azure Blob
#
# Credentials live in `[creds.<name>]` sets and bind to URLs by `scope`
# prefix - longest match wins (spec.md#creds-scope-match); a set without
# `scope` matches any URL. With no matching set, the standard cloud SDK
# chain applies (AWS_* env, shared credentials file, instance metadata).
# Secrets never go in URLs or CLI flags; besides inline values,
# `access_key_id_file` / `secret_access_key_file` read a file and
# `secret_access_key_command` runs a command (e.g. `op read ...`). `extra`
# holds verbatim `object_store` options pond has not typed.
#
# Every field mirrors to env: `POND_STORAGE_PATH`, `POND_CREDS_<NAME>_<FIELD>`
# (set names are lowercase alphanumeric, so the env grammar is unambiguous).
# Precedence: CLI flag > POND_* env > this file > ambient cloud chain.
# Probe a destination end-to-end with `pond storage check`.
#
# Future wrap: pond is single-namespace in v1 (spec.md#wire-namespace-resolution);
# `[storage]` is flat here on the assumption of one bucket per pond. When
# multi-namespace pond lands this becomes `[namespaces.<ns>.storage]`.
#
# [storage]
# path = \"s3+https://nbg1.your-objectstorage.com/my-pond\"
#
# [creds.default]
# access_key_id = \"...\"
# secret_access_key = \"...\"
";
300
/// Root of the pond configuration as read from `config.toml` plus the
/// `POND_*` environment mirror.
///
/// Every section is optional and falls back to its `Default`; unknown
/// top-level keys are rejected (`deny_unknown_fields`).
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct Config {
    /// `[embeddings]`: embedding model id and vector width.
    #[serde(default)]
    pub embeddings: EmbeddingsConfig,
    /// `[search]`: optional search tuning knobs.
    #[serde(default)]
    pub search: SearchConfig,
    /// `[maintenance]`: optional compaction / cleanup / index-lag tuning.
    #[serde(default)]
    pub maintenance: MaintenanceConfig,
    /// `[runtime]`: optional cache size caps.
    #[serde(default)]
    pub runtime: RuntimeConfig,
    /// `[sources.<adapter>]` entries, keyed by adapter name and kept as
    /// untyped JSON values; `resolve_sources` filters them by their
    /// `enabled` flag.
    #[serde(default)]
    pub sources: BTreeMap<String, Value>,
    /// `[storage]`: default storage destination.
    #[serde(default)]
    pub storage: StorageConfig,
    /// `[creds.<name>]` credential sets, keyed by set name. Names and
    /// cross-set constraints are checked by `validate_creds` at load time.
    #[serde(default)]
    pub creds: BTreeMap<String, CredsSet>,
}
330
/// The `[storage]` table. Only `path` is accepted; any other key is an error
/// (`deny_unknown_fields`), which is what triggers the legacy-schema check in
/// `Config::load_with_provenance`.
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct StorageConfig {
    /// Default storage destination (a local path or a storage URL), kept as
    /// the raw string from the config file or `POND_STORAGE_PATH`.
    #[serde(default)]
    pub path: Option<String>,
}
340
/// One `[creds.<name>]` credential set.
///
/// All fields are optional. `Config::validate_creds` rejects sets that give
/// the access key id both inline and by file, or that give the secret in more
/// than one of its three forms.
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct CredsSet {
    /// URL prefix this set applies to. A set without a scope is the single
    /// allowed catch-all; two sets may not share the same canonical scope.
    #[serde(default)]
    pub scope: Option<String>,
    /// Inline access key id. Deserialized leniently, so a value that was
    /// parsed as a number or boolean is still accepted as text.
    #[serde(default, deserialize_with = "lenient_string")]
    pub access_key_id: Option<String>,
    /// Path of a file holding the access key id (alternative to the inline value).
    #[serde(default)]
    pub access_key_id_file: Option<PathBuf>,
    /// Inline secret access key, deserialized leniently like `access_key_id`.
    #[serde(default, deserialize_with = "lenient_string")]
    pub secret_access_key: Option<String>,
    /// Path of a file holding the secret access key.
    #[serde(default)]
    pub secret_access_key_file: Option<PathBuf>,
    /// Command line whose output supplies the secret access key.
    /// NOTE(review): execution happens outside this file - confirm semantics there.
    #[serde(default)]
    pub secret_access_key_command: Option<String>,
    /// Region name, deserialized leniently.
    #[serde(default, deserialize_with = "lenient_string")]
    pub region: Option<String>,
    /// Optional override for virtual-hosted-style addressing.
    #[serde(default)]
    pub virtual_hosted_style_request: Option<bool>,
    /// Additional string options passed through untyped. Not settable through
    /// the environment mirror (`env_mirror` filters `*_extra` keys out).
    #[serde(default)]
    pub extra: BTreeMap<String, String>,
}
371
/// The `[runtime]` table: optional cache size caps.
///
/// Both fields accept either a plain integer byte count or a string with a
/// unit suffix such as `"256 MiB"` (see `deserialize_byte_size_opt`). `None`
/// means "not configured"; the fallback value is chosen elsewhere.
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
#[serde(deny_unknown_fields, default)]
pub struct RuntimeConfig {
    /// Cap for the index cache, in bytes.
    #[serde(default, deserialize_with = "deserialize_byte_size_opt")]
    pub index_cache_bytes: Option<usize>,
    /// Cap for the metadata cache, in bytes.
    #[serde(default, deserialize_with = "deserialize_byte_size_opt")]
    pub metadata_cache_bytes: Option<usize>,
}
384
/// The `[search]` table: optional vector-search tuning. `None` leaves the
/// corresponding knob unset.
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct SearchConfig {
    /// `nprobes` setting for vector search, if overridden.
    #[serde(default)]
    pub nprobes: Option<usize>,
    /// `refine_factor` setting for vector search, if overridden.
    #[serde(default)]
    pub refine_factor: Option<u32>,
}
394
/// The `[maintenance]` table: optional tuning for the storage maintenance
/// pass. `None` leaves the corresponding knob at its built-in default.
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct MaintenanceConfig {
    /// Per-task fragment-count backstop for compaction.
    #[serde(default)]
    pub compaction_fragment_cap: Option<usize>,
    /// Manifest-retention window for cleanup, kept as the raw string
    /// (e.g. `"1d"`). It is not parsed or validated in this file.
    #[serde(default)]
    pub cleanup_older_than: Option<String>,
    /// Minimum unindexed-fragment count before an index append/rebuild runs.
    /// When set, `Config::load_with_provenance` installs it process-wide via
    /// `crate::substrate::init_index_lag_threshold`.
    #[serde(default)]
    pub index_lag_threshold: Option<usize>,
}
422
/// The `[embeddings]` table: which embedding model to use and how wide its
/// vectors are.
///
/// Missing fields fall back to the values from `Default` (container-level
/// `#[serde(default)]`); the result is checked by `EmbeddingsConfig::validate`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(deny_unknown_fields, default)]
pub struct EmbeddingsConfig {
    /// HuggingFace model id; must not be blank.
    pub model: String,
    /// Embedding vector width; must be a positive multiple of 8.
    pub dim: usize,
}
442
443impl Default for EmbeddingsConfig {
444 fn default() -> Self {
445 Self {
446 model: crate::embed::DEFAULT_MODEL_ID.to_owned(),
447 dim: crate::sessions::DEFAULT_EMBEDDING_DIM,
448 }
449 }
450}
451
452pub fn default_storage_path(xdg_data_home: Option<PathBuf>, home: Option<PathBuf>) -> Result<Url> {
458 if let Some(xdg) = xdg_data_home.filter(|path| path.is_absolute()) {
459 return url_for_path(xdg.join("pond"));
460 }
461 if let Some(home) = home {
462 return url_for_path(home.join(".local").join("share").join("pond"));
463 }
464 url_for_path(PathBuf::from(".pond"))
466}
467
/// Picks the default location of the config file.
///
/// Resolution order:
/// 1. `<xdg_config_home>/pond/config.toml` when `xdg_config_home` is set and absolute;
/// 2. `<home>/.config/pond/config.toml` when `home` is set;
/// 3. `.pond.toml` relative to the current directory.
pub fn default_config_path(xdg_config_home: Option<PathBuf>, home: Option<PathBuf>) -> PathBuf {
    match (xdg_config_home, home) {
        // A relative XDG value is ignored rather than trusted.
        (Some(xdg), _) if xdg.is_absolute() => xdg.join("pond").join("config.toml"),
        (_, Some(home)) => home.join(".config").join("pond").join("config.toml"),
        (_, None) => PathBuf::from(".pond.toml"),
    }
}
481
482impl Config {
483 pub fn load(path: impl AsRef<Path>) -> Result<Self> {
491 Ok(Self::load_with_provenance(path)?.0)
492 }
493
494 pub fn load_str(body: &str) -> Result<Self> {
499 let figment = Figment::new().merge(Toml::string(body)).merge(env_mirror());
500 let config: Self = figment
501 .extract_lossy()
502 .map_err(|error| anyhow!("failed to load config: {error}"))?;
503 config.embeddings.validate()?;
504 config.validate_creds()?;
505 Ok(config)
506 }
507
508 pub fn load_with_provenance(path: impl AsRef<Path>) -> Result<(Self, Figment)> {
511 let path = path.as_ref();
512 let figment = Figment::new().merge(Toml::file(path)).merge(env_mirror());
513 let config: Self = figment.extract_lossy().map_err(|error| {
517 if let Some(recipe) = detect_legacy_storage(path) {
518 return anyhow!("{recipe}");
519 }
520 anyhow!("failed to load config {}: {error}", path.display())
523 })?;
524 config.embeddings.validate()?;
525 config.validate_creds()?;
526 config.embeddings.install_runtime();
527 if let Some(threshold) = config.maintenance.index_lag_threshold {
528 crate::substrate::init_index_lag_threshold(threshold);
529 }
530 Ok((config, figment))
534 }
535
536 fn validate_creds(&self) -> Result<()> {
541 let mut scopeless: Option<&str> = None;
542 let mut scopes: BTreeMap<String, &str> = BTreeMap::new();
543 for (name, set) in &self.creds {
544 let mut chars = name.chars();
548 let head_ok = chars.next().is_some_and(|c| c.is_ascii_lowercase());
549 if !head_ok
550 || name.len() > 16
551 || !chars.all(|c| c.is_ascii_lowercase() || c.is_ascii_digit())
552 {
553 bail!(
554 "creds set name {name:?} must match [a-z][a-z0-9]{{0,15}} (lowercase alphanumeric, no separators)"
555 );
556 }
557 if set.access_key_id.is_some() && set.access_key_id_file.is_some() {
558 bail!("[creds.{name}] sets both access_key_id and access_key_id_file; pick one");
559 }
560 let secret_variants = [
561 set.secret_access_key.is_some(),
562 set.secret_access_key_file.is_some(),
563 set.secret_access_key_command.is_some(),
564 ]
565 .iter()
566 .filter(|present| **present)
567 .count();
568 if secret_variants > 1 {
569 bail!(
570 "[creds.{name}] sets more than one of secret_access_key / secret_access_key_file / secret_access_key_command; pick one"
571 );
572 }
573 match set.scope.as_deref() {
574 None => {
575 if let Some(other) = scopeless {
576 bail!(
577 "[creds.{other}] and [creds.{name}] are both scope-less; at most one catch-all set is allowed - add a `scope` to one"
578 );
579 }
580 scopeless = Some(name);
581 }
582 Some(scope) => {
583 let canonical = crate::substrate::parse_scope(scope)
588 .map(|url| url.as_str().trim_end_matches('/').to_owned())
589 .with_context(|| {
590 format!("[creds.{name}] scope {scope:?} is not a valid URL prefix")
591 })?;
592 if let Some(other) = scopes.insert(canonical, name) {
593 bail!(
594 "[creds.{other}] and [creds.{name}] declare the same scope {scope:?}; merge them or narrow one"
595 );
596 }
597 }
598 }
599 }
600 Ok(())
601 }
602
603 pub fn resolve_sources(&self, adapter: Option<&str>) -> Result<Vec<(String, Value)>> {
612 match adapter {
613 None => Ok(self
614 .sources
615 .iter()
616 .filter_map(|(name, blob)| take_enabled(name, blob))
617 .collect()),
618 Some(name) => {
619 let blob = self
620 .sources
621 .get(name)
622 .ok_or_else(|| anyhow!("no [sources.{name}] entry in config"))?;
623 take_enabled(name, blob).map(|entry| vec![entry]).ok_or_else(|| {
624 anyhow!(
625 "source [{name}] is disabled (enabled = false); run `pond sync {name}` to re-enable"
626 )
627 })
628 }
629 }
630 }
631
632 pub fn disabled_source_names(&self) -> Vec<&str> {
637 self.sources
638 .iter()
639 .filter_map(|(name, blob)| {
640 let enabled = blob
641 .get("enabled")
642 .and_then(Value::as_bool)
643 .unwrap_or(false);
644 if enabled { None } else { Some(name.as_str()) }
645 })
646 .collect()
647 }
648}
649
650fn env_mirror() -> Env {
656 Env::prefixed("POND_")
659 .filter(|key| {
660 let key = key.as_str().to_ascii_lowercase();
661 key == "storage_path" || (key.starts_with("creds_") && !key.ends_with("_extra"))
665 })
666 .map(|key| {
667 let key = key.as_str().to_ascii_lowercase();
671 let dots = if key.starts_with("creds_") { 2 } else { 1 };
672 key.replacen('_', ".", dots).into()
673 })
674}
675
/// Key names under which the old `[storage]` passthrough map carried the
/// endpoint. `detect_legacy_storage` matches them case-insensitively.
pub const LEGACY_ENDPOINT_KEYS: &[&str] = &["aws_endpoint", "endpoint"];
/// Key names under which the old `[storage]` map carried the access key id.
pub const LEGACY_ACCESS_KEY_KEYS: &[&str] = &["aws_access_key_id", "access_key_id"];
/// Key names under which the old `[storage]` map carried the secret access key.
pub const LEGACY_SECRET_KEY_KEYS: &[&str] = &["aws_secret_access_key", "secret_access_key"];
/// Key names under which the old `[storage]` map carried the
/// virtual-hosted-style flag. `detect_legacy_storage` matches them
/// case-insensitively.
pub const LEGACY_VIRTUAL_HOSTED_KEYS: &[&str] = &[
    "aws_virtual_hosted_style_request",
    "virtual_hosted_style_request",
];
688
689fn detect_legacy_storage(path: &Path) -> Option<String> {
694 let text = std::fs::read_to_string(path).ok()?;
695 let value: toml::Value = toml::from_str(&text).ok()?;
696 let storage = value.get("storage")?.as_table()?;
697 if storage.is_empty() || storage.keys().all(|key| key == "path") {
698 return None;
699 }
700 let get = |names: &[&str]| {
701 storage.iter().find_map(|(key, value)| {
702 names
703 .iter()
704 .any(|name| key.eq_ignore_ascii_case(name))
705 .then(|| value.as_str().unwrap_or_default().to_owned())
706 })
707 };
708 let endpoint = get(LEGACY_ENDPOINT_KEYS);
709 let host = endpoint
710 .as_deref()
711 .and_then(|e| e.split("://").nth(1))
712 .unwrap_or("<endpoint-host>");
713 let virtual_hosted = storage.iter().any(|(key, value)| {
717 LEGACY_VIRTUAL_HOSTED_KEYS
718 .iter()
719 .any(|name| key.eq_ignore_ascii_case(name))
720 && (value.as_bool().unwrap_or(false)
721 || value
722 .as_str()
723 .is_some_and(|text| text.eq_ignore_ascii_case("true") || text == "1"))
724 });
725 let path_recipe = match host.split_once('.') {
726 Some((bucket, rest)) if virtual_hosted && rest.contains('.') => {
727 format!("s3+https://{rest}/{bucket}/<prefix>")
728 }
729 _ => format!("s3+https://{host}/<bucket>/<prefix>"),
730 };
731 let mut recipe = format!(
734 "config {} uses the old [storage] passthrough map; rewrite it as:\n\n[storage]\npath = \"{path_recipe}\"\n\n[creds.default]\n",
735 path.display(),
736 );
737 recipe.push_str("access_key_id = \"...\" # copy from the old [storage] section\n");
738 recipe.push_str("secret_access_key = \"...\" # copy from the old [storage] section\n");
739 recipe.push_str(
740 "\n(the endpoint and bucket fold into the URL; allow_http is scheme-derived; virtual-hosted addressing defaults on; the region is autodetected - append ?region=<x> to the URL only if your store insists. `pond storage check` verifies the result end-to-end, and `pond init` can apply this rewrite for you)",
741 );
742 Some(recipe)
743}
744
745fn take_enabled(name: &str, blob: &Value) -> Option<(String, Value)> {
749 let enabled = blob
750 .get("enabled")
751 .and_then(Value::as_bool)
752 .unwrap_or(false);
753 if !enabled {
754 return None;
755 }
756 let mut clean = blob.clone();
757 if let Some(obj) = clean.as_object_mut() {
758 obj.remove("enabled");
759 }
760 Some((name.to_owned(), clean))
761}
762
763pub fn expand_home_under(path: &Path, home: &Path) -> PathBuf {
770 let Some(text) = path.to_str() else {
771 return path.to_path_buf();
772 };
773 let home_text = home.to_string_lossy();
774 let expanded = shellexpand::full_with_context_no_errors(
775 text,
776 || Some(home_text.clone()),
777 |var| std::env::var(var).ok(),
778 );
779 PathBuf::from(expanded.as_ref())
780}
781
/// Rewrites `path` relative to `home` using the `~` shorthand.
///
/// `home` itself becomes `~`, a path below it becomes `~/<rest>`, and any
/// path outside `home` is returned unchanged.
pub fn contract_home_under(path: &Path, home: &Path) -> PathBuf {
    let Ok(rest) = path.strip_prefix(home) else {
        return path.to_path_buf();
    };
    if rest.as_os_str().is_empty() {
        PathBuf::from("~")
    } else {
        Path::new("~").join(rest)
    }
}
793
794pub fn contract_home(path: &Path) -> PathBuf {
798 match std::env::var_os("HOME") {
799 Some(home) => contract_home_under(path, Path::new(&home)),
800 None => path.to_path_buf(),
801 }
802}
803
804impl EmbeddingsConfig {
805 pub fn validate(&self) -> Result<()> {
810 if self.model.trim().is_empty() {
811 bail!("embeddings.model must be a non-empty HuggingFace model id");
812 }
813 if self.dim == 0 || !self.dim.is_multiple_of(8) {
814 bail!(
815 "embeddings.dim = {} must be a positive multiple of 8 (IVF_PQ subspace stride)",
816 self.dim,
817 );
818 }
819 Ok(())
820 }
821
822 pub fn install_runtime(&self) {
826 crate::embed::init_model_id(self.model.clone());
827 crate::sessions::init_embedding_dim(self.dim);
828 }
829}
830
/// Unit tests for config loading, validation, and the path/URL helpers.
#[cfg(test)]
mod tests {
    // Tests unwrap/expect freely; `result_large_err` is presumably triggered by
    // the `figment::Jail` closures - confirm before removing.
    #![allow(clippy::expect_used, clippy::unwrap_used, clippy::result_large_err)]

    use super::*;
    use serde_json::Value;
    use tempfile::TempDir;

    /// `validate` accepts the defaults and rejects a blank model, a dim that
    /// is not a multiple of 8, and a zero dim.
    #[test]
    fn validate_catches_empty_model_and_bad_dim() {
        assert!(EmbeddingsConfig::default().validate().is_ok());
        // Whitespace-only model id.
        let bad_model = EmbeddingsConfig {
            model: " ".to_owned(),
            dim: 768,
        };
        assert!(bad_model.validate().is_err());
        // 100 is not a multiple of 8.
        let bad_dim = EmbeddingsConfig {
            model: "intfloat/multilingual-e5-base".to_owned(),
            dim: 100,
        };
        assert!(bad_dim.validate().is_err());
        // Zero is a multiple of 8 but still not allowed.
        let zero_dim = EmbeddingsConfig {
            model: "intfloat/multilingual-e5-base".to_owned(),
            dim: 0,
        };
        assert!(zero_dim.validate().is_err());
    }

    /// A missing config file is not an error; the built-in defaults apply.
    #[test]
    fn config_load_missing_file_falls_back_to_builtin() {
        let config = Config::load("/nonexistent/pond-config-xyz.toml").unwrap();
        assert_eq!(config.embeddings, EmbeddingsConfig::default());
    }

    /// The shipped template is fully commented out, so loading it must equal
    /// the built-in defaults.
    #[test]
    fn default_config_toml_loads_to_the_builtin_defaults() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("config.toml");
        std::fs::write(&path, DEFAULT_CONFIG_TOML).unwrap();
        let config = Config::load(&path).unwrap();
        assert_eq!(config.embeddings, EmbeddingsConfig::default());
        assert_eq!(config.embeddings.model, crate::embed::DEFAULT_MODEL_ID);
        assert_eq!(
            config.embeddings.dim,
            crate::sessions::DEFAULT_EMBEDDING_DIM
        );
    }

    /// Storage path resolution: absolute XDG dir, then home, then `.pond`.
    #[test]
    fn default_storage_path_follows_xdg_then_home() {
        // Absolute XDG data home wins.
        let resolved =
            default_storage_path(Some(PathBuf::from("/xdg")), Some(PathBuf::from("/home")))
                .unwrap();
        assert!(is_local(&resolved));
        assert_eq!(local_path(&resolved).unwrap(), PathBuf::from("/xdg/pond"));

        // A relative XDG value is ignored in favour of home.
        let resolved = default_storage_path(
            Some(PathBuf::from("relative")),
            Some(PathBuf::from("/home")),
        )
        .unwrap();
        assert_eq!(
            local_path(&resolved).unwrap(),
            PathBuf::from("/home/.local/share/pond"),
        );

        // Neither set: falls back to `.pond`, absolutized against the cwd.
        let resolved = default_storage_path(None, None).unwrap();
        assert!(is_local(&resolved));
        assert!(
            local_path(&resolved).unwrap().ends_with(".pond"),
            "fallback path should end with .pond: {resolved}",
        );
    }

    /// `~` and `~/...` expand to the given home; other paths are untouched.
    #[test]
    fn expand_home_under_handles_tilde_forms() {
        let home = Path::new("/srv/me");
        assert_eq!(
            expand_home_under(Path::new("~"), home),
            PathBuf::from("/srv/me")
        );
        assert_eq!(
            expand_home_under(Path::new("~/.codex/sessions"), home),
            PathBuf::from("/srv/me/.codex/sessions"),
        );
        // Absolute paths pass through unchanged.
        assert_eq!(
            expand_home_under(Path::new("/etc/passwd"), home),
            PathBuf::from("/etc/passwd"),
        );
        // `~user` forms are not expanded.
        assert_eq!(
            expand_home_under(Path::new("~user/elsewhere"), home),
            PathBuf::from("~user/elsewhere"),
        );
    }

    /// `$VAR` and `${VAR}` expand from the environment; unset variables are
    /// left verbatim.
    #[test]
    fn expand_home_under_handles_env_vars() {
        figment::Jail::expect_with(|jail| {
            jail.set_env("POND_TEST_EXPAND_DIR", "/srv/data");
            let home = Path::new("/srv/me");
            assert_eq!(
                expand_home_under(Path::new("$POND_TEST_EXPAND_DIR/pond"), home),
                PathBuf::from("/srv/data/pond"),
            );
            assert_eq!(
                expand_home_under(Path::new("${POND_TEST_EXPAND_DIR}/pond"), home),
                PathBuf::from("/srv/data/pond"),
            );
            // Unset variable: the reference stays in the output.
            assert_eq!(
                expand_home_under(Path::new("$POND_TEST_UNSET_VAR/x"), home),
                PathBuf::from("$POND_TEST_UNSET_VAR/x"),
            );
            Ok(())
        });
    }

    /// Contraction maps home to `~`, children to `~/...`, and leaves paths
    /// outside home alone.
    #[test]
    fn contract_home_under_inverts_expansion() {
        let home = Path::new("/srv/me");
        assert_eq!(
            contract_home_under(Path::new("/srv/me/.local/share/pond"), home),
            PathBuf::from("~/.local/share/pond"),
        );
        assert_eq!(
            contract_home_under(Path::new("/srv/me"), home),
            PathBuf::from("~")
        );
        // Outside home: unchanged.
        assert_eq!(
            contract_home_under(Path::new("/etc/passwd"), home),
            PathBuf::from("/etc/passwd"),
        );
    }

    /// `resolve_sources` returns all enabled entries, a single named entry,
    /// or an error for disabled / unknown adapters.
    #[test]
    fn resolve_sources_returns_one_or_all_or_errors() {
        let temp = TempDir::new().unwrap();
        let body = "\
[sources.claude-code]
enabled = true
path = \"/srv/claude\"

[sources.codex-cli]
enabled = true
path = \"/srv/codex\"

[sources.opencode]
enabled = false
";
        let path = temp.path().join("config.toml");
        std::fs::write(&path, body).expect("write config");
        let config = Config::load(&path).unwrap();

        // No adapter: every enabled entry, with `enabled` stripped.
        let all = config.resolve_sources(None).unwrap();
        assert_eq!(all.len(), 2);
        let names: Vec<_> = all.iter().map(|(n, _)| n.as_str()).collect();
        assert!(names.contains(&"claude-code"));
        assert!(names.contains(&"codex-cli"));
        for (_, blob) in &all {
            assert!(blob.get("enabled").is_none(), "enabled should be stripped");
        }

        // Named adapter: exactly that entry.
        let one = config.resolve_sources(Some("codex-cli")).unwrap();
        assert_eq!(one.len(), 1);
        assert_eq!(one[0].0, "codex-cli");
        assert_eq!(
            one[0].1.get("path").and_then(Value::as_str),
            Some("/srv/codex"),
        );

        // Named but disabled adapter: error that explains how to re-enable.
        let disabled = config.resolve_sources(Some("opencode"));
        let err = disabled
            .expect_err("disabled adapter must error")
            .to_string();
        assert!(err.contains("enabled = false"), "got: {err}");
        assert!(err.contains("pond sync opencode"), "got: {err}");

        // Unknown adapter.
        assert!(config.resolve_sources(Some("nope")).is_err());

        assert_eq!(config.disabled_source_names(), vec!["opencode"]);
    }

    /// Non-file schemes such as `memory://` are not local.
    #[test]
    fn memory_uri_is_classified_as_remote() {
        let url = Url::parse("memory:///pond-remote-test").expect("memory uri parses");
        assert!(
            !is_local(&url),
            "memory:// is not a local-filesystem URL: {url}",
        );
        assert!(
            local_path(&url).is_none(),
            "local_path must return None for non-file schemes",
        );
    }

    /// `[storage]` and `[creds.*]` deserialize into their typed fields.
    #[test]
    fn storage_and_creds_round_trip() {
        figment::Jail::expect_with(|jail| {
            jail.create_file(
                "config.toml",
                r#"
[storage]
path = "s3+https://nbg1.example.com/my-pond"

[creds.default]
access_key_id = "AKIA123"
secret_access_key = "shh"

[creds.work]
scope = "s3+https://fsn1.example.com/work-pond/"
access_key_id = "AKIA456"
secret_access_key_command = "op read op://vault/pond/secret"
region = "fsn1"
virtual_hosted_style_request = false
extra = { request_timeout = "60 seconds" }
"#,
            )?;
            let config = Config::load("config.toml").expect("config loads");
            assert_eq!(
                config.storage.path.as_deref(),
                Some("s3+https://nbg1.example.com/my-pond"),
            );
            assert_eq!(config.creds.len(), 2);
            let work = &config.creds["work"];
            assert_eq!(
                work.secret_access_key_command.as_deref(),
                Some("op read op://vault/pond/secret"),
            );
            assert_eq!(work.virtual_hosted_style_request, Some(false));
            assert_eq!(work.extra["request_timeout"], "60 seconds");
            Ok(())
        });
    }

    /// Each malformed `[creds.*]` shape is rejected with an error containing
    /// the paired needle.
    #[test]
    fn creds_validators_reject_bad_shapes() {
        let cases: &[(&str, &str)] = &[
            // Misspelled field name (unknown field).
            ("[creds.a]\nacces_key_id = \"x\"\n", "acces_key_id"),
            // Set name with a separator.
            ("[creds.my_set]\naccess_key_id = \"x\"\n", "[a-z][a-z0-9]"),
            // Set name with an uppercase letter.
            ("[creds.A1]\naccess_key_id = \"x\"\n", "[a-z][a-z0-9]"),
            (
                // Two secret forms at once.
                "[creds.a]\nsecret_access_key = \"x\"\nsecret_access_key_command = \"cat\"\n",
                "more than one",
            ),
            (
                // Access key id given inline and by file.
                "[creds.a]\naccess_key_id = \"x\"\naccess_key_id_file = \"/k\"\n",
                "pick one",
            ),
            (
                // Two scope-less sets.
                "[creds.a]\naccess_key_id = \"x\"\n[creds.b]\naccess_key_id = \"y\"\n",
                "scope-less",
            ),
            (
                // Two scopes that differ only in default port / trailing slash.
                "[creds.a]\nscope = \"s3+https://h:443/b/\"\naccess_key_id = \"x\"\n[creds.b]\nscope = \"s3+https://h/b\"\naccess_key_id = \"y\"\n",
                "same scope",
            ),
        ];
        figment::Jail::expect_with(|jail| {
            for (body, needle) in cases {
                jail.create_file("config.toml", body)?;
                let err = Config::load("config.toml").expect_err(body).to_string();
                assert!(
                    err.contains(needle),
                    "want {needle:?} in error for {body:?}, got: {err}",
                );
            }
            Ok(())
        });
    }

    /// The old `[storage]` passthrough map fails to load with a rewrite
    /// recipe that never echoes the secrets.
    #[test]
    fn legacy_storage_map_errors_with_the_rewrite_recipe() {
        figment::Jail::expect_with(|jail| {
            jail.create_file(
                "config.toml",
                r#"
[storage]
AWS_ACCESS_KEY_ID = "AKIA123"
AWS_SECRET_ACCESS_KEY = "shh"
AWS_REGION = "nbg1"
AWS_ENDPOINT = "https://ttq.nbg1.your-objectstorage.com"
aws_virtual_hosted_style_request = "true"
"#,
            )?;
            let err = Config::load("config.toml")
                .expect_err("legacy map must error")
                .to_string();
            assert!(err.contains("old [storage] passthrough map"), "got: {err}");
            // Virtual-hosted endpoint: the bucket label moves into the URL path.
            assert!(
                err.contains("s3+https://nbg1.your-objectstorage.com/ttq/<prefix>"),
                "recipe must de-fold the virtual-hosted endpoint, got: {err}",
            );
            // Credential values must not leak into the error text.
            assert!(!err.contains("AKIA123"), "got: {err}");
            assert!(!err.contains("\"shh\""), "got: {err}");
            assert!(err.contains("access_key_id = \"...\""), "got: {err}");
            // Region is only mentioned as an optional URL query parameter.
            assert!(!err.contains("region ="), "got: {err}");
            assert!(err.contains("?region="), "got: {err}");
            assert!(err.contains("pond storage check"), "got: {err}");
            // Without the virtual-hosted flag the host is kept whole.
            jail.create_file(
                "config.toml",
                r#"
[storage]
AWS_ACCESS_KEY_ID = "AKIA123"
AWS_ENDPOINT = "https://ttq.nbg1.your-objectstorage.com"
"#,
            )?;
            let err = Config::load("config.toml")
                .expect_err("legacy map must error")
                .to_string();
            assert!(
                err.contains("s3+https://ttq.nbg1.your-objectstorage.com/<bucket>/<prefix>"),
                "got: {err}",
            );
            Ok(())
        });
    }

    /// `POND_*` variables override file values field by field and can create
    /// creds sets that the file does not mention.
    #[test]
    fn env_mirror_layers_over_file() {
        figment::Jail::expect_with(|jail| {
            jail.create_file(
                "config.toml",
                r#"
[storage]
path = "/from-file"

[creds.work]
scope = "s3://file-bucket/"
access_key_id = "from-file"
region = "file-region"
"#,
            )?;
            jail.set_env("POND_STORAGE_PATH", "/from-env");
            jail.set_env("POND_CREDS_WORK_ACCESS_KEY_ID", "from-env");
            // Numeric-looking secret: must still come back as a string.
            jail.set_env("POND_CREDS_WORK_SECRET_ACCESS_KEY", "12345");
            // A set that exists only in the environment.
            jail.set_env("POND_CREDS_CI_ACCESS_KEY_ID", "ci-key");
            let config = Config::load("config.toml").expect("env+file config loads");
            assert_eq!(config.storage.path.as_deref(), Some("/from-env"));
            let work = &config.creds["work"];
            assert_eq!(work.access_key_id.as_deref(), Some("from-env"));
            assert_eq!(work.secret_access_key.as_deref(), Some("12345"));
            assert_eq!(work.region.as_deref(), Some("file-region"));
            assert_eq!(work.scope.as_deref(), Some("s3://file-bucket/"));
            assert_eq!(config.creds["ci"].access_key_id.as_deref(), Some("ci-key"));
            Ok(())
        });
    }
}