1use std::{
11 collections::BTreeMap,
12 path::{Path, PathBuf},
13};
14
15use anyhow::{Context, Result, anyhow, bail};
16use figment::{
17 Figment,
18 providers::{Env, Format, Toml},
19};
20use serde::{Deserialize, Deserializer, Serialize, de};
21use serde_json::Value;
22use url::Url;
23
/// Parses a human-readable byte size such as `"128 MiB"`, `"1.5GB"` or `"4096"`.
///
/// The input is a non-negative decimal number optionally followed by a
/// case-insensitive unit. Decimal units (`k`/`kb`, `m`/`mb`, `g`/`gb`,
/// `t`/`tb`) are powers of 1000; binary units (`kib`, `mib`, `gib`, `tib`)
/// are powers of 1024. No unit, or `b`, means plain bytes. A fractional byte
/// count is truncated toward zero.
///
/// # Errors
///
/// Returns a human-readable message when the input is empty, the numeric part
/// does not parse, the value is negative or non-finite, the unit is unknown,
/// or the result does not fit in `usize`.
fn parse_byte_size(raw: &str) -> Result<usize, String> {
    let trimmed = raw.trim();
    if trimmed.is_empty() {
        return Err("byte-size value is empty".to_owned());
    }
    // The unit starts at the first ASCII letter; everything before it is the number.
    let split = trimmed
        .find(|c: char| c.is_ascii_alphabetic())
        .unwrap_or(trimmed.len());
    let (number, unit) = trimmed.split_at(split);
    let number: f64 = number
        .trim()
        .parse()
        .map_err(|_| format!("byte-size value {raw:?} is not a number"))?;
    if !number.is_finite() || number < 0.0 {
        return Err(format!("byte-size value {raw:?} must be non-negative"));
    }
    let multiplier: f64 = match unit.trim().to_ascii_lowercase().as_str() {
        "" | "b" => 1.0,
        "k" | "kb" => 1_000.0,
        "kib" => 1_024.0,
        "m" | "mb" => 1_000_000.0,
        "mib" => 1_048_576.0,
        "g" | "gb" => 1_000_000_000.0,
        "gib" => 1_073_741_824.0,
        "t" | "tb" => 1_000_000_000_000.0,
        "tib" => 1_099_511_627_776.0,
        other => {
            return Err(format!(
                "byte-size unit {other:?} not recognized (try MiB / GiB)"
            ));
        }
    };
    let bytes = number * multiplier;
    // `usize::MAX as f64` rounds up to 2^BITS on 64-bit targets, so comparing
    // against it with `>` lets 2^BITS through and the cast below would then
    // silently saturate. Compare against the exact exclusive bound instead.
    let limit = 2f64.powi(usize::BITS as i32);
    if !bytes.is_finite() || bytes >= limit {
        return Err(format!("byte-size value {raw:?} overflows usize"));
    }
    Ok(bytes as usize)
}
65
66fn lenient_string<'de, D>(deserializer: D) -> Result<Option<String>, D::Error>
70where
71 D: Deserializer<'de>,
72{
73 #[derive(Deserialize)]
74 #[serde(untagged)]
75 enum Repr {
76 Text(String),
77 Int(i64),
78 Float(f64),
79 Bool(bool),
80 }
81 Ok(
82 Option::<Repr>::deserialize(deserializer)?.map(|repr| match repr {
83 Repr::Text(value) => value,
84 Repr::Int(value) => value.to_string(),
85 Repr::Float(value) => value.to_string(),
86 Repr::Bool(value) => value.to_string(),
87 }),
88 )
89}
90
91fn deserialize_byte_size_opt<'de, D>(deserializer: D) -> Result<Option<usize>, D::Error>
92where
93 D: Deserializer<'de>,
94{
95 #[derive(Deserialize)]
96 #[serde(untagged)]
97 enum Repr {
98 Bytes(u64),
99 Text(String),
100 }
101 let repr: Option<Repr> = Option::deserialize(deserializer)?;
102 match repr {
103 None => Ok(None),
104 Some(Repr::Bytes(value)) => usize::try_from(value).map(Some).map_err(de::Error::custom),
105 Some(Repr::Text(value)) => parse_byte_size(&value).map(Some).map_err(de::Error::custom),
106 }
107}
108
109pub fn is_local(url: &Url) -> bool {
113 matches!(url.scheme(), "file" | "file+uring")
114}
115
116pub fn local_path(url: &Url) -> Option<PathBuf> {
118 if is_local(url) {
119 url.to_file_path().ok()
120 } else {
121 None
122 }
123}
124
125pub fn child_uri(base: &Url, suffix: &str) -> String {
131 if let Some(path) = local_path(base) {
135 return path.join(suffix).display().to_string();
136 }
137 format!("{}/{suffix}", base.as_str().trim_end_matches('/'))
138}
139
140pub fn display(url: &Url) -> String {
144 if let Some(path) = local_path(url) {
145 contract_home(&path).display().to_string()
146 } else {
147 url.to_string()
148 }
149}
150
151pub fn url_for_path(path: impl AsRef<Path>) -> Result<Url> {
156 let path = path.as_ref();
157 let absolute = if path.is_absolute() {
158 path.to_path_buf()
159 } else {
160 std::path::absolute(path)
161 .with_context(|| format!("failed to absolutize {}", path.display()))?
162 };
163 Url::from_file_path(&absolute).map_err(|()| {
164 anyhow!(
165 "failed to convert path {} into a file:// URL",
166 absolute.display()
167 )
168 })
169}
170
/// Template text for a fresh pond `config.toml`.
///
/// Every line is either blank or a TOML comment, so the template itself sets
/// nothing: it documents the `[adapters]`, `[embeddings]`, `[search]`,
/// `[maintenance]`, `[runtime]`, `[storage]` and `[creds.<name>]` sections
/// with commented-out examples for the user to uncomment and edit.
pub const DEFAULT_CONFIG_TOML: &str = "\
# pond configuration.
#
# pond ships built-in defaults, so every setting here is optional - delete this
# file and pond still works. Uncomment and edit to override.

# Where pond looks for adapter data to import. One entry per adapter type
# (`claude-code`, `codex-cli`, ...). `pond sync` with no arguments syncs every
# entry; `pond sync <adapter>` syncs just one. With an empty `[adapters]`,
# `pond sync` runs an interactive discovery against the known default paths
# and writes the picks back here.
#
# Future wrap: pond is single-namespace in v1 (spec.md#wire-namespace-resolution); `[adapters]` is
# flat here. When multi-namespace pond lands, adapter registration becomes
# per-tenant under `[namespaces.<ns>.adapters.<adapter>]`. Pre-v1 the schema
# is breakable; the rename is operationally free until a real second tenant
# exists.
#
# [adapters.claude-code]
# enabled = true
# path = \"~/.claude/projects\"
#
# [adapters.codex-cli]
# enabled = true
# path = \"~/.codex/sessions\"
#
# Set `enabled = false` to keep the section but skip it on `pond sync`;
# re-enable via `pond adapters enable <adapter>`.

# Embeddings. Search defaults to the vector arm (matching on meaning) when the
# store has any vectors, falling back to FTS otherwise - the model loads lazily
# on the first vector query, so there's no cost on FTS-only corpora. `model`
# selects the HuggingFace XLM-RoBERTa model; `dim` declares its output width and
# is baked into the messages.vector schema on table creation - it must equal the
# model's hidden_size.
#
# Common pairings:
# model = \"intfloat/multilingual-e5-small\" dim = 384 (default)
# model = \"intfloat/multilingual-e5-base\" dim = 768
# model = \"intfloat/multilingual-e5-large\" dim = 1024
#
# A different-dim model needs a fresh data dir; pond enforces this at the
# schema boundary.
#
# [embeddings]
# model = \"intfloat/multilingual-e5-small\"
# dim = 384

# Search tuning. Leave unset for Lance defaults; set when tuning vector recall
# against a corpus.
#
# [search]
# nprobes = 16

# Storage maintenance. Tunes the compaction + cleanup pass that runs inside
# `pond sync` and `pond optimize`.
#
# - `compaction_fragment_cap` is the per-task fragment-count backstop: a
# planned compaction task touching at least this many fragments always runs
# even when the write-amplification veto would skip it. Default 64; 0
# disables the veto and runs every task Lance plans.
# - `cleanup_older_than` is the manifest-retention window for the safe cleanup
# pass. Accepts `Ns` / `Nm` / `Nh` / `Nd` (default `1d`, floor `1h` - it is
# what protects in-flight readers). Versions older than this are reclaimed
# by Lance's OCC-coordinated GC.
#
# [maintenance]
# compaction_fragment_cap = 64
# cleanup_older_than = \"1d\"

# Long-running process caps. Both accept either a plain byte count or a
# humansize-style suffix (\"128 MiB\", \"1 GiB\"). Both are optional - leave
# unset to let pond pick the backend-aware default:
# local FS : index_cache = 256 MiB, metadata_cache = 128 MiB
# remote : index_cache = 2 GiB, metadata_cache = 512 MiB
# Lance's library defaults (6 GiB / 1 GiB) are too generous for a per-session
# `pond mcp` process; tightening them is what keeps RSS under the 500 MiB target
# without measurable latency regressions on typical agent-history corpora.
#
# [runtime]
# index_cache_bytes = \"256 MiB\"
# metadata_cache_bytes = \"128 MiB\"

# Storage address and credentials (spec.md#storage-url-grammar).
#
# `path` is the default destination used when `--storage-path` (env
# `POND_STORAGE_PATH`) is not passed. Absent = the platform-local data dir.
# Addresses are URLs; the `s3+https` form carries the endpoint, bucket, and
# prefix in one token:
#
# /abs/path or ~/path local filesystem
# s3://bucket/prefix AWS S3 (ambient credential chain)
# s3+https://host/bucket/prefix S3-compatible endpoint (Hetzner, R2, B2, MinIO)
# gs://bucket/prefix Google Cloud Storage
# az://account/container/prefix Azure Blob
#
# Credentials live in `[creds.<name>]` sets and bind to URLs by `scope`
# prefix - longest match wins (spec.md#creds-scope-match); a set without
# `scope` matches any URL. With no matching set, the standard cloud SDK
# chain applies (AWS_* env, shared credentials file, instance metadata).
# Secrets never go in URLs or CLI flags; besides inline values,
# `access_key_id_file` / `secret_access_key_file` read a file and
# `secret_access_key_command` runs a command (e.g. `op read ...`). `extra`
# holds verbatim `object_store` options pond has not typed.
#
# Every field mirrors to env: `POND_STORAGE_PATH`, `POND_CREDS_<NAME>_<FIELD>`
# (set names are lowercase alphanumeric, so the env grammar is unambiguous).
# Precedence: CLI flag > POND_* env > this file > ambient cloud chain.
# Probe a destination end-to-end with `pond storage check`.
#
# Future wrap: pond is single-namespace in v1 (spec.md#wire-namespace-resolution);
# `[storage]` is flat here on the assumption of one bucket per pond. When
# multi-namespace pond lands this becomes `[namespaces.<ns>.storage]`.
#
# [storage]
# path = \"s3+https://nbg1.your-objectstorage.com/my-pond\"
#
# [creds.default]
# access_key_id = \"...\"
# secret_access_key = \"...\"
";
295
/// Root of pond's configuration.
///
/// Every section is optional and falls back to its `Default` when absent;
/// unknown top-level keys are rejected (`deny_unknown_fields`).
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct Config {
    /// `[embeddings]`: embedding model id and vector width.
    #[serde(default)]
    pub embeddings: EmbeddingsConfig,
    /// `[search]`: search tuning knobs.
    #[serde(default)]
    pub search: SearchConfig,
    /// `[maintenance]`: compaction and cleanup tuning.
    #[serde(default)]
    pub maintenance: MaintenanceConfig,
    /// `[runtime]`: cache size caps.
    #[serde(default)]
    pub runtime: RuntimeConfig,
    /// `[adapters.<name>]`: adapter name mapped to an untyped settings blob.
    /// Only the `enabled` key is interpreted in this file (see
    /// `Config::resolve_adapters`); the rest is passed through as-is.
    #[serde(default)]
    pub adapters: BTreeMap<String, Value>,
    /// `[storage]`: default storage destination.
    #[serde(default)]
    pub storage: StorageConfig,
    /// `[creds.<name>]`: named credential sets, checked by
    /// `Config::validate_creds` on load.
    #[serde(default)]
    pub creds: BTreeMap<String, CredsSet>,
}
325
/// The `[storage]` section. Any key other than `path` is rejected
/// (`deny_unknown_fields`).
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct StorageConfig {
    /// Storage destination as written by the user (a path or URL string); it
    /// is not parsed or validated in this struct. `None` when unset.
    #[serde(default)]
    pub path: Option<String>,
}
335
/// One `[creds.<name>]` credential set.
///
/// All fields are optional. `Config::validate_creds` enforces that at most one
/// source is given for the access key id and at most one for the secret.
/// Fields using `lenient_string` also accept numeric or boolean values and
/// store them as text.
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct CredsSet {
    /// URL prefix this set applies to; a set without a scope is the catch-all.
    #[serde(default)]
    pub scope: Option<String>,
    /// Access key id given inline.
    #[serde(default, deserialize_with = "lenient_string")]
    pub access_key_id: Option<String>,
    /// File to read the access key id from (alternative to the inline value).
    #[serde(default)]
    pub access_key_id_file: Option<PathBuf>,
    /// Secret access key given inline.
    #[serde(default, deserialize_with = "lenient_string")]
    pub secret_access_key: Option<String>,
    /// File to read the secret access key from.
    #[serde(default)]
    pub secret_access_key_file: Option<PathBuf>,
    /// Command whose output supplies the secret access key.
    #[serde(default)]
    pub secret_access_key_command: Option<String>,
    /// Region to use for this set.
    #[serde(default, deserialize_with = "lenient_string")]
    pub region: Option<String>,
    /// Explicit virtual-hosted-style addressing override; `None` leaves the
    /// choice to whoever consumes the set.
    #[serde(default)]
    pub virtual_hosted_style_request: Option<bool>,
    /// Additional string options kept verbatim; this file does not interpret them.
    #[serde(default)]
    pub extra: BTreeMap<String, String>,
}
366
/// Returns `true` when `name` matches `[a-z][a-z0-9]{0,15}`: one ASCII
/// lowercase letter followed by up to fifteen ASCII lowercase letters or digits.
pub fn valid_creds_set_name(name: &str) -> bool {
    if name.len() > 16 {
        return false;
    }
    // Working on bytes is equivalent to working on chars here: every byte of a
    // non-ASCII character is >= 0x80 and fails both ASCII predicates.
    match name.as_bytes().split_first() {
        Some((first, rest)) => {
            first.is_ascii_lowercase()
                && rest
                    .iter()
                    .all(|byte| byte.is_ascii_lowercase() || byte.is_ascii_digit())
        }
        None => false,
    }
}
376
/// Builds the error message shown for a creds set name that fails
/// `valid_creds_set_name`, quoting the offending name.
pub fn creds_set_name_error(name: &str) -> String {
    let pattern = "[a-z][a-z0-9]{0,15}";
    format!(
        "creds set name {name:?} must match {pattern} (lowercase alphanumeric, no separators)"
    )
}
384
/// The `[runtime]` section: optional cache size caps.
///
/// Each value may be written as a plain byte count or as text with a unit
/// (for example `"256 MiB"`); both forms are handled by
/// `deserialize_byte_size_opt`. `None` means the value was not configured.
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
#[serde(deny_unknown_fields, default)]
pub struct RuntimeConfig {
    /// Cap for the index cache, in bytes.
    #[serde(default, deserialize_with = "deserialize_byte_size_opt")]
    pub index_cache_bytes: Option<usize>,
    /// Cap for the metadata cache, in bytes.
    #[serde(default, deserialize_with = "deserialize_byte_size_opt")]
    pub metadata_cache_bytes: Option<usize>,
}
397
/// The `[search]` section. Unknown keys are rejected (`deny_unknown_fields`).
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct SearchConfig {
    /// Vector-search `nprobes` setting; `None` when not configured.
    #[serde(default)]
    pub nprobes: Option<usize>,
}
405
/// The `[maintenance]` section. Unknown keys are rejected
/// (`deny_unknown_fields`); neither value is validated in this struct.
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct MaintenanceConfig {
    /// Fragment-count cap for compaction; `None` when not configured.
    #[serde(default)]
    pub compaction_fragment_cap: Option<usize>,
    /// Retention window for cleanup, kept as the raw text the user wrote
    /// (for example `"1d"`); `None` when not configured.
    #[serde(default)]
    pub cleanup_older_than: Option<String>,
}
426
/// The `[embeddings]` section.
///
/// A missing field takes its value from `EmbeddingsConfig::default()`
/// (container-level `serde(default)`); unknown keys are rejected.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(deny_unknown_fields, default)]
pub struct EmbeddingsConfig {
    /// Embedding model id; must be non-blank (see `validate`).
    pub model: String,
    /// Embedding vector width; must be non-zero (see `validate`).
    pub dim: usize,
}
445
446impl Default for EmbeddingsConfig {
447 fn default() -> Self {
448 Self {
449 model: crate::embed::DEFAULT_MODEL_ID.to_owned(),
450 dim: crate::sessions::DEFAULT_EMBEDDING_DIM,
451 }
452 }
453}
454
455pub fn default_storage_path(xdg_data_home: Option<PathBuf>, home: Option<PathBuf>) -> Result<Url> {
461 if let Some(xdg) = xdg_data_home.filter(|path| path.is_absolute()) {
462 return url_for_path(xdg.join("pond"));
463 }
464 if let Some(home) = home {
465 return url_for_path(home.join(".local").join("share").join("pond"));
466 }
467 url_for_path(PathBuf::from(".pond"))
469}
470
/// Picks the default cache directory.
///
/// Order of preference: `<xdg_cache_home>/pond` when that directory is an
/// absolute path, then `<home>/.cache/pond`, then the relative `.pond-cache`.
pub fn default_cache_path(xdg_cache_home: Option<PathBuf>, home: Option<PathBuf>) -> PathBuf {
    match (xdg_cache_home, home) {
        (Some(xdg), _) if xdg.is_absolute() => xdg.join("pond"),
        (_, Some(home)) => home.join(".cache").join("pond"),
        _ => PathBuf::from(".pond-cache"),
    }
}
483
/// Picks the default config file location.
///
/// Order of preference: `<xdg_config_home>/pond/config.toml` when that
/// directory is an absolute path, then `<home>/.config/pond/config.toml`,
/// then the relative `.pond.toml`.
pub fn default_config_path(xdg_config_home: Option<PathBuf>, home: Option<PathBuf>) -> PathBuf {
    let config_dir = xdg_config_home
        .filter(|dir| dir.is_absolute())
        .map(|dir| dir.join("pond"))
        .or_else(|| home.map(|dir| dir.join(".config").join("pond")));
    match config_dir {
        Some(dir) => dir.join("config.toml"),
        None => PathBuf::from(".pond.toml"),
    }
}
497
498impl Config {
499 pub fn load(path: impl AsRef<Path>) -> Result<Self> {
507 Ok(Self::load_with_provenance(path)?.0)
508 }
509
510 pub fn load_str(body: &str) -> Result<Self> {
515 let figment = Figment::new().merge(Toml::string(body)).merge(env_mirror());
516 let config: Self = figment
517 .extract_lossy()
518 .map_err(|error| anyhow!("failed to load config: {error}"))?;
519 config.embeddings.validate()?;
520 config.validate_creds()?;
521 Ok(config)
522 }
523
524 pub fn load_with_provenance(path: impl AsRef<Path>) -> Result<(Self, Figment)> {
527 let path = path.as_ref();
528 let figment = Figment::new().merge(Toml::file(path)).merge(env_mirror());
529 let config: Self = figment.extract_lossy().map_err(|error| {
533 if let Some(recipe) = detect_legacy_storage(path) {
534 return anyhow!("{recipe}");
535 }
536 if let Some(recipe) = detect_legacy_sources(path) {
537 return anyhow!("{recipe}");
538 }
539 anyhow!("failed to load config {}: {error}", path.display())
542 })?;
543 config.embeddings.validate()?;
544 config.validate_creds()?;
545 config.embeddings.install_runtime();
546 Ok((config, figment))
550 }
551
552 fn validate_creds(&self) -> Result<()> {
557 let mut scopeless: Option<&str> = None;
558 let mut scopes: BTreeMap<String, &str> = BTreeMap::new();
559 for (name, set) in &self.creds {
560 if !valid_creds_set_name(name) {
561 bail!(creds_set_name_error(name));
562 }
563 if set.access_key_id.is_some() && set.access_key_id_file.is_some() {
564 bail!("[creds.{name}] sets both access_key_id and access_key_id_file; pick one");
565 }
566 let secret_variants = [
567 set.secret_access_key.is_some(),
568 set.secret_access_key_file.is_some(),
569 set.secret_access_key_command.is_some(),
570 ]
571 .iter()
572 .filter(|present| **present)
573 .count();
574 if secret_variants > 1 {
575 bail!(
576 "[creds.{name}] sets more than one of secret_access_key / secret_access_key_file / secret_access_key_command; pick one"
577 );
578 }
579 match set.scope.as_deref() {
580 None => {
581 if let Some(other) = scopeless {
582 bail!(
583 "[creds.{other}] and [creds.{name}] are both scope-less; at most one catch-all set is allowed - add a `scope` to one"
584 );
585 }
586 scopeless = Some(name);
587 }
588 Some(scope) => {
589 let canonical = crate::substrate::parse_scope(scope)
594 .map(|url| url.as_str().trim_end_matches('/').to_owned())
595 .with_context(|| {
596 format!("[creds.{name}] scope {scope:?} is not a valid URL prefix")
597 })?;
598 if let Some(other) = scopes.insert(canonical, name) {
599 bail!(
600 "[creds.{other}] and [creds.{name}] declare the same scope {scope:?}; merge them or narrow one"
601 );
602 }
603 }
604 }
605 }
606 Ok(())
607 }
608
609 pub fn resolve_adapters(&self, adapter: Option<&str>) -> Result<Vec<(String, Value)>> {
618 match adapter {
619 None => Ok(self
620 .adapters
621 .iter()
622 .filter_map(|(name, blob)| take_enabled(name, blob))
623 .collect()),
624 Some(name) => {
625 let blob = self
626 .adapters
627 .get(name)
628 .ok_or_else(|| anyhow!("no [adapters.{name}] entry in config"))?;
629 take_enabled(name, blob).map(|entry| vec![entry]).ok_or_else(|| {
630 anyhow!(
631 "adapter [{name}] is disabled (enabled = false); run `pond adapters enable {name}` to re-enable, then `pond sync {name}`"
632 )
633 })
634 }
635 }
636 }
637
638 pub fn disabled_adapter_names(&self) -> Vec<&str> {
643 self.adapters
644 .iter()
645 .filter_map(|(name, blob)| {
646 let enabled = blob
647 .get("enabled")
648 .and_then(Value::as_bool)
649 .unwrap_or(false);
650 if enabled { None } else { Some(name.as_str()) }
651 })
652 .collect()
653 }
654}
655
656fn env_mirror() -> Env {
662 Env::prefixed("POND_")
665 .filter(|key| {
666 let key = key.as_str().to_ascii_lowercase();
667 key == "storage_path" || (key.starts_with("creds_") && !key.ends_with("_extra"))
671 })
672 .map(|key| {
673 let key = key.as_str().to_ascii_lowercase();
677 let dots = if key.starts_with("creds_") { 2 } else { 1 };
678 key.replacen('_', ".", dots).into()
679 })
680}
681
/// Spellings of the endpoint key in the old `[storage]` map;
/// `detect_legacy_storage` matches them case-insensitively.
pub const LEGACY_ENDPOINT_KEYS: &[&str] = &["aws_endpoint", "endpoint"];
/// Spellings of the access-key-id key in the old `[storage]` map. Not read in
/// this part of the file; presumably used by migration code elsewhere - verify.
pub const LEGACY_ACCESS_KEY_KEYS: &[&str] = &["aws_access_key_id", "access_key_id"];
/// Spellings of the secret-access-key key in the old `[storage]` map. Not read
/// in this part of the file; presumably used by migration code elsewhere - verify.
pub const LEGACY_SECRET_KEY_KEYS: &[&str] = &["aws_secret_access_key", "secret_access_key"];
/// Spellings of the virtual-hosted-style flag in the old `[storage]` map;
/// `detect_legacy_storage` matches them case-insensitively.
pub const LEGACY_VIRTUAL_HOSTED_KEYS: &[&str] = &[
    "aws_virtual_hosted_style_request",
    "virtual_hosted_style_request",
];
694
695fn detect_legacy_storage(path: &Path) -> Option<String> {
700 let text = std::fs::read_to_string(path).ok()?;
701 let value: toml::Value = toml::from_str(&text).ok()?;
702 let storage = value.get("storage")?.as_table()?;
703 if storage.is_empty() || storage.keys().all(|key| key == "path") {
704 return None;
705 }
706 let get = |names: &[&str]| {
707 storage.iter().find_map(|(key, value)| {
708 names
709 .iter()
710 .any(|name| key.eq_ignore_ascii_case(name))
711 .then(|| value.as_str().unwrap_or_default().to_owned())
712 })
713 };
714 let endpoint = get(LEGACY_ENDPOINT_KEYS);
715 let host = endpoint
716 .as_deref()
717 .and_then(|e| e.split("://").nth(1))
718 .unwrap_or("<endpoint-host>");
719 let virtual_hosted = storage.iter().any(|(key, value)| {
723 LEGACY_VIRTUAL_HOSTED_KEYS
724 .iter()
725 .any(|name| key.eq_ignore_ascii_case(name))
726 && (value.as_bool().unwrap_or(false)
727 || value
728 .as_str()
729 .is_some_and(|text| text.eq_ignore_ascii_case("true") || text == "1"))
730 });
731 let path_recipe = match host.split_once('.') {
732 Some((bucket, rest)) if virtual_hosted && rest.contains('.') => {
733 format!("s3+https://{rest}/{bucket}/<prefix>")
734 }
735 _ => format!("s3+https://{host}/<bucket>/<prefix>"),
736 };
737 let mut recipe = format!(
740 "config {} uses the old [storage] passthrough map; rewrite it as:\n\n[storage]\npath = \"{path_recipe}\"\n\n[creds.default]\n",
741 path.display(),
742 );
743 recipe.push_str("access_key_id = \"...\" # copy from the old [storage] section\n");
744 recipe.push_str("secret_access_key = \"...\" # copy from the old [storage] section\n");
745 recipe.push_str(
746 "\n(the endpoint and bucket fold into the URL; allow_http is scheme-derived; virtual-hosted addressing defaults on; the region is autodetected - append ?region=<x> to the URL only if your store insists. `pond storage check` verifies the result end-to-end, and `pond init` can apply this rewrite for you)",
747 );
748 Some(recipe)
749}
750
751fn detect_legacy_sources(path: &Path) -> Option<String> {
756 let text = std::fs::read_to_string(path).ok()?;
757 let value: toml::Value = toml::from_str(&text).ok()?;
758 value.get("sources")?.as_table()?;
759 Some(format!(
760 "config {} uses a [sources.*] block; the adapter map was renamed to [adapters.*]. Run `pond init` to migrate it, or rename each `[sources.<name>]` header to `[adapters.<name>]` by hand.",
761 path.display(),
762 ))
763}
764
765fn take_enabled(name: &str, blob: &Value) -> Option<(String, Value)> {
769 let enabled = blob
770 .get("enabled")
771 .and_then(Value::as_bool)
772 .unwrap_or(false);
773 if !enabled {
774 return None;
775 }
776 let mut clean = blob.clone();
777 if let Some(obj) = clean.as_object_mut() {
778 obj.remove("enabled");
779 }
780 Some((name.to_owned(), clean))
781}
782
783pub fn expand_home_under(path: &Path, home: &Path) -> PathBuf {
790 let Some(text) = path.to_str() else {
791 return path.to_path_buf();
792 };
793 let home_text = home.to_string_lossy();
794 let expanded = shellexpand::full_with_context_no_errors(
795 text,
796 || Some(home_text.clone()),
797 |var| std::env::var(var).ok(),
798 );
799 PathBuf::from(expanded.as_ref())
800}
801
/// Rewrites `path` with a leading `~` when it lies under `home`.
///
/// `home` itself becomes `~`, a path below it becomes `~/<rest>`, and any
/// other path is returned unchanged. The prefix match is component-wise, so
/// `/srv/meow` is not considered to be under `/srv/me`.
///
/// An empty `home` is treated as "no home directory" and leaves `path`
/// untouched.
pub fn contract_home_under(path: &Path, home: &Path) -> PathBuf {
    // An empty prefix matches every path, which would turn a relative path
    // such as `notes/todo.md` into the bogus `~/notes/todo.md`.
    if home.as_os_str().is_empty() {
        return path.to_path_buf();
    }
    match path.strip_prefix(home) {
        Ok(rest) if rest.as_os_str().is_empty() => PathBuf::from("~"),
        Ok(rest) => Path::new("~").join(rest),
        Err(_) => path.to_path_buf(),
    }
}
813
814pub fn contract_home(path: &Path) -> PathBuf {
818 match std::env::var_os("HOME") {
819 Some(home) => contract_home_under(path, Path::new(&home)),
820 None => path.to_path_buf(),
821 }
822}
823
824impl EmbeddingsConfig {
825 pub fn validate(&self) -> Result<()> {
829 if self.model.trim().is_empty() {
830 bail!("embeddings.model must be a non-empty HuggingFace model id");
831 }
832 if self.dim == 0 {
833 bail!("embeddings.dim must be positive; got {}", self.dim);
834 }
835 Ok(())
836 }
837
838 pub fn install_runtime(&self) {
842 crate::embed::init_model_id(self.model.clone());
843 crate::sessions::init_embedding_dim(self.dim);
844 }
845}
846
847pub fn write_config_file(path: &Path, contents: &str) -> Result<()> {
854 #[cfg(unix)]
855 {
856 use std::io::Write as _;
857 use std::os::unix::fs::{OpenOptionsExt as _, PermissionsExt as _};
858 let mut file = std::fs::OpenOptions::new()
859 .write(true)
860 .create(true)
861 .truncate(true)
862 .mode(0o600)
863 .open(path)
864 .with_context(|| format!("failed to write {}", path.display()))?;
865 file.set_permissions(std::fs::Permissions::from_mode(0o600))
867 .with_context(|| format!("failed to chmod 0600 {}", path.display()))?;
868 file.write_all(contents.as_bytes())
869 .with_context(|| format!("failed to write {}", path.display()))?;
870 }
871 #[cfg(not(unix))]
872 {
873 std::fs::write(path, contents)
874 .with_context(|| format!("failed to write {}", path.display()))?;
875 }
876 Ok(())
877}
878
// Unit tests for config loading, path helpers, creds validation and the
// legacy-schema hints.
#[cfg(test)]
mod tests {
    #![allow(clippy::expect_used, clippy::unwrap_used, clippy::result_large_err)]

    use super::*;
    use serde_json::Value;
    use tempfile::TempDir;

    // Rewriting an existing world-readable file must leave it at mode 0600.
    #[cfg(unix)]
    #[test]
    fn write_config_file_is_owner_only_0600() {
        use std::os::unix::fs::PermissionsExt;
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("config.toml");
        std::fs::write(&path, "old").unwrap();
        std::fs::set_permissions(&path, std::fs::Permissions::from_mode(0o644)).unwrap();
        write_config_file(&path, "[creds.default]\nsecret_access_key = \"x\"\n").unwrap();
        let mode = std::fs::metadata(&path).unwrap().permissions().mode() & 0o777;
        assert_eq!(mode, 0o600, "config with secrets must be owner-only");
        assert!(
            std::fs::read_to_string(&path)
                .unwrap()
                .contains("secret_access_key")
        );
    }

    // `validate` rejects a blank model and a zero dim, but accepts any other dim.
    #[test]
    fn validate_catches_empty_model_and_bad_dim() {
        assert!(EmbeddingsConfig::default().validate().is_ok());
        let bad_model = EmbeddingsConfig {
            model: " ".to_owned(),
            dim: 768,
        };
        assert!(bad_model.validate().is_err());
        let odd_dim = EmbeddingsConfig {
            model: "intfloat/multilingual-e5-base".to_owned(),
            dim: 100,
        };
        assert!(odd_dim.validate().is_ok());
        let zero_dim = EmbeddingsConfig {
            model: "intfloat/multilingual-e5-base".to_owned(),
            dim: 0,
        };
        assert!(zero_dim.validate().is_err());
    }

    // A missing config file is not an error; defaults apply.
    #[test]
    fn config_load_missing_file_falls_back_to_builtin() {
        let config = Config::load("/nonexistent/pond-config-xyz.toml").unwrap();
        assert_eq!(config.embeddings, EmbeddingsConfig::default());
    }

    // The shipped template is all comments, so loading it yields the defaults.
    #[test]
    fn default_config_toml_loads_to_the_builtin_defaults() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("config.toml");
        std::fs::write(&path, DEFAULT_CONFIG_TOML).unwrap();
        let config = Config::load(&path).unwrap();
        assert_eq!(config.embeddings, EmbeddingsConfig::default());
        assert_eq!(config.embeddings.model, crate::embed::DEFAULT_MODEL_ID);
        assert_eq!(
            config.embeddings.dim,
            crate::sessions::DEFAULT_EMBEDDING_DIM
        );
    }

    // Absolute XDG dir wins; a relative one is ignored in favour of home; with
    // neither, the relative `.pond` fallback is absolutized.
    #[test]
    fn default_storage_path_follows_xdg_then_home() {
        let resolved =
            default_storage_path(Some(PathBuf::from("/xdg")), Some(PathBuf::from("/home")))
                .unwrap();
        assert!(is_local(&resolved));
        assert_eq!(local_path(&resolved).unwrap(), PathBuf::from("/xdg/pond"));

        let resolved = default_storage_path(
            Some(PathBuf::from("relative")),
            Some(PathBuf::from("/home")),
        )
        .unwrap();
        assert_eq!(
            local_path(&resolved).unwrap(),
            PathBuf::from("/home/.local/share/pond"),
        );

        let resolved = default_storage_path(None, None).unwrap();
        assert!(is_local(&resolved));
        assert!(
            local_path(&resolved).unwrap().ends_with(".pond"),
            "fallback path should end with .pond: {resolved}",
        );
    }

    // `~` and `~/...` expand to the given home; absolute paths and `~user`
    // forms are left alone.
    #[test]
    fn expand_home_under_handles_tilde_forms() {
        let home = Path::new("/srv/me");
        assert_eq!(
            expand_home_under(Path::new("~"), home),
            PathBuf::from("/srv/me")
        );
        assert_eq!(
            expand_home_under(Path::new("~/.codex/sessions"), home),
            PathBuf::from("/srv/me/.codex/sessions"),
        );
        assert_eq!(
            expand_home_under(Path::new("/etc/passwd"), home),
            PathBuf::from("/etc/passwd"),
        );
        assert_eq!(
            expand_home_under(Path::new("~user/elsewhere"), home),
            PathBuf::from("~user/elsewhere"),
        );
    }

    // `$VAR` and `${VAR}` expand from the environment; unset variables stay literal.
    #[test]
    fn expand_home_under_handles_env_vars() {
        figment::Jail::expect_with(|jail| {
            jail.set_env("POND_TEST_EXPAND_DIR", "/srv/data");
            let home = Path::new("/srv/me");
            assert_eq!(
                expand_home_under(Path::new("$POND_TEST_EXPAND_DIR/pond"), home),
                PathBuf::from("/srv/data/pond"),
            );
            assert_eq!(
                expand_home_under(Path::new("${POND_TEST_EXPAND_DIR}/pond"), home),
                PathBuf::from("/srv/data/pond"),
            );
            assert_eq!(
                expand_home_under(Path::new("$POND_TEST_UNSET_VAR/x"), home),
                PathBuf::from("$POND_TEST_UNSET_VAR/x"),
            );
            Ok(())
        });
    }

    // Contraction is the inverse of expansion for paths under home.
    #[test]
    fn contract_home_under_inverts_expansion() {
        let home = Path::new("/srv/me");
        assert_eq!(
            contract_home_under(Path::new("/srv/me/.local/share/pond"), home),
            PathBuf::from("~/.local/share/pond"),
        );
        assert_eq!(
            contract_home_under(Path::new("/srv/me"), home),
            PathBuf::from("~")
        );
        assert_eq!(
            contract_home_under(Path::new("/etc/passwd"), home),
            PathBuf::from("/etc/passwd"),
        );
    }

    // Covers: all enabled adapters, one named adapter, a disabled adapter, an
    // unknown adapter, and the list of disabled names.
    #[test]
    fn resolve_adapters_returns_one_or_all_or_errors() {
        let temp = TempDir::new().unwrap();
        let body = "\
[adapters.claude-code]
enabled = true
path = \"/srv/claude\"

[adapters.codex-cli]
enabled = true
path = \"/srv/codex\"

[adapters.opencode]
enabled = false
";
        let path = temp.path().join("config.toml");
        std::fs::write(&path, body).expect("write config");
        let config = Config::load(&path).unwrap();

        let all = config.resolve_adapters(None).unwrap();
        assert_eq!(all.len(), 2);
        let names: Vec<_> = all.iter().map(|(n, _)| n.as_str()).collect();
        assert!(names.contains(&"claude-code"));
        assert!(names.contains(&"codex-cli"));
        for (_, blob) in &all {
            assert!(blob.get("enabled").is_none(), "enabled should be stripped");
        }

        let one = config.resolve_adapters(Some("codex-cli")).unwrap();
        assert_eq!(one.len(), 1);
        assert_eq!(one[0].0, "codex-cli");
        assert_eq!(
            one[0].1.get("path").and_then(Value::as_str),
            Some("/srv/codex"),
        );

        let disabled = config.resolve_adapters(Some("opencode"));
        let err = disabled
            .expect_err("disabled adapter must error")
            .to_string();
        assert!(err.contains("enabled = false"), "got: {err}");
        assert!(err.contains("pond sync opencode"), "got: {err}");

        assert!(config.resolve_adapters(Some("nope")).is_err());

        assert_eq!(config.disabled_adapter_names(), vec!["opencode"]);
    }

    // Only file schemes count as local; `memory://` must not.
    #[test]
    fn memory_uri_is_classified_as_remote() {
        let url = Url::parse("memory:///pond-remote-test").expect("memory uri parses");
        assert!(
            !is_local(&url),
            "memory:// is not a local-filesystem URL: {url}",
        );
        assert!(
            local_path(&url).is_none(),
            "local_path must return None for non-file schemes",
        );
    }

    // A config with `[storage]` and two creds sets loads with every field intact.
    #[test]
    fn storage_and_creds_round_trip() {
        figment::Jail::expect_with(|jail| {
            jail.create_file(
                "config.toml",
                r#"
[storage]
path = "s3+https://nbg1.example.com/my-pond"

[creds.default]
access_key_id = "AKIA123"
secret_access_key = "shh"

[creds.work]
scope = "s3+https://fsn1.example.com/work-pond/"
access_key_id = "AKIA456"
secret_access_key_command = "op read op://vault/pond/secret"
region = "fsn1"
virtual_hosted_style_request = false
extra = { request_timeout = "60 seconds" }
"#,
            )?;
            let config = Config::load("config.toml").expect("config loads");
            assert_eq!(
                config.storage.path.as_deref(),
                Some("s3+https://nbg1.example.com/my-pond"),
            );
            assert_eq!(config.creds.len(), 2);
            let work = &config.creds["work"];
            assert_eq!(
                work.secret_access_key_command.as_deref(),
                Some("op read op://vault/pond/secret"),
            );
            assert_eq!(work.virtual_hosted_style_request, Some(false));
            assert_eq!(work.extra["request_timeout"], "60 seconds");
            Ok(())
        });
    }

    // Each case pairs a bad config body with a substring the error must contain.
    #[test]
    fn creds_validators_reject_bad_shapes() {
        let cases: &[(&str, &str)] = &[
            ("[creds.a]\nacces_key_id = \"x\"\n", "acces_key_id"),
            ("[creds.my_set]\naccess_key_id = \"x\"\n", "[a-z][a-z0-9]"),
            ("[creds.A1]\naccess_key_id = \"x\"\n", "[a-z][a-z0-9]"),
            (
                "[creds.a]\nsecret_access_key = \"x\"\nsecret_access_key_command = \"cat\"\n",
                "more than one",
            ),
            (
                "[creds.a]\naccess_key_id = \"x\"\naccess_key_id_file = \"/k\"\n",
                "pick one",
            ),
            (
                "[creds.a]\naccess_key_id = \"x\"\n[creds.b]\naccess_key_id = \"y\"\n",
                "scope-less",
            ),
            (
                "[creds.a]\nscope = \"s3+https://h:443/b/\"\naccess_key_id = \"x\"\n[creds.b]\nscope = \"s3+https://h/b\"\naccess_key_id = \"y\"\n",
                "same scope",
            ),
        ];
        figment::Jail::expect_with(|jail| {
            for (body, needle) in cases {
                jail.create_file("config.toml", body)?;
                let err = Config::load("config.toml").expect_err(body).to_string();
                assert!(
                    err.contains(needle),
                    "want {needle:?} in error for {body:?}, got: {err}",
                );
            }
            Ok(())
        });
    }

    // Set names are 1-16 chars, lowercase letter first, then lowercase letters or digits.
    #[test]
    fn valid_creds_set_name_matches_env_mirror_charset() {
        for ok in ["default", "work", "work2", "a", "abcdefghij123456"] {
            assert!(valid_creds_set_name(ok), "{ok:?} should be valid");
        }
        for bad in ["", "Work", "my_set", "2fast", "abcdefghij1234567", "set-1"] {
            assert!(!valid_creds_set_name(bad), "{bad:?} should be invalid");
        }
    }

    // The legacy `[storage]` map must produce the rewrite recipe without
    // leaking credential values, with and without the virtual-hosted flag.
    #[test]
    fn legacy_storage_map_errors_with_the_rewrite_recipe() {
        figment::Jail::expect_with(|jail| {
            jail.create_file(
                "config.toml",
                r#"
[storage]
AWS_ACCESS_KEY_ID = "AKIA123"
AWS_SECRET_ACCESS_KEY = "shh"
AWS_REGION = "nbg1"
AWS_ENDPOINT = "https://ttq.nbg1.your-objectstorage.com"
aws_virtual_hosted_style_request = "true"
"#,
            )?;
            let err = Config::load("config.toml")
                .expect_err("legacy map must error")
                .to_string();
            assert!(err.contains("old [storage] passthrough map"), "got: {err}");
            assert!(
                err.contains("s3+https://nbg1.your-objectstorage.com/ttq/<prefix>"),
                "recipe must de-fold the virtual-hosted endpoint, got: {err}",
            );
            assert!(!err.contains("AKIA123"), "got: {err}");
            assert!(!err.contains("\"shh\""), "got: {err}");
            assert!(err.contains("access_key_id = \"...\""), "got: {err}");
            assert!(!err.contains("region ="), "got: {err}");
            assert!(err.contains("?region="), "got: {err}");
            assert!(err.contains("pond storage check"), "got: {err}");
            jail.create_file(
                "config.toml",
                r#"
[storage]
AWS_ACCESS_KEY_ID = "AKIA123"
AWS_ENDPOINT = "https://ttq.nbg1.your-objectstorage.com"
"#,
            )?;
            let err = Config::load("config.toml")
                .expect_err("legacy map must error")
                .to_string();
            assert!(
                err.contains("s3+https://ttq.nbg1.your-objectstorage.com/<bucket>/<prefix>"),
                "got: {err}",
            );
            Ok(())
        });
    }

    // A legacy `[sources.*]` block must point the user at `[adapters.*]` and `pond init`.
    #[test]
    fn legacy_sources_block_errors_with_the_adapters_recipe() {
        figment::Jail::expect_with(|jail| {
            jail.create_file(
                "config.toml",
                "[sources.claude-code]\nenabled = true\npath = \"/srv/claude\"\n",
            )?;
            let err = Config::load("config.toml")
                .expect_err("legacy [sources.*] must error")
                .to_string();
            assert!(err.contains("[adapters.*]"), "names the new key: {err}");
            assert!(err.contains("pond init"), "points at the fix: {err}");
            Ok(())
        });
    }

    // Env values override file values field by field, can add a whole new
    // creds set, and an all-digit secret still loads as a string.
    #[test]
    fn env_mirror_layers_over_file() {
        figment::Jail::expect_with(|jail| {
            jail.create_file(
                "config.toml",
                r#"
[storage]
path = "/from-file"

[creds.work]
scope = "s3://file-bucket/"
access_key_id = "from-file"
region = "file-region"
"#,
            )?;
            jail.set_env("POND_STORAGE_PATH", "/from-env");
            jail.set_env("POND_CREDS_WORK_ACCESS_KEY_ID", "from-env");
            jail.set_env("POND_CREDS_WORK_SECRET_ACCESS_KEY", "12345");
            jail.set_env("POND_CREDS_CI_ACCESS_KEY_ID", "ci-key");
            let config = Config::load("config.toml").expect("env+file config loads");
            assert_eq!(config.storage.path.as_deref(), Some("/from-env"));
            let work = &config.creds["work"];
            assert_eq!(work.access_key_id.as_deref(), Some("from-env"));
            assert_eq!(work.secret_access_key.as_deref(), Some("12345"));
            assert_eq!(work.region.as_deref(), Some("file-region"));
            assert_eq!(work.scope.as_deref(), Some("s3://file-bucket/"));
            assert_eq!(config.creds["ci"].access_key_id.as_deref(), Some("ci-key"));
            Ok(())
        });
    }
}