Skip to main content

kaizen/core/
config.rs

1// SPDX-License-Identifier: AGPL-3.0-or-later
2//! Config loading: workspace `.kaizen/config.toml` then `~/.kaizen/config.toml`.
3//! Missing files → defaults. User config wins on overlap.
4
5use anyhow::Result;
6use serde::{Deserialize, Serialize};
7use std::path::Path;
8
9#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
10pub struct ScanConfig {
11    pub roots: Vec<String>,
12    /// Minimum seconds between full agent transcript rescans when `--refresh` is not passed.
13    #[serde(default = "default_min_rescan_seconds")]
14    pub min_rescan_seconds: u64,
15}
16
/// Serde fallback for `ScanConfig::min_rescan_seconds`: 300 seconds.
fn default_min_rescan_seconds() -> u64 {
    const DEFAULT_RESCAN_SECS: u64 = 300;
    DEFAULT_RESCAN_SECS
}
20
21impl Default for ScanConfig {
22    fn default() -> Self {
23        Self {
24            roots: vec!["~/.cursor/projects".to_string()],
25            min_rescan_seconds: default_min_rescan_seconds(),
26        }
27    }
28}
29
30#[derive(Debug, Clone, Serialize, Deserialize)]
31pub struct CursorSourceConfig {
32    pub enabled: bool,
33    pub transcript_glob: String,
34}
35
36impl Default for CursorSourceConfig {
37    fn default() -> Self {
38        Self {
39            enabled: true,
40            transcript_glob: "*/agent-transcripts".to_string(),
41        }
42    }
43}
44
/// Enable tier-1 tail ingestion for agents that store data outside Cursor/Claude/Codex paths.
///
/// Every toggle falls back to `default_true` (i.e. `true`) when its key is omitted.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TailAgentToggles {
    /// Toggle for the `goose` agent.
    #[serde(default = "default_true")]
    pub goose: bool,
    /// Toggle for the `openclaw` agent.
    #[serde(default = "default_true")]
    pub openclaw: bool,
    /// Toggle for the `opencode` agent.
    #[serde(default = "default_true")]
    pub opencode: bool,
    /// Toggle for the `copilot_cli` agent.
    #[serde(default = "default_true")]
    pub copilot_cli: bool,
    /// Toggle for the `copilot_vscode` agent.
    #[serde(default = "default_true")]
    pub copilot_vscode: bool,
}
59
60impl Default for TailAgentToggles {
61    fn default() -> Self {
62        Self {
63            goose: true,
64            openclaw: true,
65            opencode: true,
66            copilot_cli: true,
67            copilot_vscode: true,
68        }
69    }
70}
71
/// Groups the per-source settings; each sub-table falls back to its `Default` when absent.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct SourcesConfig {
    /// Cursor transcript source settings.
    #[serde(default)]
    pub cursor: CursorSourceConfig,
    /// Per-agent tail ingestion toggles.
    #[serde(default)]
    pub tail: TailAgentToggles,
}
79
80#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
81pub struct RetentionConfig {
82    pub hot_days: u32,
83    pub warm_days: u32,
84}
85
86impl Default for RetentionConfig {
87    fn default() -> Self {
88        Self {
89            hot_days: 30,
90            warm_days: 90,
91        }
92    }
93}
94
95#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
96pub struct StorageConfig {
97    pub hot_max_bytes: String,
98    pub cold_after_days: u32,
99    pub retention_days: u32,
100    pub flush_hour_utc: u8,
101}
102
103impl Default for StorageConfig {
104    fn default() -> Self {
105        Self {
106            hot_max_bytes: "1GB".into(),
107            cold_after_days: 7,
108            retention_days: 90,
109            flush_hour_utc: 0,
110        }
111    }
112}
113
114impl StorageConfig {
115    pub fn hot_max_bytes_value(&self) -> u64 {
116        parse_byte_size(&self.hot_max_bytes).unwrap_or(1_073_741_824)
117    }
118}
119
/// Sync settings (`[sync]` table); every key is optional and has a serde default.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SyncConfig {
    /// When empty, sync is disabled (no outbox enqueue, `sync run` no-ops flush).
    #[serde(default)]
    pub endpoint: String,
    /// Team token; empty by default.
    #[serde(default)]
    pub team_token: String,
    /// Team identifier; empty by default.
    #[serde(default)]
    pub team_id: String,
    /// Upper bound on events per batch (default: 500).
    #[serde(default = "default_events_per_batch")]
    pub events_per_batch_max: usize,
    /// Upper bound on body size in bytes (default: 1_000_000).
    #[serde(default = "default_max_body_bytes")]
    pub max_body_bytes: usize,
    /// Flush interval in milliseconds (default: 10_000).
    #[serde(default = "default_flush_interval_ms")]
    pub flush_interval_ms: u64,
    /// Sample rate (default: 1.0).
    #[serde(default = "default_sample_rate")]
    pub sample_rate: f64,
    /// 64 hex chars (32 bytes). Prefer `~/.kaizen/config.toml` only; never committed workspace secrets.
    #[serde(default)]
    pub team_salt_hex: String,
}
141
/// Serde fallback for `SyncConfig::events_per_batch_max`.
fn default_events_per_batch() -> usize {
    const EVENTS: usize = 500;
    EVENTS
}

/// Serde fallback for `SyncConfig::max_body_bytes`.
fn default_max_body_bytes() -> usize {
    const BYTES: usize = 1_000_000;
    BYTES
}

/// Serde fallback for `SyncConfig::flush_interval_ms`.
fn default_flush_interval_ms() -> u64 {
    const MILLIS: u64 = 10_000;
    MILLIS
}

/// Serde fallback for `SyncConfig::sample_rate`.
fn default_sample_rate() -> f64 {
    const RATE: f64 = 1.0;
    RATE
}
157
158impl Default for SyncConfig {
159    fn default() -> Self {
160        Self {
161            endpoint: String::new(),
162            team_token: String::new(),
163            team_id: String::new(),
164            events_per_batch_max: default_events_per_batch(),
165            max_body_bytes: default_max_body_bytes(),
166            flush_interval_ms: default_flush_interval_ms(),
167            sample_rate: default_sample_rate(),
168            team_salt_hex: String::new(),
169        }
170    }
171}
172
173/// Parse `team_salt_hex` into 32 bytes. Returns `None` if missing or invalid.
174pub fn try_team_salt(cfg: &SyncConfig) -> Option<[u8; 32]> {
175    let h = cfg.team_salt_hex.trim();
176    if h.len() != 64 {
177        return None;
178    }
179    let bytes = hex::decode(h).ok()?;
180    bytes.try_into().ok()
181}
182
183/// Resolve a 32-byte redaction salt for telemetry-only flows (push/test) when sync is not
184/// configured. Order: configured `[sync].team_salt_hex` → `<kaizen_home>/local_salt.hex`
185/// → freshly generated and persisted at `0o600`. Telemetry never blocks on cloud sync.
186pub fn effective_redaction_salt(
187    cfg: &SyncConfig,
188    kaizen_home: &std::path::Path,
189) -> Result<[u8; 32]> {
190    if let Some(s) = try_team_salt(cfg) {
191        return Ok(s);
192    }
193    let path = kaizen_home.join("local_salt.hex");
194    if let Some(s) = read_local_salt(&path)? {
195        return Ok(s);
196    }
197    let bytes = generate_local_salt();
198    write_local_salt(&path, &bytes)?;
199    Ok(bytes)
200}
201
202fn read_local_salt(path: &std::path::Path) -> Result<Option<[u8; 32]>> {
203    use std::io::ErrorKind;
204    match std::fs::read_to_string(path) {
205        Ok(s) => Ok(parse_salt_hex(s.trim())),
206        Err(e) if e.kind() == ErrorKind::NotFound => Ok(None),
207        Err(e) => Err(e.into()),
208    }
209}
210
211fn parse_salt_hex(h: &str) -> Option<[u8; 32]> {
212    if h.len() != 64 {
213        return None;
214    }
215    hex::decode(h).ok()?.try_into().ok()
216}
217
218fn generate_local_salt() -> [u8; 32] {
219    use rand::Rng;
220    let mut bytes = [0u8; 32];
221    rand::rng().fill_bytes(&mut bytes);
222    bytes
223}
224
225fn write_local_salt(path: &std::path::Path, bytes: &[u8; 32]) -> Result<()> {
226    if let Some(parent) = path.parent() {
227        std::fs::create_dir_all(parent)?;
228    }
229    let hex_s = hex::encode(bytes);
230    std::fs::write(path, hex_s.as_bytes())?;
231    set_user_only_perms(path)?;
232    Ok(())
233}
234
235#[cfg(unix)]
236fn set_user_only_perms(path: &std::path::Path) -> Result<()> {
237    use std::os::unix::fs::PermissionsExt;
238    let mut perms = std::fs::metadata(path)?.permissions();
239    perms.set_mode(0o600);
240    std::fs::set_permissions(path, perms)?;
241    Ok(())
242}
243
244#[cfg(not(unix))]
245fn set_user_only_perms(_path: &std::path::Path) -> Result<()> {
246    Ok(())
247}
248
/// Serde fallback for boolean keys that are on unless explicitly disabled.
fn default_true() -> bool {
    const ON: bool = true;
    ON
}

/// Serde fallback for `TelemetryConfig::fail_open`.
fn default_telemetry_fail_open() -> bool {
    const FAIL_OPEN: bool = true;
    FAIL_OPEN
}

/// Serde fallback for `TelemetryQueryConfig::cache_ttl_seconds`: 3600 seconds.
fn default_cache_ttl_seconds() -> u64 {
    const TTL_SECS: u64 = 3600;
    TTL_SECS
}
260
/// Which third-party system is the single source for query-back / pull; OTLP is export-only, not a pull target.
///
/// Serialized in lowercase (`none`, `posthog`, `datadog`); defaults to `None`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum QueryAuthority {
    /// No query authority (the default).
    #[default]
    None,
    /// PostHog is the query authority.
    Posthog,
    /// Datadog is the query authority.
    Datadog,
}
270
/// Per-field allowlist: when `false` (default), the field is omitted or hashed in telemetry exports.
///
/// Every flag is optional in the config and deserializes to `false` when absent.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct IdentityAllowlist {
    /// Allowlist flag for `team`.
    #[serde(default)]
    pub team: bool,
    /// Allowlist flag for `workspace_label`.
    #[serde(default)]
    pub workspace_label: bool,
    /// Allowlist flag for `runner_label`.
    #[serde(default)]
    pub runner_label: bool,
    /// Allowlist flag for `actor_kind`.
    #[serde(default)]
    pub actor_kind: bool,
    /// Allowlist flag for `actor_label`.
    #[serde(default)]
    pub actor_label: bool,
    /// Allowlist flag for `agent`.
    #[serde(default)]
    pub agent: bool,
    /// Allowlist flag for `model`.
    #[serde(default)]
    pub model: bool,
    /// Allowlist flag for `env`.
    #[serde(default)]
    pub env: bool,
    /// Allowlist flag for `job`.
    #[serde(default)]
    pub job: bool,
    /// Allowlist flag for `branch`.
    #[serde(default)]
    pub branch: bool,
}
295
/// Remote pull: query authority, cache TTL, and which identity labels may leave as cleartext.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct TelemetryQueryConfig {
    /// `posthog` or `datadog` enables provider pull when implemented; `none` or unset = no query authority.
    #[serde(default)]
    pub provider: QueryAuthority,
    /// Seconds to treat remote cache rows as fresh (unless the CLI requests `--refresh`).
    #[serde(default = "default_cache_ttl_seconds")]
    pub cache_ttl_seconds: u64,
    /// Identity fields permitted in exports; all `false` when the table is absent.
    #[serde(default)]
    pub identity_allowlist: IdentityAllowlist,
}
308
309impl Default for TelemetryQueryConfig {
310    fn default() -> Self {
311        Self {
312            provider: QueryAuthority::default(),
313            cache_ttl_seconds: default_cache_ttl_seconds(),
314            identity_allowlist: IdentityAllowlist::default(),
315        }
316    }
317}
318
319impl TelemetryQueryConfig {
320    /// True when a PostHog or Datadog pull backend may be used (OTLP is not a pull target).
321    pub fn has_provider_for_pull(&self) -> bool {
322        matches!(
323            self.provider,
324            QueryAuthority::Posthog | QueryAuthority::Datadog
325        )
326    }
327}
328
/// How to reduce billed input to the model (opt-in; default leaves requests unchanged).
///
/// Serialized as an internally tagged table: the `type` key holds the snake_case
/// variant name (`none`, `last_messages`, `max_input_tokens`).
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ContextPolicy {
    /// No transformation beyond optional JSON minify (same tokens as a direct call).
    #[default]
    None,
    /// Keep the last `count` `messages` array entries; system blocks unchanged when present.
    LastMessages { count: usize },
    /// Drop oldest messages until a rough `chars/4` estimate stays at or below `max`.
    MaxInputTokens { max: u32 },
}
341
/// Anthropic API-compatible HTTP proxy: forward + local telemetry. See `docs/llm-proxy.md`.
///
/// Every key is optional; omitted keys take the per-field serde defaults below.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ProxyConfig {
    /// e.g. `127.0.0.1:3847` (bind address for `kaizen proxy run`).
    #[serde(default = "default_proxy_listen")]
    pub listen: String,
    /// Base URL, no trailing slash, e.g. `https://api.anthropic.com`.
    #[serde(default = "default_proxy_upstream")]
    pub upstream: String,
    /// `anthropic`, `openai`, or `auto`; controls launcher/env hints and default upstream.
    #[serde(default = "default_proxy_provider")]
    pub provider: String,
    /// Prefer `Accept-Encoding: gzip` to upstream (response bodies may be gzip).
    #[serde(default = "default_true")]
    pub compress_transport: bool,
    /// Re-encode JSON bodies to compact `serde_json` (no key reorder; whitespace only).
    #[serde(default = "default_true")]
    pub minify_json: bool,
    /// Slurp cap for a single upstream response (streaming not yet teed; see doc).
    #[serde(default = "default_proxy_max_body_mb")]
    pub max_response_body_mb: u32,
    /// Reject / fail incoming client bodies above this (POST bodies before forward).
    #[serde(default = "default_proxy_max_request_body_mb")]
    pub max_request_body_mb: u32,
    /// Optional token-aware truncation of `messages` in JSON bodies.
    #[serde(default)]
    pub context_policy: ContextPolicy,
}
370
/// Serde fallback for `ProxyConfig::listen`.
fn default_proxy_listen() -> String {
    String::from("127.0.0.1:3847")
}

/// Serde fallback for `ProxyConfig::upstream`.
fn default_proxy_upstream() -> String {
    String::from("https://api.anthropic.com")
}

/// Serde fallback for `ProxyConfig::provider`.
fn default_proxy_provider() -> String {
    String::from("anthropic")
}

/// Serde fallback for `ProxyConfig::max_response_body_mb`.
fn default_proxy_max_body_mb() -> u32 {
    const RESPONSE_CAP_MB: u32 = 256;
    RESPONSE_CAP_MB
}

/// Serde fallback for `ProxyConfig::max_request_body_mb`.
fn default_proxy_max_request_body_mb() -> u32 {
    const REQUEST_CAP_MB: u32 = 32;
    REQUEST_CAP_MB
}
390
391impl Default for ProxyConfig {
392    fn default() -> Self {
393        Self {
394            listen: default_proxy_listen(),
395            upstream: default_proxy_upstream(),
396            provider: default_proxy_provider(),
397            compress_transport: true,
398            minify_json: true,
399            max_response_body_mb: default_proxy_max_body_mb(),
400            max_request_body_mb: default_proxy_max_request_body_mb(),
401            context_policy: ContextPolicy::default(),
402        }
403    }
404}
405
/// Optional third-party telemetry sinks; same redacted batches as Kaizen sync.
///
/// All keys are optional: `fail_open` defaults to `true`, `query` to its `Default`,
/// and `exporters` to an empty list.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TelemetryConfig {
    /// When `true` (default), ignore exporter errors; when `false`, `flush` fails if any secondary errors.
    #[serde(default = "default_telemetry_fail_open")]
    pub fail_open: bool,
    /// Query-back / pull API: authority, cache TTL, identity allowlist.
    #[serde(default)]
    pub query: TelemetryQueryConfig,
    /// Declarative list; `type = "none"` rows are accepted and ignored.
    #[serde(default)]
    pub exporters: Vec<ExporterConfig>,
}
419
420impl Default for TelemetryConfig {
421    fn default() -> Self {
422        Self {
423            fail_open: default_telemetry_fail_open(),
424            query: TelemetryQueryConfig::default(),
425            exporters: Vec::new(),
426        }
427    }
428}
429
/// One pluggable sink; TOML `type` is the tag.
///
/// Variant names are matched in lowercase (`none`, `file`, `dev`, `posthog`,
/// `datadog`, `otlp`). Every variant except `None` carries an `enabled` flag
/// that defaults to `true`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ExporterConfig {
    /// No-op row for sparse tables / templates.
    None,
    /// Append summary JSON lines to a local NDJSON file (default `<workspace>/.kaizen/telemetry.ndjson`).
    File {
        #[serde(default = "default_true")]
        enabled: bool,
        /// Output path override; `None` when unset.
        #[serde(default)]
        path: Option<String>,
    },
    /// Echo to tracing (for wiring tests; requires the `telemetry-dev` build feature).
    Dev {
        #[serde(default = "default_true")]
        enabled: bool,
    },
    /// PostHog sink.
    PostHog {
        #[serde(default = "default_true")]
        enabled: bool,
        /// e.g. `https://us.i.posthog.com` (default when unset)
        host: Option<String>,
        /// Prefer env `POSTHOG_API_KEY` or `KAIZEN_POSTHOG_API_KEY`
        project_api_key: Option<String>,
    },
    /// Datadog sink.
    Datadog {
        #[serde(default = "default_true")]
        enabled: bool,
        /// e.g. `datadoghq.com`; env `DD_SITE` overrides
        site: Option<String>,
        /// Prefer env `DD_API_KEY` or `KAIZEN_DD_API_KEY`
        api_key: Option<String>,
    },
    /// OTLP sink.
    Otlp {
        #[serde(default = "default_true")]
        enabled: bool,
        /// Env `OTEL_EXPORTER_OTLP_ENDPOINT` (or KAIZEN_ prefix) when unset here
        endpoint: Option<String>,
    },
}
471
472impl ExporterConfig {
473    /// Whether this row should be considered for `load_exporters` (excludes `None`).
474    pub fn is_enabled(&self) -> bool {
475        match self {
476            ExporterConfig::None => false,
477            ExporterConfig::File { enabled, .. } => *enabled,
478            ExporterConfig::Dev { enabled, .. } => *enabled,
479            ExporterConfig::PostHog { enabled, .. } => *enabled,
480            ExporterConfig::Datadog { enabled, .. } => *enabled,
481            ExporterConfig::Otlp { enabled, .. } => *enabled,
482        }
483    }
484}
485
/// Eval settings (`[eval]` table); disabled unless `enabled` is set.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EvalConfig {
    /// Off by default.
    #[serde(default)]
    pub enabled: bool,
    /// Endpoint base URL (default: `https://api.anthropic.com`).
    #[serde(default = "default_eval_endpoint")]
    pub endpoint: String,
    /// API key; empty by default.
    #[serde(default)]
    pub api_key: String,
    /// Model identifier (default: `claude-haiku-4-5-20251001`).
    #[serde(default = "default_eval_model")]
    pub model: String,
    /// Rubric identifier (default: `tool-efficiency-v1`).
    #[serde(default = "default_eval_rubric")]
    pub rubric: String,
    /// Batch size (default: 20).
    #[serde(default = "default_eval_batch_size")]
    pub batch_size: usize,
    /// Cost threshold in USD (default: 0.01).
    #[serde(default = "default_eval_min_cost")]
    pub min_cost_usd: f64,
}
503
504impl Default for EvalConfig {
505    fn default() -> Self {
506        Self {
507            enabled: false,
508            endpoint: default_eval_endpoint(),
509            api_key: String::new(),
510            model: default_eval_model(),
511            rubric: default_eval_rubric(),
512            batch_size: default_eval_batch_size(),
513            min_cost_usd: default_eval_min_cost(),
514        }
515    }
516}
517
/// Serde fallback for `EvalConfig::endpoint`.
fn default_eval_endpoint() -> String {
    String::from("https://api.anthropic.com")
}

/// Serde fallback for `EvalConfig::model`.
fn default_eval_model() -> String {
    String::from("claude-haiku-4-5-20251001")
}

/// Serde fallback for `EvalConfig::rubric`.
fn default_eval_rubric() -> String {
    String::from("tool-efficiency-v1")
}

/// Serde fallback for `EvalConfig::batch_size`.
fn default_eval_batch_size() -> usize {
    const BATCH: usize = 20;
    BATCH
}

/// Serde fallback for `EvalConfig::min_cost_usd`.
fn default_eval_min_cost() -> f64 {
    const MIN_COST_USD: f64 = 0.01;
    MIN_COST_USD
}
533
/// Guidance proposal settings; disabled unless `enabled` is set.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GuidanceProposalConfig {
    /// Off by default.
    #[serde(default)]
    pub enabled: bool,
    /// Endpoint base URL; falls back to `default_guidance_endpoint`.
    #[serde(default = "default_guidance_endpoint")]
    pub endpoint: String,
    /// API key; empty by default.
    #[serde(default)]
    pub api_key: String,
    /// Model identifier; falls back to `default_guidance_model`.
    #[serde(default = "default_guidance_model")]
    pub model: String,
    /// Operation cap; falls back to `default_guidance_max_ops`.
    #[serde(default = "default_guidance_max_ops")]
    pub max_ops: usize,
    /// Redaction switch; on by default.
    #[serde(default = "default_true")]
    pub redact: bool,
}
549
/// Guidance settings; currently wraps only the `proposals` sub-table.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct GuidanceConfig {
    /// Proposal settings; falls back to `GuidanceProposalConfig::default()` when absent.
    #[serde(default)]
    pub proposals: GuidanceProposalConfig,
}
555
556impl Default for GuidanceProposalConfig {
557    fn default() -> Self {
558        Self {
559            enabled: false,
560            endpoint: default_guidance_endpoint(),
561            api_key: String::new(),
562            model: default_guidance_model(),
563            max_ops: default_guidance_max_ops(),
564            redact: true,
565        }
566    }
567}
568
569fn default_guidance_endpoint() -> String {
570    default_eval_endpoint()
571}
572fn default_guidance_model() -> String {
573    default_eval_model()
574}
575fn default_guidance_max_ops() -> usize {
576    3
577}
578
/// Opt-in post-hook outcome measurement (Tier C).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CollectOutcomesConfig {
    /// Off by default.
    #[serde(default)]
    pub enabled: bool,
    /// Test command (default: `cargo test --quiet`).
    #[serde(default = "default_outcomes_test_cmd")]
    pub test_cmd: String,
    /// Timeout in seconds (default: 600).
    #[serde(default = "default_outcomes_timeout_secs")]
    pub timeout_secs: u64,
    /// Optional lint command; `None` when unset.
    #[serde(default)]
    pub lint_cmd: Option<String>,
}
591
/// Serde fallback for `CollectOutcomesConfig::test_cmd`.
fn default_outcomes_test_cmd() -> String {
    String::from("cargo test --quiet")
}

/// Serde fallback for `CollectOutcomesConfig::timeout_secs`.
fn default_outcomes_timeout_secs() -> u64 {
    const TIMEOUT_SECS: u64 = 600;
    TIMEOUT_SECS
}
599
600impl Default for CollectOutcomesConfig {
601    fn default() -> Self {
602        Self {
603            enabled: false,
604            test_cmd: default_outcomes_test_cmd(),
605            timeout_secs: default_outcomes_timeout_secs(),
606            lint_cmd: None,
607        }
608    }
609}
610
/// Opt-in per-process sampling (Tier D).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CollectSystemSamplerConfig {
    /// Off by default.
    #[serde(default)]
    pub enabled: bool,
    /// Sampling period in milliseconds (default: 2000).
    #[serde(default = "default_sampler_sample_ms")]
    pub sample_ms: u64,
    /// Per-session sample cap (default: 3600).
    #[serde(default = "default_sampler_max_samples")]
    pub max_samples_per_session: u32,
}
621
/// Serde fallback for `CollectSystemSamplerConfig::sample_ms`.
fn default_sampler_sample_ms() -> u64 {
    const SAMPLE_MS: u64 = 2000;
    SAMPLE_MS
}

/// Serde fallback for `CollectSystemSamplerConfig::max_samples_per_session`.
fn default_sampler_max_samples() -> u32 {
    const MAX_SAMPLES: u32 = 3600;
    MAX_SAMPLES
}
629
630impl Default for CollectSystemSamplerConfig {
631    fn default() -> Self {
632        Self {
633            enabled: false,
634            sample_ms: default_sampler_sample_ms(),
635            max_samples_per_session: default_sampler_max_samples(),
636        }
637    }
638}
639
/// Opt-in collection settings; each sub-table falls back to its `Default` when absent.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct CollectConfig {
    /// Outcome measurement settings.
    #[serde(default)]
    pub outcomes: CollectOutcomesConfig,
    /// Per-process sampler settings.
    #[serde(default)]
    pub system_sampler: CollectSystemSamplerConfig,
}
647
/// Root configuration document. Every top-level table is optional and falls back
/// to that section's `Default` when it is missing from the file.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct Config {
    /// `[scan]` table.
    #[serde(default)]
    pub scan: ScanConfig,
    /// `[sources]` table.
    #[serde(default)]
    pub sources: SourcesConfig,
    /// `[retention]` table.
    #[serde(default)]
    pub retention: RetentionConfig,
    /// `[storage]` table.
    #[serde(default)]
    pub storage: StorageConfig,
    /// `[sync]` table.
    #[serde(default)]
    pub sync: SyncConfig,
    /// `[telemetry]` table.
    #[serde(default)]
    pub telemetry: TelemetryConfig,
    /// `[proxy]` table.
    #[serde(default)]
    pub proxy: ProxyConfig,
    /// `[eval]` table.
    #[serde(default)]
    pub eval: EvalConfig,
    /// `[guidance]` table.
    #[serde(default)]
    pub guidance: GuidanceConfig,
    /// `[collect]` table.
    #[serde(default)]
    pub collect: CollectConfig,
}
671
672/// Load config: `~/.kaizen/projects/<slug>/config.toml` then `~/.kaizen/config.toml`.
673/// User config wins on overlap. Missing files → defaults, not error.
674pub fn load(workspace: &Path) -> Result<Config> {
675    let project_cfg = crate::core::paths::project_data_dir(workspace)
676        .ok()
677        .map(|d| d.join("config.toml"));
678    let user_path = crate::core::paths::kaizen_dir()
679        .ok_or_else(|| anyhow::anyhow!("KAIZEN_HOME / HOME unset"))?
680        .join("config.toml");
681
682    let base = project_cfg
683        .as_deref()
684        .and_then(load_file)
685        .unwrap_or_default();
686    let user = load_file(&user_path).unwrap_or_default();
687    Ok(merge(base, user))
688}
689
690fn load_file(path: &Path) -> Option<Config> {
691    let text = std::fs::read_to_string(path).ok()?;
692    toml::from_str(&text).ok()
693}
694
695fn merge(base: Config, user: Config) -> Config {
696    Config {
697        scan: merge_scan(base.scan, user.scan),
698        sources: merge_sources(base.sources, user.sources),
699        retention: merge_retention(base.retention, user.retention),
700        storage: merge_storage(base.storage, user.storage),
701        sync: merge_sync(base.sync, user.sync),
702        telemetry: merge_telemetry(base.telemetry, user.telemetry),
703        proxy: merge_proxy(base.proxy, user.proxy),
704        eval: merge_eval(base.eval, user.eval),
705        guidance: merge_guidance(base.guidance, user.guidance),
706        collect: merge_collect(base.collect, user.collect),
707    }
708}
709
710fn merge_guidance(base: GuidanceConfig, user: GuidanceConfig) -> GuidanceConfig {
711    GuidanceConfig {
712        proposals: merge_guidance_proposals(base.proposals, user.proposals),
713    }
714}
715
716fn merge_guidance_proposals(
717    base: GuidanceProposalConfig,
718    user: GuidanceProposalConfig,
719) -> GuidanceProposalConfig {
720    let def = GuidanceProposalConfig::default();
721    GuidanceProposalConfig {
722        enabled: pick_bool(user.enabled, base.enabled, def.enabled),
723        endpoint: pick_string(user.endpoint, base.endpoint, def.endpoint),
724        api_key: if user.api_key.is_empty() {
725            base.api_key
726        } else {
727            user.api_key
728        },
729        model: pick_string(user.model, base.model, def.model),
730        max_ops: if user.max_ops != def.max_ops {
731            user.max_ops
732        } else {
733            base.max_ops
734        },
735        redact: pick_bool(user.redact, base.redact, def.redact),
736    }
737}
738
/// Choose `user` when it differs from the default `def`, otherwise `base`.
fn pick_bool(user: bool, base: bool, def: bool) -> bool {
    if user == def {
        base
    } else {
        user
    }
}
742
/// Choose `user` when it differs from the default `def`, otherwise `base`.
fn pick_string(user: String, base: String, def: String) -> String {
    match user == def {
        true => base,
        false => user,
    }
}
746
747fn merge_collect(base: CollectConfig, user: CollectConfig) -> CollectConfig {
748    let def = CollectConfig::default();
749    CollectConfig {
750        outcomes: merge_collect_outcomes(base.outcomes, user.outcomes, def.outcomes),
751        system_sampler: merge_collect_sampler(
752            base.system_sampler,
753            user.system_sampler,
754            def.system_sampler,
755        ),
756    }
757}
758
759fn merge_collect_outcomes(
760    base: CollectOutcomesConfig,
761    user: CollectOutcomesConfig,
762    def: CollectOutcomesConfig,
763) -> CollectOutcomesConfig {
764    CollectOutcomesConfig {
765        enabled: if user.enabled != def.enabled {
766            user.enabled
767        } else {
768            base.enabled
769        },
770        test_cmd: if user.test_cmd != def.test_cmd {
771            user.test_cmd
772        } else {
773            base.test_cmd
774        },
775        timeout_secs: if user.timeout_secs != def.timeout_secs {
776            user.timeout_secs
777        } else {
778            base.timeout_secs
779        },
780        lint_cmd: user.lint_cmd.or(base.lint_cmd),
781    }
782}
783
784fn merge_collect_sampler(
785    base: CollectSystemSamplerConfig,
786    user: CollectSystemSamplerConfig,
787    def: CollectSystemSamplerConfig,
788) -> CollectSystemSamplerConfig {
789    CollectSystemSamplerConfig {
790        enabled: if user.enabled != def.enabled {
791            user.enabled
792        } else {
793            base.enabled
794        },
795        sample_ms: if user.sample_ms != def.sample_ms {
796            user.sample_ms
797        } else {
798            base.sample_ms
799        },
800        max_samples_per_session: if user.max_samples_per_session != def.max_samples_per_session {
801            user.max_samples_per_session
802        } else {
803            base.max_samples_per_session
804        },
805    }
806}
807
808fn merge_sources(base: SourcesConfig, user: SourcesConfig) -> SourcesConfig {
809    let def = SourcesConfig::default();
810    SourcesConfig {
811        cursor: merge_cursor_source(base.cursor, user.cursor, def.cursor),
812        tail: merge_tail_toggles(base.tail, user.tail, def.tail),
813    }
814}
815
816fn merge_cursor_source(
817    base: CursorSourceConfig,
818    user: CursorSourceConfig,
819    def: CursorSourceConfig,
820) -> CursorSourceConfig {
821    CursorSourceConfig {
822        enabled: if user.enabled != def.enabled {
823            user.enabled
824        } else {
825            base.enabled
826        },
827        transcript_glob: if user.transcript_glob != def.transcript_glob {
828            user.transcript_glob
829        } else {
830            base.transcript_glob
831        },
832    }
833}
834
835fn merge_tail_toggles(
836    base: TailAgentToggles,
837    user: TailAgentToggles,
838    def: TailAgentToggles,
839) -> TailAgentToggles {
840    TailAgentToggles {
841        goose: if user.goose != def.goose {
842            user.goose
843        } else {
844            base.goose
845        },
846        openclaw: if user.openclaw != def.openclaw {
847            user.openclaw
848        } else {
849            base.openclaw
850        },
851        opencode: if user.opencode != def.opencode {
852            user.opencode
853        } else {
854            base.opencode
855        },
856        copilot_cli: if user.copilot_cli != def.copilot_cli {
857            user.copilot_cli
858        } else {
859            base.copilot_cli
860        },
861        copilot_vscode: if user.copilot_vscode != def.copilot_vscode {
862            user.copilot_vscode
863        } else {
864            base.copilot_vscode
865        },
866    }
867}
868
869fn merge_eval(base: EvalConfig, user: EvalConfig) -> EvalConfig {
870    let def = EvalConfig::default();
871    EvalConfig {
872        enabled: if user.enabled != def.enabled {
873            user.enabled
874        } else {
875            base.enabled
876        },
877        endpoint: if user.endpoint != def.endpoint {
878            user.endpoint
879        } else {
880            base.endpoint
881        },
882        api_key: if !user.api_key.is_empty() {
883            user.api_key
884        } else {
885            base.api_key
886        },
887        model: if user.model != def.model {
888            user.model
889        } else {
890            base.model
891        },
892        rubric: if user.rubric != def.rubric {
893            user.rubric
894        } else {
895            base.rubric
896        },
897        batch_size: if user.batch_size != def.batch_size {
898            user.batch_size
899        } else {
900            base.batch_size
901        },
902        min_cost_usd: if user.min_cost_usd != def.min_cost_usd {
903            user.min_cost_usd
904        } else {
905            base.min_cost_usd
906        },
907    }
908}
909
910fn merge_scan(base: ScanConfig, user: ScanConfig) -> ScanConfig {
911    let def = ScanConfig::default();
912    ScanConfig {
913        roots: if user.roots != def.roots {
914            user.roots
915        } else {
916            base.roots
917        },
918        min_rescan_seconds: if user.min_rescan_seconds != def.min_rescan_seconds {
919            user.min_rescan_seconds
920        } else {
921            base.min_rescan_seconds
922        },
923    }
924}
925
926fn merge_retention(base: RetentionConfig, user: RetentionConfig) -> RetentionConfig {
927    let def = RetentionConfig::default();
928    RetentionConfig {
929        hot_days: if user.hot_days != def.hot_days {
930            user.hot_days
931        } else {
932            base.hot_days
933        },
934        warm_days: if user.warm_days != def.warm_days {
935            user.warm_days
936        } else {
937            base.warm_days
938        },
939    }
940}
941
942fn merge_storage(base: StorageConfig, user: StorageConfig) -> StorageConfig {
943    let def = StorageConfig::default();
944    StorageConfig {
945        hot_max_bytes: if user.hot_max_bytes != def.hot_max_bytes {
946            user.hot_max_bytes
947        } else {
948            base.hot_max_bytes
949        },
950        cold_after_days: if user.cold_after_days != def.cold_after_days {
951            user.cold_after_days
952        } else {
953            base.cold_after_days
954        },
955        retention_days: if user.retention_days != def.retention_days {
956            user.retention_days
957        } else {
958            base.retention_days
959        },
960        flush_hour_utc: if user.flush_hour_utc != def.flush_hour_utc {
961            user.flush_hour_utc
962        } else {
963            base.flush_hour_utc
964        },
965    }
966}
967
/// Parse a human-readable byte size such as `"512"`, `"64 MB"` or `"2TiB"`.
///
/// The input is trimmed, then split into a leading run of ASCII digits and a
/// unit suffix (case-insensitive, optional whitespace before it). Supported
/// units are `b`, `kb`/`kib`, `mb`/`mib`, `gb`/`gib` and `tb`/`tib`; all are
/// powers of 1024. A missing unit means bytes.
///
/// Returns `None` when there are no leading digits, the number does not fit in
/// `u64`, or the unit is unknown (fractions like `1.5GB` are not supported).
/// Products that overflow `u64` saturate at `u64::MAX`.
fn parse_byte_size(raw: &str) -> Option<u64> {
    const KIB: u64 = 1024;

    let s = raw.trim();
    // Byte index of the first non-digit; digits are ASCII so this is a char boundary.
    let split = s.find(|c: char| !c.is_ascii_digit()).unwrap_or(s.len());
    let (digits, unit) = s.split_at(split);
    let n = digits.parse::<u64>().ok()?;

    let multiplier = match unit.trim().to_ascii_lowercase().as_str() {
        "" | "b" => 1,
        "kb" | "kib" => KIB,
        "mb" | "mib" => KIB.pow(2),
        "gb" | "gib" => KIB.pow(3),
        "tb" | "tib" => KIB.pow(4),
        _ => return None,
    };
    Some(n.saturating_mul(multiplier))
}
984
985fn merge_proxy(base: ProxyConfig, user: ProxyConfig) -> ProxyConfig {
986    let def = ProxyConfig::default();
987    ProxyConfig {
988        listen: if user.listen != def.listen {
989            user.listen
990        } else {
991            base.listen
992        },
993        upstream: if user.upstream != def.upstream {
994            user.upstream
995        } else {
996            base.upstream
997        },
998        provider: if user.provider != def.provider {
999            user.provider
1000        } else {
1001            base.provider
1002        },
1003        compress_transport: if user.compress_transport != def.compress_transport {
1004            user.compress_transport
1005        } else {
1006            base.compress_transport
1007        },
1008        minify_json: if user.minify_json != def.minify_json {
1009            user.minify_json
1010        } else {
1011            base.minify_json
1012        },
1013        max_response_body_mb: if user.max_response_body_mb != def.max_response_body_mb {
1014            user.max_response_body_mb
1015        } else {
1016            base.max_response_body_mb
1017        },
1018        max_request_body_mb: if user.max_request_body_mb != def.max_request_body_mb {
1019            user.max_request_body_mb
1020        } else {
1021            base.max_request_body_mb
1022        },
1023        context_policy: if user.context_policy != def.context_policy {
1024            user.context_policy
1025        } else {
1026            base.context_policy
1027        },
1028    }
1029}
1030
1031fn merge_telemetry(base: TelemetryConfig, user: TelemetryConfig) -> TelemetryConfig {
1032    let def = TelemetryConfig::default();
1033    let fail_open = if user.fail_open != def.fail_open {
1034        user.fail_open
1035    } else {
1036        base.fail_open
1037    };
1038    let query = merge_telemetry_query(base.query, user.query);
1039    let exporters = if !user.exporters.is_empty() {
1040        user.exporters
1041    } else {
1042        base.exporters
1043    };
1044    TelemetryConfig {
1045        fail_open,
1046        query,
1047        exporters,
1048    }
1049}
1050
1051fn merge_telemetry_query(
1052    base: TelemetryQueryConfig,
1053    user: TelemetryQueryConfig,
1054) -> TelemetryQueryConfig {
1055    let def = TelemetryQueryConfig::default();
1056    TelemetryQueryConfig {
1057        provider: if user.provider != def.provider {
1058            user.provider
1059        } else {
1060            base.provider
1061        },
1062        cache_ttl_seconds: if user.cache_ttl_seconds != def.cache_ttl_seconds {
1063            user.cache_ttl_seconds
1064        } else {
1065            base.cache_ttl_seconds
1066        },
1067        identity_allowlist: merge_identity_allowlist(
1068            base.identity_allowlist,
1069            user.identity_allowlist,
1070        ),
1071    }
1072}
1073
1074fn merge_identity_allowlist(base: IdentityAllowlist, user: IdentityAllowlist) -> IdentityAllowlist {
1075    let def = IdentityAllowlist::default();
1076    IdentityAllowlist {
1077        team: if user.team != def.team {
1078            user.team
1079        } else {
1080            base.team
1081        },
1082        workspace_label: if user.workspace_label != def.workspace_label {
1083            user.workspace_label
1084        } else {
1085            base.workspace_label
1086        },
1087        runner_label: if user.runner_label != def.runner_label {
1088            user.runner_label
1089        } else {
1090            base.runner_label
1091        },
1092        actor_kind: if user.actor_kind != def.actor_kind {
1093            user.actor_kind
1094        } else {
1095            base.actor_kind
1096        },
1097        actor_label: if user.actor_label != def.actor_label {
1098            user.actor_label
1099        } else {
1100            base.actor_label
1101        },
1102        agent: if user.agent != def.agent {
1103            user.agent
1104        } else {
1105            base.agent
1106        },
1107        model: if user.model != def.model {
1108            user.model
1109        } else {
1110            base.model
1111        },
1112        env: if user.env != def.env {
1113            user.env
1114        } else {
1115            base.env
1116        },
1117        job: if user.job != def.job {
1118            user.job
1119        } else {
1120            base.job
1121        },
1122        branch: if user.branch != def.branch {
1123            user.branch
1124        } else {
1125            base.branch
1126        },
1127    }
1128}
1129
1130fn merge_sync(base: SyncConfig, user: SyncConfig) -> SyncConfig {
1131    let def = SyncConfig::default();
1132    SyncConfig {
1133        endpoint: if !user.endpoint.is_empty() {
1134            user.endpoint
1135        } else {
1136            base.endpoint
1137        },
1138        team_token: if !user.team_token.is_empty() {
1139            user.team_token
1140        } else {
1141            base.team_token
1142        },
1143        team_id: if !user.team_id.is_empty() {
1144            user.team_id
1145        } else {
1146            base.team_id
1147        },
1148        events_per_batch_max: if user.events_per_batch_max != def.events_per_batch_max {
1149            user.events_per_batch_max
1150        } else {
1151            base.events_per_batch_max
1152        },
1153        max_body_bytes: if user.max_body_bytes != def.max_body_bytes {
1154            user.max_body_bytes
1155        } else {
1156            base.max_body_bytes
1157        },
1158        flush_interval_ms: if user.flush_interval_ms != def.flush_interval_ms {
1159            user.flush_interval_ms
1160        } else {
1161            base.flush_interval_ms
1162        },
1163        sample_rate: if (user.sample_rate - def.sample_rate).abs() > f64::EPSILON {
1164            user.sample_rate
1165        } else {
1166            base.sample_rate
1167        },
1168        team_salt_hex: if !user.team_salt_hex.is_empty() {
1169            user.team_salt_hex
1170        } else {
1171            base.team_salt_hex
1172        },
1173    }
1174}
1175
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::TempDir;

    /// Points `KAIZEN_HOME` at a directory for as long as the guard is alive.
    ///
    /// The variable is removed in `Drop`, so it is cleaned up even when the
    /// test panics part-way through (for example on a failed `unwrap`), rather
    /// than leaking a path to an already-deleted temp dir into later tests.
    struct KaizenHomeEnv;

    impl KaizenHomeEnv {
        /// Sets `KAIZEN_HOME` to `home`. Create the guard *after* taking the
        /// global test lock so it is dropped before the lock is released.
        fn set(home: &std::path::Path) -> Self {
            // SAFETY: mutating the process environment is only sound while no
            // other thread reads or writes it. This assumes every test that
            // touches `KAIZEN_HOME` holds `test_lock::global()` while doing so.
            unsafe { std::env::set_var("KAIZEN_HOME", home) };
            Self
        }
    }

    impl Drop for KaizenHomeEnv {
        fn drop(&mut self) {
            // SAFETY: same invariant as in `set`; the guard is declared after
            // the lock guard, so the lock is still held here.
            unsafe { std::env::remove_var("KAIZEN_HOME") };
        }
    }

    /// With no config file written, `load` yields the built-in defaults.
    #[test]
    fn defaults_when_no_files() {
        // Use the same lock + scratch KAIZEN_HOME as the other `load` tests so
        // this one does not run while a sibling test has the variable set.
        // NOTE(review): presumably this also keeps `load` away from the
        // developer's real home directory — confirm against `load`.
        let _guard = crate::core::paths::test_lock::global().lock().unwrap();
        let home = TempDir::new().unwrap();
        let dir = TempDir::new().unwrap();
        let _home_env = KaizenHomeEnv::set(home.path());
        let cfg = load(dir.path()).unwrap();
        assert_eq!(cfg.scan.roots, ScanConfig::default().roots);
        assert_eq!(cfg.scan.min_rescan_seconds, 300);
        assert_eq!(cfg.retention.hot_days, 30);
        assert_eq!(cfg.storage.cold_after_days, 7);
        assert_eq!(cfg.storage.hot_max_bytes_value(), 1_073_741_824);
    }

    /// A configured 64-hex-char team salt is used directly and no local salt
    /// file is created.
    #[test]
    fn effective_redaction_salt_prefers_configured_team_salt() {
        let home = TempDir::new().unwrap();
        let sync = SyncConfig {
            team_salt_hex: "ab".repeat(32),
            ..Default::default()
        };
        let salt = effective_redaction_salt(&sync, home.path()).unwrap();
        assert_eq!(salt, [0xab_u8; 32]);
        // No local file written when team salt was sufficient.
        assert!(!home.path().join("local_salt.hex").exists());
    }

    /// Without a team salt, a local salt is generated once, persisted to
    /// `local_salt.hex` (mode 0600 on Unix) and reused on later calls.
    #[test]
    fn effective_redaction_salt_generates_and_persists_local_salt() {
        let home = TempDir::new().unwrap();
        let sync = SyncConfig::default();
        let a = effective_redaction_salt(&sync, home.path()).unwrap();
        let b = effective_redaction_salt(&sync, home.path()).unwrap();
        assert_eq!(a, b, "second call must reuse the persisted local salt");
        assert!(home.path().join("local_salt.hex").exists());
        #[cfg(unix)]
        {
            use std::os::unix::fs::PermissionsExt;
            let mode = std::fs::metadata(home.path().join("local_salt.hex"))
                .unwrap()
                .permissions()
                .mode()
                & 0o777;
            assert_eq!(mode, 0o600);
        }
    }

    /// A `config.toml` in the project data dir is picked up by `load`.
    #[test]
    fn workspace_config_loaded() {
        let _guard = crate::core::paths::test_lock::global().lock().unwrap();
        let home = TempDir::new().unwrap();
        let ws = TempDir::new().unwrap();
        let _home_env = KaizenHomeEnv::set(home.path());
        let data_dir = crate::core::paths::project_data_dir(ws.path()).unwrap();
        let mut f = std::fs::File::create(data_dir.join("config.toml")).unwrap();
        writeln!(f, "[scan]\nroots = [\"/custom/root\"]").unwrap();
        let cfg = load(ws.path()).unwrap();
        assert_eq!(cfg.scan.roots, vec!["/custom/root"]);
    }

    /// An unparsable `config.toml` does not fail `load`; defaults are used.
    #[test]
    fn invalid_toml_ignored() {
        let _guard = crate::core::paths::test_lock::global().lock().unwrap();
        let home = TempDir::new().unwrap();
        let ws = TempDir::new().unwrap();
        let _home_env = KaizenHomeEnv::set(home.path());
        let data_dir = crate::core::paths::project_data_dir(ws.path()).unwrap();
        std::fs::write(data_dir.join("config.toml"), "not valid toml :::").unwrap();
        let cfg = load(ws.path()).unwrap();
        assert_eq!(cfg.scan.roots, ScanConfig::default().roots);
    }

    /// Non-default user scan roots replace the base roots.
    #[test]
    fn merge_user_roots_win() {
        let base = Config {
            scan: ScanConfig {
                roots: vec!["/base".to_string()],
                ..ScanConfig::default()
            },
            ..Default::default()
        };
        let user = Config {
            scan: ScanConfig {
                roots: vec!["/user".to_string()],
                ..ScanConfig::default()
            },
            ..Default::default()
        };
        let merged = merge(base, user);
        assert_eq!(merged.scan.roots, vec!["/user"]);
    }

    /// An all-default user config leaves the base cursor source untouched.
    #[test]
    fn merge_sources_user_default_keeps_workspace_cursor() {
        let base = Config {
            sources: SourcesConfig {
                cursor: CursorSourceConfig {
                    enabled: false,
                    transcript_glob: "/workspace/glob/**".into(),
                },
                ..Default::default()
            },
            ..Default::default()
        };
        let user = Config::default();
        let merged = merge(base, user);
        assert!(!merged.sources.cursor.enabled);
        assert_eq!(merged.sources.cursor.transcript_glob, "/workspace/glob/**");
    }

    /// Retention merges per field: the user's `hot_days` (30) is expected to
    /// lose to the base value while the user's `warm_days` (45) wins.
    #[test]
    fn merge_retention_field_by_field() {
        let base = Config {
            retention: RetentionConfig {
                hot_days: 60,
                warm_days: 90,
            },
            ..Default::default()
        };
        let user = Config {
            retention: RetentionConfig {
                hot_days: 30,
                warm_days: 45,
            },
            ..Default::default()
        };
        let merged = merge(base, user);
        assert_eq!(merged.retention.hot_days, 60);
        assert_eq!(merged.retention.warm_days, 45);
    }

    /// The user's `hot_days` of 14 overrides the base value of 60.
    #[test]
    fn merge_retention_user_hot_overrides() {
        let base = Config {
            retention: RetentionConfig {
                hot_days: 60,
                warm_days: 90,
            },
            ..Default::default()
        };
        let user = Config {
            retention: RetentionConfig {
                hot_days: 14,
                warm_days: 90,
            },
            ..Default::default()
        };
        let merged = merge(base, user);
        assert_eq!(merged.retention.hot_days, 14);
        assert_eq!(merged.retention.warm_days, 90);
    }

    /// Only the storage field the user changed (`cold_after_days`) is taken
    /// from the user; every other field keeps the base value.
    #[test]
    fn merge_storage_user_overrides() {
        let base = Config {
            storage: StorageConfig {
                hot_max_bytes: "2GB".into(),
                cold_after_days: 14,
                retention_days: 120,
                flush_hour_utc: 3,
            },
            ..Default::default()
        };
        let user = Config {
            storage: StorageConfig {
                cold_after_days: 3,
                ..StorageConfig::default()
            },
            ..Default::default()
        };
        let merged = merge(base, user);
        assert_eq!(merged.storage.hot_max_bytes, "2GB");
        assert_eq!(merged.storage.cold_after_days, 3);
        assert_eq!(merged.storage.retention_days, 120);
        assert_eq!(merged.storage.flush_hour_utc, 3);
    }

    /// A non-empty user exporter list replaces the base list rather than
    /// being appended to it.
    #[test]
    fn merge_telemetry_exporters_user_wins_non_empty() {
        let base = Config {
            telemetry: TelemetryConfig {
                fail_open: true,
                query: TelemetryQueryConfig::default(),
                exporters: vec![ExporterConfig::None],
            },
            ..Default::default()
        };
        let user = Config {
            telemetry: TelemetryConfig {
                fail_open: false,
                query: TelemetryQueryConfig::default(),
                exporters: vec![ExporterConfig::Dev { enabled: true }],
            },
            ..Default::default()
        };
        let merged = merge(base, user);
        assert!(!merged.telemetry.fail_open);
        assert_eq!(merged.telemetry.exporters.len(), 1);
    }

    /// Pins the default telemetry query settings.
    #[test]
    fn telemetry_query_defaults() {
        let t = TelemetryQueryConfig::default();
        assert_eq!(t.provider, QueryAuthority::None);
        assert_eq!(t.cache_ttl_seconds, 3600);
        assert!(!t.identity_allowlist.team);
        assert!(!t.has_provider_for_pull());
    }

    /// Both the Posthog and Datadog providers count as pull providers.
    #[test]
    fn telemetry_query_has_provider() {
        let ph = TelemetryQueryConfig {
            provider: QueryAuthority::Posthog,
            ..Default::default()
        };
        assert!(ph.has_provider_for_pull());
        let dd = TelemetryQueryConfig {
            provider: QueryAuthority::Datadog,
            ..Default::default()
        };
        assert!(dd.has_provider_for_pull());
    }

    /// The user's TTL overrides the base while the fields the user left at
    /// their defaults (provider, allowlist) keep the base values.
    #[test]
    fn merge_telemetry_query_user_wins() {
        let base = Config {
            telemetry: TelemetryConfig {
                query: TelemetryQueryConfig {
                    provider: QueryAuthority::Posthog,
                    cache_ttl_seconds: 3600,
                    identity_allowlist: IdentityAllowlist {
                        team: true,
                        ..Default::default()
                    },
                },
                ..Default::default()
            },
            ..Default::default()
        };
        let user = Config {
            telemetry: TelemetryConfig {
                query: TelemetryQueryConfig {
                    cache_ttl_seconds: 7200,
                    ..Default::default()
                },
                ..Default::default()
            },
            ..Default::default()
        };
        let merged = merge(base, user);
        assert_eq!(merged.telemetry.query.provider, QueryAuthority::Posthog);
        assert_eq!(merged.telemetry.query.cache_ttl_seconds, 7200);
        assert!(merged.telemetry.query.identity_allowlist.team);
    }

    /// `[telemetry.query]` and its nested allowlist table are read from TOML;
    /// allowlist entries that are not listed stay `false`.
    #[test]
    fn toml_telemetry_query_roundtrip() {
        let _guard = crate::core::paths::test_lock::global().lock().unwrap();
        let home = TempDir::new().unwrap();
        let ws = TempDir::new().unwrap();
        let _home_env = KaizenHomeEnv::set(home.path());
        let data_dir = crate::core::paths::project_data_dir(ws.path()).unwrap();
        let toml = r#"
[telemetry.query]
provider = "datadog"
cache_ttl_seconds = 1800

[telemetry.query.identity_allowlist]
team = true
branch = true
"#;
        std::fs::write(data_dir.join("config.toml"), toml).unwrap();
        let cfg = load(ws.path()).unwrap();
        assert_eq!(cfg.telemetry.query.provider, QueryAuthority::Datadog);
        assert_eq!(cfg.telemetry.query.cache_ttl_seconds, 1800);
        assert!(cfg.telemetry.query.identity_allowlist.team);
        assert!(cfg.telemetry.query.identity_allowlist.branch);
        assert!(!cfg.telemetry.query.identity_allowlist.model);
    }
}