Skip to main content

kaizen/core/
config.rs

1// SPDX-License-Identifier: AGPL-3.0-or-later
2//! Config loading: workspace `.kaizen/config.toml` then `~/.kaizen/config.toml`.
3//! Missing files → defaults. User config wins on overlap.
4
5use anyhow::Result;
6use serde::{Deserialize, Serialize};
7use std::path::Path;
8
9#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
10pub struct ScanConfig {
11    pub roots: Vec<String>,
12    /// Minimum seconds between full agent transcript rescans when `--refresh` is not passed.
13    #[serde(default = "default_min_rescan_seconds")]
14    pub min_rescan_seconds: u64,
15}
16
/// Fallback for `ScanConfig::min_rescan_seconds`: 300 seconds.
fn default_min_rescan_seconds() -> u64 {
    const FIVE_MINUTES_SECS: u64 = 5 * 60;
    FIVE_MINUTES_SECS
}
20
21impl Default for ScanConfig {
22    fn default() -> Self {
23        Self {
24            roots: vec!["~/.cursor/projects".to_string()],
25            min_rescan_seconds: default_min_rescan_seconds(),
26        }
27    }
28}
29
30#[derive(Debug, Clone, Serialize, Deserialize)]
31pub struct CursorSourceConfig {
32    pub enabled: bool,
33    pub transcript_glob: String,
34}
35
36impl Default for CursorSourceConfig {
37    fn default() -> Self {
38        Self {
39            enabled: true,
40            transcript_glob: "*/agent-transcripts".to_string(),
41        }
42    }
43}
44
45/// Enable tier-1 tail ingestion for agents that store data outside Cursor/Claude/Codex paths.
46#[derive(Debug, Clone, Serialize, Deserialize)]
47pub struct TailAgentToggles {
48    #[serde(default = "default_true")]
49    pub gemini: bool,
50    #[serde(default = "default_true")]
51    pub pi: bool,
52    #[serde(default = "default_true")]
53    pub kimi: bool,
54    #[serde(default = "default_true")]
55    pub antigravity: bool,
56    #[serde(default = "default_true")]
57    pub cursor_state_db: bool,
58    #[serde(default = "default_true")]
59    pub goose: bool,
60    #[serde(default = "default_true")]
61    pub openclaw: bool,
62    #[serde(default = "default_true")]
63    pub opencode: bool,
64    #[serde(default = "default_true")]
65    pub copilot_cli: bool,
66    #[serde(default = "default_true")]
67    pub copilot_vscode: bool,
68}
69
70impl Default for TailAgentToggles {
71    fn default() -> Self {
72        Self {
73            gemini: true,
74            pi: true,
75            kimi: true,
76            antigravity: true,
77            cursor_state_db: true,
78            goose: true,
79            openclaw: true,
80            opencode: true,
81            copilot_cli: true,
82            copilot_vscode: true,
83        }
84    }
85}
86
87#[derive(Debug, Clone, Serialize, Deserialize, Default)]
88pub struct SourcesConfig {
89    #[serde(default)]
90    pub cursor: CursorSourceConfig,
91    #[serde(default)]
92    pub tail: TailAgentToggles,
93}
94
95#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
96pub struct RetentionConfig {
97    pub hot_days: u32,
98    pub warm_days: u32,
99}
100
101impl Default for RetentionConfig {
102    fn default() -> Self {
103        Self {
104            hot_days: 30,
105            warm_days: 90,
106        }
107    }
108}
109
110#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
111pub struct StorageConfig {
112    pub hot_max_bytes: String,
113    pub cold_after_days: u32,
114    pub retention_days: u32,
115    pub flush_hour_utc: u8,
116}
117
118impl Default for StorageConfig {
119    fn default() -> Self {
120        Self {
121            hot_max_bytes: "1GB".into(),
122            cold_after_days: 7,
123            retention_days: 90,
124            flush_hour_utc: 0,
125        }
126    }
127}
128
129impl StorageConfig {
130    pub fn hot_max_bytes_value(&self) -> u64 {
131        parse_byte_size(&self.hot_max_bytes).unwrap_or(1_073_741_824)
132    }
133}
134
135#[derive(Debug, Clone, Serialize, Deserialize)]
136pub struct SyncConfig {
137    /// When empty, sync is disabled (no outbox enqueue, `sync run` no-ops flush).
138    #[serde(default)]
139    pub endpoint: String,
140    #[serde(default)]
141    pub team_token: String,
142    #[serde(default)]
143    pub team_id: String,
144    #[serde(default = "default_events_per_batch")]
145    pub events_per_batch_max: usize,
146    #[serde(default = "default_max_body_bytes")]
147    pub max_body_bytes: usize,
148    #[serde(default = "default_flush_interval_ms")]
149    pub flush_interval_ms: u64,
150    #[serde(default = "default_sample_rate")]
151    pub sample_rate: f64,
152    /// 64 hex chars (32 bytes). Prefer `~/.kaizen/config.toml` only; never committed workspace secrets.
153    #[serde(default)]
154    pub team_salt_hex: String,
155}
156
/// Fallback for `SyncConfig::events_per_batch_max`.
fn default_events_per_batch() -> usize {
    const EVENTS_PER_BATCH: usize = 500;
    EVENTS_PER_BATCH
}
160
/// Fallback for `SyncConfig::max_body_bytes`: one million bytes.
fn default_max_body_bytes() -> usize {
    const MAX_BODY_BYTES: usize = 1_000 * 1_000;
    MAX_BODY_BYTES
}
164
/// Fallback for `SyncConfig::flush_interval_ms`: 10 000 ms.
fn default_flush_interval_ms() -> u64 {
    const FLUSH_INTERVAL_MS: u64 = 10 * 1_000;
    FLUSH_INTERVAL_MS
}
168
/// Fallback for `SyncConfig::sample_rate`.
fn default_sample_rate() -> f64 {
    const SAMPLE_RATE: f64 = 1.0;
    SAMPLE_RATE
}
172
173impl Default for SyncConfig {
174    fn default() -> Self {
175        Self {
176            endpoint: String::new(),
177            team_token: String::new(),
178            team_id: String::new(),
179            events_per_batch_max: default_events_per_batch(),
180            max_body_bytes: default_max_body_bytes(),
181            flush_interval_ms: default_flush_interval_ms(),
182            sample_rate: default_sample_rate(),
183            team_salt_hex: String::new(),
184        }
185    }
186}
187
188/// Parse `team_salt_hex` into 32 bytes. Returns `None` if missing or invalid.
189pub fn try_team_salt(cfg: &SyncConfig) -> Option<[u8; 32]> {
190    let h = cfg.team_salt_hex.trim();
191    if h.len() != 64 {
192        return None;
193    }
194    let bytes = hex::decode(h).ok()?;
195    bytes.try_into().ok()
196}
197
198/// Resolve a 32-byte redaction salt for telemetry-only flows (push/test) when sync is not
199/// configured. Order: configured `[sync].team_salt_hex` → `<kaizen_home>/local_salt.hex`
200/// → freshly generated and persisted at `0o600`. Telemetry never blocks on cloud sync.
201pub fn effective_redaction_salt(
202    cfg: &SyncConfig,
203    kaizen_home: &std::path::Path,
204) -> Result<[u8; 32]> {
205    if let Some(s) = try_team_salt(cfg) {
206        return Ok(s);
207    }
208    let path = kaizen_home.join("local_salt.hex");
209    if let Some(s) = read_local_salt(&path)? {
210        return Ok(s);
211    }
212    let bytes = generate_local_salt();
213    write_local_salt(&path, &bytes)?;
214    Ok(bytes)
215}
216
217fn read_local_salt(path: &std::path::Path) -> Result<Option<[u8; 32]>> {
218    use std::io::ErrorKind;
219    match std::fs::read_to_string(path) {
220        Ok(s) => Ok(parse_salt_hex(s.trim())),
221        Err(e) if e.kind() == ErrorKind::NotFound => Ok(None),
222        Err(e) => Err(e.into()),
223    }
224}
225
226fn parse_salt_hex(h: &str) -> Option<[u8; 32]> {
227    if h.len() != 64 {
228        return None;
229    }
230    hex::decode(h).ok()?.try_into().ok()
231}
232
233fn generate_local_salt() -> [u8; 32] {
234    use rand::Rng;
235    let mut bytes = [0u8; 32];
236    rand::rng().fill_bytes(&mut bytes);
237    bytes
238}
239
240fn write_local_salt(path: &std::path::Path, bytes: &[u8; 32]) -> Result<()> {
241    if let Some(parent) = path.parent() {
242        std::fs::create_dir_all(parent)?;
243    }
244    let hex_s = hex::encode(bytes);
245    std::fs::write(path, hex_s.as_bytes())?;
246    set_user_only_perms(path)?;
247    Ok(())
248}
249
250#[cfg(unix)]
251fn set_user_only_perms(path: &std::path::Path) -> Result<()> {
252    use std::os::unix::fs::PermissionsExt;
253    let mut perms = std::fs::metadata(path)?.permissions();
254    perms.set_mode(0o600);
255    std::fs::set_permissions(path, perms)?;
256    Ok(())
257}
258
/// Non-Unix targets: no permission change is made; always succeeds.
#[cfg(not(unix))]
fn set_user_only_perms(_path: &std::path::Path) -> Result<()> {
    Ok(())
}
263
/// Serde default helper for flags that are on unless explicitly disabled.
fn default_true() -> bool {
    const ENABLED: bool = true;
    ENABLED
}
267
/// Fallback for `TelemetryConfig::fail_open`.
fn default_telemetry_fail_open() -> bool {
    const FAIL_OPEN: bool = true;
    FAIL_OPEN
}
271
/// Fallback for `TelemetryQueryConfig::cache_ttl_seconds`: 3600 seconds.
fn default_cache_ttl_seconds() -> u64 {
    const ONE_HOUR_SECS: u64 = 60 * 60;
    ONE_HOUR_SECS
}
275
276/// Which third-party system is the single source for query-back / pull; OTLP is export-only, not a pull target.
277#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
278#[serde(rename_all = "lowercase")]
279pub enum QueryAuthority {
280    #[default]
281    None,
282    Posthog,
283    Datadog,
284}
285
286/// Per-field allowlist: when `false` (default), the field is omitted or hashed in telemetry exports.
287#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
288pub struct IdentityAllowlist {
289    #[serde(default)]
290    pub team: bool,
291    #[serde(default)]
292    pub workspace_label: bool,
293    #[serde(default)]
294    pub runner_label: bool,
295    #[serde(default)]
296    pub actor_kind: bool,
297    #[serde(default)]
298    pub actor_label: bool,
299    #[serde(default)]
300    pub agent: bool,
301    #[serde(default)]
302    pub model: bool,
303    #[serde(default)]
304    pub env: bool,
305    #[serde(default)]
306    pub job: bool,
307    #[serde(default)]
308    pub branch: bool,
309}
310
311/// Remote pull: query authority, cache TTL, and which identity labels may leave as cleartext.
312#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
313pub struct TelemetryQueryConfig {
314    /// `posthog` or `datadog` enables provider pull when implemented; `none` or unset = no query authority.
315    #[serde(default)]
316    pub provider: QueryAuthority,
317    /// Seconds to treat remote cache rows as fresh (unless the CLI requests `--refresh`).
318    #[serde(default = "default_cache_ttl_seconds")]
319    pub cache_ttl_seconds: u64,
320    #[serde(default)]
321    pub identity_allowlist: IdentityAllowlist,
322}
323
324impl Default for TelemetryQueryConfig {
325    fn default() -> Self {
326        Self {
327            provider: QueryAuthority::default(),
328            cache_ttl_seconds: default_cache_ttl_seconds(),
329            identity_allowlist: IdentityAllowlist::default(),
330        }
331    }
332}
333
334impl TelemetryQueryConfig {
335    /// True when a PostHog or Datadog pull backend may be used (OTLP is not a pull target).
336    pub fn has_provider_for_pull(&self) -> bool {
337        matches!(
338            self.provider,
339            QueryAuthority::Posthog | QueryAuthority::Datadog
340        )
341    }
342}
343
344/// How to reduce billed input to the model (opt-in; default leaves requests unchanged).
345#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
346#[serde(tag = "type", rename_all = "snake_case")]
347pub enum ContextPolicy {
348    /// No transformation beyond optional JSON minify (same tokens as a direct call).
349    #[default]
350    None,
351    /// Keep the last `count` `messages` array entries; system blocks unchanged when present.
352    LastMessages { count: usize },
353    /// Drop oldest messages until a rough `chars/4` estimate stays at or below `max`.
354    MaxInputTokens { max: u32 },
355}
356
357/// Anthropic API-compatible HTTP proxy: forward + local telemetry. See `docs/llm-proxy.md`.
358#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
359pub struct ProxyConfig {
360    /// e.g. `127.0.0.1:3847` (bind address for `kaizen proxy run`).
361    #[serde(default = "default_proxy_listen")]
362    pub listen: String,
363    /// Base URL, no trailing slash, e.g. `https://api.anthropic.com`.
364    #[serde(default = "default_proxy_upstream")]
365    pub upstream: String,
366    /// `anthropic`, `openai`, or `auto`; controls launcher/env hints and default upstream.
367    #[serde(default = "default_proxy_provider")]
368    pub provider: String,
369    /// Prefer `Accept-Encoding: gzip` to upstream (response bodies may be gzip).
370    #[serde(default = "default_true")]
371    pub compress_transport: bool,
372    /// Re-encode JSON bodies to compact `serde_json` (no key reorder; whitespace only).
373    #[serde(default = "default_true")]
374    pub minify_json: bool,
375    /// Slurp cap for a single upstream response (streaming not yet teed; see doc).
376    #[serde(default = "default_proxy_max_body_mb")]
377    pub max_response_body_mb: u32,
378    /// Reject / fail incoming client bodies above this (POST bodies before forward).
379    #[serde(default = "default_proxy_max_request_body_mb")]
380    pub max_request_body_mb: u32,
381    /// Optional token-aware truncation of `messages` in JSON bodies.
382    #[serde(default)]
383    pub context_policy: ContextPolicy,
384}
385
/// Fallback for `ProxyConfig::listen`.
fn default_proxy_listen() -> String {
    String::from("127.0.0.1:3847")
}
389
/// Fallback for `ProxyConfig::upstream`.
fn default_proxy_upstream() -> String {
    String::from("https://api.anthropic.com")
}
393
/// Fallback for `ProxyConfig::provider`.
fn default_proxy_provider() -> String {
    String::from("anthropic")
}
397
/// Fallback for `ProxyConfig::max_response_body_mb`.
fn default_proxy_max_body_mb() -> u32 {
    const MAX_RESPONSE_MB: u32 = 256;
    MAX_RESPONSE_MB
}
401
/// Fallback for `ProxyConfig::max_request_body_mb`.
fn default_proxy_max_request_body_mb() -> u32 {
    const MAX_REQUEST_MB: u32 = 32;
    MAX_REQUEST_MB
}
405
406impl Default for ProxyConfig {
407    fn default() -> Self {
408        Self {
409            listen: default_proxy_listen(),
410            upstream: default_proxy_upstream(),
411            provider: default_proxy_provider(),
412            compress_transport: true,
413            minify_json: true,
414            max_response_body_mb: default_proxy_max_body_mb(),
415            max_request_body_mb: default_proxy_max_request_body_mb(),
416            context_policy: ContextPolicy::default(),
417        }
418    }
419}
420
421/// Optional third-party telemetry sinks; same redacted batches as Kaizen sync.
422#[derive(Debug, Clone, Serialize, Deserialize)]
423pub struct TelemetryConfig {
424    /// When `true` (default), ignore exporter errors; when `false`, `flush` fails if any secondary errors.
425    #[serde(default = "default_telemetry_fail_open")]
426    pub fail_open: bool,
427    /// Query-back / pull API: authority, cache TTL, identity allowlist.
428    #[serde(default)]
429    pub query: TelemetryQueryConfig,
430    /// Declarative list; `type = "none"` rows are accepted and ignored.
431    #[serde(default)]
432    pub exporters: Vec<ExporterConfig>,
433}
434
435impl Default for TelemetryConfig {
436    fn default() -> Self {
437        Self {
438            fail_open: default_telemetry_fail_open(),
439            query: TelemetryQueryConfig::default(),
440            exporters: Vec::new(),
441        }
442    }
443}
444
445/// One pluggable sink; TOML `type` is the tag.
446#[derive(Debug, Clone, Serialize, Deserialize)]
447#[serde(tag = "type", rename_all = "lowercase")]
448pub enum ExporterConfig {
449    /// No-op row for sparse tables / templates.
450    None,
451    /// Append summary JSON lines to a local NDJSON file (default `<workspace>/.kaizen/telemetry.ndjson`).
452    File {
453        #[serde(default = "default_true")]
454        enabled: bool,
455        #[serde(default)]
456        path: Option<String>,
457    },
458    /// Echo to tracing (for wiring tests; requires the `telemetry-dev` build feature).
459    Dev {
460        #[serde(default = "default_true")]
461        enabled: bool,
462    },
463    PostHog {
464        #[serde(default = "default_true")]
465        enabled: bool,
466        /// e.g. `https://us.i.posthog.com` (default when unset)
467        host: Option<String>,
468        /// Prefer env `POSTHOG_API_KEY` or `KAIZEN_POSTHOG_API_KEY`
469        project_api_key: Option<String>,
470    },
471    Datadog {
472        #[serde(default = "default_true")]
473        enabled: bool,
474        /// e.g. `datadoghq.com`; env `DD_SITE` overrides
475        site: Option<String>,
476        /// Prefer env `DD_API_KEY` or `KAIZEN_DD_API_KEY`
477        api_key: Option<String>,
478    },
479    Otlp {
480        #[serde(default = "default_true")]
481        enabled: bool,
482        /// Env `OTEL_EXPORTER_OTLP_ENDPOINT` (or KAIZEN_ prefix) when unset here
483        endpoint: Option<String>,
484    },
485}
486
487impl ExporterConfig {
488    /// Whether this row should be considered for `load_exporters` (excludes `None`).
489    pub fn is_enabled(&self) -> bool {
490        match self {
491            ExporterConfig::None => false,
492            ExporterConfig::File { enabled, .. } => *enabled,
493            ExporterConfig::Dev { enabled, .. } => *enabled,
494            ExporterConfig::PostHog { enabled, .. } => *enabled,
495            ExporterConfig::Datadog { enabled, .. } => *enabled,
496            ExporterConfig::Otlp { enabled, .. } => *enabled,
497        }
498    }
499}
500
501#[derive(Debug, Clone, Serialize, Deserialize)]
502pub struct EvalConfig {
503    #[serde(default)]
504    pub enabled: bool,
505    #[serde(default = "default_eval_endpoint")]
506    pub endpoint: String,
507    #[serde(default)]
508    pub api_key: String,
509    #[serde(default = "default_eval_model")]
510    pub model: String,
511    #[serde(default = "default_eval_rubric")]
512    pub rubric: String,
513    #[serde(default = "default_eval_batch_size")]
514    pub batch_size: usize,
515    #[serde(default = "default_eval_min_cost")]
516    pub min_cost_usd: f64,
517}
518
519impl Default for EvalConfig {
520    fn default() -> Self {
521        Self {
522            enabled: false,
523            endpoint: default_eval_endpoint(),
524            api_key: String::new(),
525            model: default_eval_model(),
526            rubric: default_eval_rubric(),
527            batch_size: default_eval_batch_size(),
528            min_cost_usd: default_eval_min_cost(),
529        }
530    }
531}
532
/// Fallback for `EvalConfig::endpoint`.
fn default_eval_endpoint() -> String {
    String::from("https://api.anthropic.com")
}
/// Fallback for `EvalConfig::model`.
fn default_eval_model() -> String {
    String::from("claude-haiku-4-5-20251001")
}
/// Fallback for `EvalConfig::rubric`.
fn default_eval_rubric() -> String {
    String::from("tool-efficiency-v1")
}
/// Fallback for `EvalConfig::batch_size`.
fn default_eval_batch_size() -> usize {
    const BATCH_SIZE: usize = 20;
    BATCH_SIZE
}
/// Fallback for `EvalConfig::min_cost_usd`.
fn default_eval_min_cost() -> f64 {
    const MIN_COST_USD: f64 = 0.01;
    MIN_COST_USD
}
548
549#[derive(Debug, Clone, Serialize, Deserialize)]
550pub struct GuidanceProposalConfig {
551    #[serde(default)]
552    pub enabled: bool,
553    #[serde(default = "default_guidance_endpoint")]
554    pub endpoint: String,
555    #[serde(default)]
556    pub api_key: String,
557    #[serde(default = "default_guidance_model")]
558    pub model: String,
559    #[serde(default = "default_guidance_max_ops")]
560    pub max_ops: usize,
561    #[serde(default = "default_true")]
562    pub redact: bool,
563}
564
565#[derive(Debug, Clone, Serialize, Deserialize, Default)]
566pub struct GuidanceConfig {
567    #[serde(default)]
568    pub proposals: GuidanceProposalConfig,
569}
570
571impl Default for GuidanceProposalConfig {
572    fn default() -> Self {
573        Self {
574            enabled: false,
575            endpoint: default_guidance_endpoint(),
576            api_key: String::new(),
577            model: default_guidance_model(),
578            max_ops: default_guidance_max_ops(),
579            redact: true,
580        }
581    }
582}
583
584fn default_guidance_endpoint() -> String {
585    default_eval_endpoint()
586}
587fn default_guidance_model() -> String {
588    default_eval_model()
589}
/// Fallback for `GuidanceProposalConfig::max_ops`.
fn default_guidance_max_ops() -> usize {
    const MAX_OPS: usize = 3;
    MAX_OPS
}
593
594/// Opt-in post-hook outcome measurement (Tier C).
595#[derive(Debug, Clone, Serialize, Deserialize)]
596pub struct CollectOutcomesConfig {
597    #[serde(default)]
598    pub enabled: bool,
599    #[serde(default = "default_outcomes_test_cmd")]
600    pub test_cmd: String,
601    #[serde(default = "default_outcomes_timeout_secs")]
602    pub timeout_secs: u64,
603    #[serde(default)]
604    pub lint_cmd: Option<String>,
605}
606
/// Fallback for `CollectOutcomesConfig::test_cmd`.
fn default_outcomes_test_cmd() -> String {
    String::from("cargo test --quiet")
}
610
/// Fallback for `CollectOutcomesConfig::timeout_secs`: 600 seconds.
fn default_outcomes_timeout_secs() -> u64 {
    const TEN_MINUTES_SECS: u64 = 10 * 60;
    TEN_MINUTES_SECS
}
614
615impl Default for CollectOutcomesConfig {
616    fn default() -> Self {
617        Self {
618            enabled: false,
619            test_cmd: default_outcomes_test_cmd(),
620            timeout_secs: default_outcomes_timeout_secs(),
621            lint_cmd: None,
622        }
623    }
624}
625
626/// Opt-in per-process sampling (Tier D).
627#[derive(Debug, Clone, Serialize, Deserialize)]
628pub struct CollectSystemSamplerConfig {
629    #[serde(default)]
630    pub enabled: bool,
631    #[serde(default = "default_sampler_sample_ms")]
632    pub sample_ms: u64,
633    #[serde(default = "default_sampler_max_samples")]
634    pub max_samples_per_session: u32,
635}
636
/// Fallback for `CollectSystemSamplerConfig::sample_ms`: 2000 ms.
fn default_sampler_sample_ms() -> u64 {
    const SAMPLE_MS: u64 = 2 * 1_000;
    SAMPLE_MS
}
640
/// Fallback for `CollectSystemSamplerConfig::max_samples_per_session`.
fn default_sampler_max_samples() -> u32 {
    const MAX_SAMPLES: u32 = 3_600;
    MAX_SAMPLES
}
644
645impl Default for CollectSystemSamplerConfig {
646    fn default() -> Self {
647        Self {
648            enabled: false,
649            sample_ms: default_sampler_sample_ms(),
650            max_samples_per_session: default_sampler_max_samples(),
651        }
652    }
653}
654
655#[derive(Debug, Clone, Serialize, Deserialize, Default)]
656pub struct CollectConfig {
657    #[serde(default)]
658    pub outcomes: CollectOutcomesConfig,
659    #[serde(default)]
660    pub system_sampler: CollectSystemSamplerConfig,
661}
662
663#[derive(Debug, Clone, Serialize, Deserialize, Default)]
664pub struct Config {
665    #[serde(default)]
666    pub scan: ScanConfig,
667    #[serde(default)]
668    pub sources: SourcesConfig,
669    #[serde(default)]
670    pub retention: RetentionConfig,
671    #[serde(default)]
672    pub storage: StorageConfig,
673    #[serde(default)]
674    pub sync: SyncConfig,
675    #[serde(default)]
676    pub telemetry: TelemetryConfig,
677    #[serde(default)]
678    pub proxy: ProxyConfig,
679    #[serde(default)]
680    pub eval: EvalConfig,
681    #[serde(default)]
682    pub guidance: GuidanceConfig,
683    #[serde(default)]
684    pub collect: CollectConfig,
685}
686
687/// Load config: `~/.kaizen/projects/<slug>/config.toml` then `~/.kaizen/config.toml`.
688/// User config wins on overlap. Missing files → defaults, not error.
689pub fn load(workspace: &Path) -> Result<Config> {
690    let project_cfg = crate::core::paths::project_data_dir(workspace)
691        .ok()
692        .map(|d| d.join("config.toml"));
693    let user_path = crate::core::paths::kaizen_dir()
694        .ok_or_else(|| anyhow::anyhow!("KAIZEN_HOME / HOME unset"))?
695        .join("config.toml");
696
697    let base = project_cfg
698        .as_deref()
699        .and_then(load_file)
700        .unwrap_or_default();
701    let user = load_file(&user_path).unwrap_or_default();
702    Ok(merge(base, user))
703}
704
705fn load_file(path: &Path) -> Option<Config> {
706    let text = std::fs::read_to_string(path).ok()?;
707    toml::from_str(&text).ok()
708}
709
710fn merge(base: Config, user: Config) -> Config {
711    Config {
712        scan: merge_scan(base.scan, user.scan),
713        sources: merge_sources(base.sources, user.sources),
714        retention: merge_retention(base.retention, user.retention),
715        storage: merge_storage(base.storage, user.storage),
716        sync: merge_sync(base.sync, user.sync),
717        telemetry: merge_telemetry(base.telemetry, user.telemetry),
718        proxy: merge_proxy(base.proxy, user.proxy),
719        eval: merge_eval(base.eval, user.eval),
720        guidance: merge_guidance(base.guidance, user.guidance),
721        collect: merge_collect(base.collect, user.collect),
722    }
723}
724
725fn merge_guidance(base: GuidanceConfig, user: GuidanceConfig) -> GuidanceConfig {
726    GuidanceConfig {
727        proposals: merge_guidance_proposals(base.proposals, user.proposals),
728    }
729}
730
731fn merge_guidance_proposals(
732    base: GuidanceProposalConfig,
733    user: GuidanceProposalConfig,
734) -> GuidanceProposalConfig {
735    let def = GuidanceProposalConfig::default();
736    GuidanceProposalConfig {
737        enabled: pick_bool(user.enabled, base.enabled, def.enabled),
738        endpoint: pick_string(user.endpoint, base.endpoint, def.endpoint),
739        api_key: if user.api_key.is_empty() {
740            base.api_key
741        } else {
742            user.api_key
743        },
744        model: pick_string(user.model, base.model, def.model),
745        max_ops: if user.max_ops != def.max_ops {
746            user.max_ops
747        } else {
748            base.max_ops
749        },
750        redact: pick_bool(user.redact, base.redact, def.redact),
751    }
752}
753
/// Return `user` when it differs from the default `def`, otherwise `base`.
fn pick_bool(user: bool, base: bool, def: bool) -> bool {
    if user == def {
        base
    } else {
        user
    }
}
757
/// Return `user` when it differs from the default `def`, otherwise `base`.
fn pick_string(user: String, base: String, def: String) -> String {
    if user == def {
        base
    } else {
        user
    }
}
761
762fn merge_collect(base: CollectConfig, user: CollectConfig) -> CollectConfig {
763    let def = CollectConfig::default();
764    CollectConfig {
765        outcomes: merge_collect_outcomes(base.outcomes, user.outcomes, def.outcomes),
766        system_sampler: merge_collect_sampler(
767            base.system_sampler,
768            user.system_sampler,
769            def.system_sampler,
770        ),
771    }
772}
773
774fn merge_collect_outcomes(
775    base: CollectOutcomesConfig,
776    user: CollectOutcomesConfig,
777    def: CollectOutcomesConfig,
778) -> CollectOutcomesConfig {
779    CollectOutcomesConfig {
780        enabled: if user.enabled != def.enabled {
781            user.enabled
782        } else {
783            base.enabled
784        },
785        test_cmd: if user.test_cmd != def.test_cmd {
786            user.test_cmd
787        } else {
788            base.test_cmd
789        },
790        timeout_secs: if user.timeout_secs != def.timeout_secs {
791            user.timeout_secs
792        } else {
793            base.timeout_secs
794        },
795        lint_cmd: user.lint_cmd.or(base.lint_cmd),
796    }
797}
798
799fn merge_collect_sampler(
800    base: CollectSystemSamplerConfig,
801    user: CollectSystemSamplerConfig,
802    def: CollectSystemSamplerConfig,
803) -> CollectSystemSamplerConfig {
804    CollectSystemSamplerConfig {
805        enabled: if user.enabled != def.enabled {
806            user.enabled
807        } else {
808            base.enabled
809        },
810        sample_ms: if user.sample_ms != def.sample_ms {
811            user.sample_ms
812        } else {
813            base.sample_ms
814        },
815        max_samples_per_session: if user.max_samples_per_session != def.max_samples_per_session {
816            user.max_samples_per_session
817        } else {
818            base.max_samples_per_session
819        },
820    }
821}
822
823fn merge_sources(base: SourcesConfig, user: SourcesConfig) -> SourcesConfig {
824    let def = SourcesConfig::default();
825    SourcesConfig {
826        cursor: merge_cursor_source(base.cursor, user.cursor, def.cursor),
827        tail: merge_tail_toggles(base.tail, user.tail, def.tail),
828    }
829}
830
831fn merge_cursor_source(
832    base: CursorSourceConfig,
833    user: CursorSourceConfig,
834    def: CursorSourceConfig,
835) -> CursorSourceConfig {
836    CursorSourceConfig {
837        enabled: if user.enabled != def.enabled {
838            user.enabled
839        } else {
840            base.enabled
841        },
842        transcript_glob: if user.transcript_glob != def.transcript_glob {
843            user.transcript_glob
844        } else {
845            base.transcript_glob
846        },
847    }
848}
849
850fn merge_tail_toggles(
851    base: TailAgentToggles,
852    user: TailAgentToggles,
853    def: TailAgentToggles,
854) -> TailAgentToggles {
855    TailAgentToggles {
856        gemini: if user.gemini != def.gemini {
857            user.gemini
858        } else {
859            base.gemini
860        },
861        pi: if user.pi != def.pi { user.pi } else { base.pi },
862        kimi: if user.kimi != def.kimi {
863            user.kimi
864        } else {
865            base.kimi
866        },
867        antigravity: if user.antigravity != def.antigravity {
868            user.antigravity
869        } else {
870            base.antigravity
871        },
872        cursor_state_db: if user.cursor_state_db != def.cursor_state_db {
873            user.cursor_state_db
874        } else {
875            base.cursor_state_db
876        },
877        goose: if user.goose != def.goose {
878            user.goose
879        } else {
880            base.goose
881        },
882        openclaw: if user.openclaw != def.openclaw {
883            user.openclaw
884        } else {
885            base.openclaw
886        },
887        opencode: if user.opencode != def.opencode {
888            user.opencode
889        } else {
890            base.opencode
891        },
892        copilot_cli: if user.copilot_cli != def.copilot_cli {
893            user.copilot_cli
894        } else {
895            base.copilot_cli
896        },
897        copilot_vscode: if user.copilot_vscode != def.copilot_vscode {
898            user.copilot_vscode
899        } else {
900            base.copilot_vscode
901        },
902    }
903}
904
905fn merge_eval(base: EvalConfig, user: EvalConfig) -> EvalConfig {
906    let def = EvalConfig::default();
907    EvalConfig {
908        enabled: if user.enabled != def.enabled {
909            user.enabled
910        } else {
911            base.enabled
912        },
913        endpoint: if user.endpoint != def.endpoint {
914            user.endpoint
915        } else {
916            base.endpoint
917        },
918        api_key: if !user.api_key.is_empty() {
919            user.api_key
920        } else {
921            base.api_key
922        },
923        model: if user.model != def.model {
924            user.model
925        } else {
926            base.model
927        },
928        rubric: if user.rubric != def.rubric {
929            user.rubric
930        } else {
931            base.rubric
932        },
933        batch_size: if user.batch_size != def.batch_size {
934            user.batch_size
935        } else {
936            base.batch_size
937        },
938        min_cost_usd: if user.min_cost_usd != def.min_cost_usd {
939            user.min_cost_usd
940        } else {
941            base.min_cost_usd
942        },
943    }
944}
945
946fn merge_scan(base: ScanConfig, user: ScanConfig) -> ScanConfig {
947    let def = ScanConfig::default();
948    ScanConfig {
949        roots: if user.roots != def.roots {
950            user.roots
951        } else {
952            base.roots
953        },
954        min_rescan_seconds: if user.min_rescan_seconds != def.min_rescan_seconds {
955            user.min_rescan_seconds
956        } else {
957            base.min_rescan_seconds
958        },
959    }
960}
961
962fn merge_retention(base: RetentionConfig, user: RetentionConfig) -> RetentionConfig {
963    let def = RetentionConfig::default();
964    RetentionConfig {
965        hot_days: if user.hot_days != def.hot_days {
966            user.hot_days
967        } else {
968            base.hot_days
969        },
970        warm_days: if user.warm_days != def.warm_days {
971            user.warm_days
972        } else {
973            base.warm_days
974        },
975    }
976}
977
978fn merge_storage(base: StorageConfig, user: StorageConfig) -> StorageConfig {
979    let def = StorageConfig::default();
980    StorageConfig {
981        hot_max_bytes: if user.hot_max_bytes != def.hot_max_bytes {
982            user.hot_max_bytes
983        } else {
984            base.hot_max_bytes
985        },
986        cold_after_days: if user.cold_after_days != def.cold_after_days {
987            user.cold_after_days
988        } else {
989            base.cold_after_days
990        },
991        retention_days: if user.retention_days != def.retention_days {
992            user.retention_days
993        } else {
994            base.retention_days
995        },
996        flush_hour_utc: if user.flush_hour_utc != def.flush_hour_utc {
997            user.flush_hour_utc
998        } else {
999            base.flush_hour_utc
1000        },
1001    }
1002}
1003
/// Parses a human-readable byte size such as `"512"`, `"64 MB"` or `"1GiB"`.
///
/// The input is trimmed, then split into a leading run of ASCII digits and an
/// optional unit suffix (whitespace between the two is allowed, the suffix is
/// case-insensitive). Recognised suffixes are `b`, `kb`/`kib`, `mb`/`mib`,
/// `gb`/`gib` and `tb`/`tib`; every multiplier is a power of 1024, so `kb`
/// and `kib` mean the same thing.
///
/// Returns `None` when there are no leading digits, when the number does not
/// fit in a `u64`, or when the suffix is not recognised (this includes
/// fractional values such as `"1.5GB"` and signed values). A product that
/// would overflow `u64` saturates at `u64::MAX` instead of failing.
fn parse_byte_size(raw: &str) -> Option<u64> {
    const KIB: u64 = 1024;

    let s = raw.trim();
    // The prefix holds ASCII digits only, so the split index is always on a
    // char boundary even when the suffix contains multi-byte characters.
    let digits_end = s.find(|c: char| !c.is_ascii_digit()).unwrap_or(s.len());
    let (digits, suffix) = s.split_at(digits_end);
    // An empty digit run fails to parse, which rejects inputs like "GB" or "".
    let n = digits.parse::<u64>().ok()?;

    let multiplier = match suffix.trim().to_ascii_lowercase().as_str() {
        "" | "b" => 1,
        "kb" | "kib" => KIB,
        "mb" | "mib" => KIB.pow(2),
        "gb" | "gib" => KIB.pow(3),
        "tb" | "tib" => KIB.pow(4),
        _ => return None,
    };
    Some(n.saturating_mul(multiplier))
}
1020
1021fn merge_proxy(base: ProxyConfig, user: ProxyConfig) -> ProxyConfig {
1022    let def = ProxyConfig::default();
1023    ProxyConfig {
1024        listen: if user.listen != def.listen {
1025            user.listen
1026        } else {
1027            base.listen
1028        },
1029        upstream: if user.upstream != def.upstream {
1030            user.upstream
1031        } else {
1032            base.upstream
1033        },
1034        provider: if user.provider != def.provider {
1035            user.provider
1036        } else {
1037            base.provider
1038        },
1039        compress_transport: if user.compress_transport != def.compress_transport {
1040            user.compress_transport
1041        } else {
1042            base.compress_transport
1043        },
1044        minify_json: if user.minify_json != def.minify_json {
1045            user.minify_json
1046        } else {
1047            base.minify_json
1048        },
1049        max_response_body_mb: if user.max_response_body_mb != def.max_response_body_mb {
1050            user.max_response_body_mb
1051        } else {
1052            base.max_response_body_mb
1053        },
1054        max_request_body_mb: if user.max_request_body_mb != def.max_request_body_mb {
1055            user.max_request_body_mb
1056        } else {
1057            base.max_request_body_mb
1058        },
1059        context_policy: if user.context_policy != def.context_policy {
1060            user.context_policy
1061        } else {
1062            base.context_policy
1063        },
1064    }
1065}
1066
1067fn merge_telemetry(base: TelemetryConfig, user: TelemetryConfig) -> TelemetryConfig {
1068    let def = TelemetryConfig::default();
1069    let fail_open = if user.fail_open != def.fail_open {
1070        user.fail_open
1071    } else {
1072        base.fail_open
1073    };
1074    let query = merge_telemetry_query(base.query, user.query);
1075    let exporters = if !user.exporters.is_empty() {
1076        user.exporters
1077    } else {
1078        base.exporters
1079    };
1080    TelemetryConfig {
1081        fail_open,
1082        query,
1083        exporters,
1084    }
1085}
1086
1087fn merge_telemetry_query(
1088    base: TelemetryQueryConfig,
1089    user: TelemetryQueryConfig,
1090) -> TelemetryQueryConfig {
1091    let def = TelemetryQueryConfig::default();
1092    TelemetryQueryConfig {
1093        provider: if user.provider != def.provider {
1094            user.provider
1095        } else {
1096            base.provider
1097        },
1098        cache_ttl_seconds: if user.cache_ttl_seconds != def.cache_ttl_seconds {
1099            user.cache_ttl_seconds
1100        } else {
1101            base.cache_ttl_seconds
1102        },
1103        identity_allowlist: merge_identity_allowlist(
1104            base.identity_allowlist,
1105            user.identity_allowlist,
1106        ),
1107    }
1108}
1109
1110fn merge_identity_allowlist(base: IdentityAllowlist, user: IdentityAllowlist) -> IdentityAllowlist {
1111    let def = IdentityAllowlist::default();
1112    IdentityAllowlist {
1113        team: if user.team != def.team {
1114            user.team
1115        } else {
1116            base.team
1117        },
1118        workspace_label: if user.workspace_label != def.workspace_label {
1119            user.workspace_label
1120        } else {
1121            base.workspace_label
1122        },
1123        runner_label: if user.runner_label != def.runner_label {
1124            user.runner_label
1125        } else {
1126            base.runner_label
1127        },
1128        actor_kind: if user.actor_kind != def.actor_kind {
1129            user.actor_kind
1130        } else {
1131            base.actor_kind
1132        },
1133        actor_label: if user.actor_label != def.actor_label {
1134            user.actor_label
1135        } else {
1136            base.actor_label
1137        },
1138        agent: if user.agent != def.agent {
1139            user.agent
1140        } else {
1141            base.agent
1142        },
1143        model: if user.model != def.model {
1144            user.model
1145        } else {
1146            base.model
1147        },
1148        env: if user.env != def.env {
1149            user.env
1150        } else {
1151            base.env
1152        },
1153        job: if user.job != def.job {
1154            user.job
1155        } else {
1156            base.job
1157        },
1158        branch: if user.branch != def.branch {
1159            user.branch
1160        } else {
1161            base.branch
1162        },
1163    }
1164}
1165
1166fn merge_sync(base: SyncConfig, user: SyncConfig) -> SyncConfig {
1167    let def = SyncConfig::default();
1168    SyncConfig {
1169        endpoint: if !user.endpoint.is_empty() {
1170            user.endpoint
1171        } else {
1172            base.endpoint
1173        },
1174        team_token: if !user.team_token.is_empty() {
1175            user.team_token
1176        } else {
1177            base.team_token
1178        },
1179        team_id: if !user.team_id.is_empty() {
1180            user.team_id
1181        } else {
1182            base.team_id
1183        },
1184        events_per_batch_max: if user.events_per_batch_max != def.events_per_batch_max {
1185            user.events_per_batch_max
1186        } else {
1187            base.events_per_batch_max
1188        },
1189        max_body_bytes: if user.max_body_bytes != def.max_body_bytes {
1190            user.max_body_bytes
1191        } else {
1192            base.max_body_bytes
1193        },
1194        flush_interval_ms: if user.flush_interval_ms != def.flush_interval_ms {
1195            user.flush_interval_ms
1196        } else {
1197            base.flush_interval_ms
1198        },
1199        sample_rate: if (user.sample_rate - def.sample_rate).abs() > f64::EPSILON {
1200            user.sample_rate
1201        } else {
1202            base.sample_rate
1203        },
1204        team_salt_hex: if !user.team_salt_hex.is_empty() {
1205            user.team_salt_hex
1206        } else {
1207            base.team_salt_hex
1208        },
1209    }
1210}
1211
#[cfg(test)]
mod tests {
    //! Unit tests for config loading, layer merging and redaction-salt handling.

    use super::*;
    use std::io::Write;
    use tempfile::TempDir;

    /// Runs `body` with `KAIZEN_HOME` pointing at `home` and returns its result.
    ///
    /// The global test lock is held for the whole call, and the variable is
    /// removed again by a drop guard, so a panic inside `body` (for example a
    /// failed `unwrap`) cannot leak `KAIZEN_HOME` into later tests.
    fn with_kaizen_home<T>(home: &std::path::Path, body: impl FnOnce() -> T) -> T {
        struct RemoveOnDrop;

        impl Drop for RemoveOnDrop {
            fn drop(&mut self) {
                // SAFETY: this guard is declared after the lock guard, so it is
                // dropped first and the global test lock is still held here.
                unsafe { std::env::remove_var("KAIZEN_HOME") };
            }
        }

        let _lock = crate::core::paths::test_lock::global().lock().unwrap();
        // SAFETY: environment mutation is serialized by the global test lock
        // taken above. NOTE(review): this relies on every test that reads or
        // writes the process environment taking the same lock — confirm.
        unsafe { std::env::set_var("KAIZEN_HOME", home) };
        let _reset = RemoveOnDrop;
        body()
    }

    #[test]
    fn defaults_when_no_files() {
        let dir = TempDir::new().unwrap();
        let cfg = load(dir.path()).unwrap();
        assert_eq!(cfg.scan.roots, ScanConfig::default().roots);
        assert_eq!(cfg.scan.min_rescan_seconds, 300);
        assert_eq!(cfg.retention.hot_days, 30);
        assert_eq!(cfg.storage.cold_after_days, 7);
        assert_eq!(cfg.storage.hot_max_bytes_value(), 1_073_741_824);
    }

    #[test]
    fn effective_redaction_salt_prefers_configured_team_salt() {
        let home = TempDir::new().unwrap();
        let sync = SyncConfig {
            team_salt_hex: "ab".repeat(32),
            ..Default::default()
        };
        let salt = effective_redaction_salt(&sync, home.path()).unwrap();
        assert_eq!(salt, [0xab_u8; 32]);
        // No local file written when team salt was sufficient.
        assert!(!home.path().join("local_salt.hex").exists());
    }

    #[test]
    fn effective_redaction_salt_generates_and_persists_local_salt() {
        let home = TempDir::new().unwrap();
        let sync = SyncConfig::default();
        let a = effective_redaction_salt(&sync, home.path()).unwrap();
        let b = effective_redaction_salt(&sync, home.path()).unwrap();
        assert_eq!(a, b, "second call must reuse the persisted local salt");
        assert!(home.path().join("local_salt.hex").exists());
        #[cfg(unix)]
        {
            use std::os::unix::fs::PermissionsExt;
            // Only the permission bits matter; mask off the file-type bits.
            let mode = std::fs::metadata(home.path().join("local_salt.hex"))
                .unwrap()
                .permissions()
                .mode()
                & 0o777;
            assert_eq!(mode, 0o600);
        }
    }

    #[test]
    fn workspace_config_loaded() {
        let home = TempDir::new().unwrap();
        let ws = TempDir::new().unwrap();
        let cfg = with_kaizen_home(home.path(), || {
            let data_dir = crate::core::paths::project_data_dir(ws.path()).unwrap();
            let mut f = std::fs::File::create(data_dir.join("config.toml")).unwrap();
            writeln!(f, "[scan]\nroots = [\"/custom/root\"]").unwrap();
            load(ws.path()).unwrap()
        });
        assert_eq!(cfg.scan.roots, vec!["/custom/root"]);
    }

    #[test]
    fn invalid_toml_ignored() {
        let home = TempDir::new().unwrap();
        let ws = TempDir::new().unwrap();
        let cfg = with_kaizen_home(home.path(), || {
            let data_dir = crate::core::paths::project_data_dir(ws.path()).unwrap();
            std::fs::write(data_dir.join("config.toml"), "not valid toml :::").unwrap();
            load(ws.path()).unwrap()
        });
        assert_eq!(cfg.scan.roots, ScanConfig::default().roots);
    }

    #[test]
    fn merge_user_roots_win() {
        let base = Config {
            scan: ScanConfig {
                roots: vec!["/base".to_string()],
                ..ScanConfig::default()
            },
            ..Default::default()
        };
        let user = Config {
            scan: ScanConfig {
                roots: vec!["/user".to_string()],
                ..ScanConfig::default()
            },
            ..Default::default()
        };
        let merged = merge(base, user);
        assert_eq!(merged.scan.roots, vec!["/user"]);
    }

    #[test]
    fn merge_sources_user_default_keeps_workspace_cursor() {
        let base = Config {
            sources: SourcesConfig {
                cursor: CursorSourceConfig {
                    enabled: false,
                    transcript_glob: "/workspace/glob/**".into(),
                },
                ..Default::default()
            },
            ..Default::default()
        };
        let user = Config::default();
        let merged = merge(base, user);
        assert!(!merged.sources.cursor.enabled);
        assert_eq!(merged.sources.cursor.transcript_glob, "/workspace/glob/**");
    }

    #[test]
    fn merge_retention_field_by_field() {
        let base = Config {
            retention: RetentionConfig {
                hot_days: 60,
                warm_days: 90,
            },
            ..Default::default()
        };
        let user = Config {
            retention: RetentionConfig {
                hot_days: 30,
                warm_days: 45,
            },
            ..Default::default()
        };
        let merged = merge(base, user);
        assert_eq!(merged.retention.hot_days, 60);
        assert_eq!(merged.retention.warm_days, 45);
    }

    #[test]
    fn merge_retention_user_hot_overrides() {
        let base = Config {
            retention: RetentionConfig {
                hot_days: 60,
                warm_days: 90,
            },
            ..Default::default()
        };
        let user = Config {
            retention: RetentionConfig {
                hot_days: 14,
                warm_days: 90,
            },
            ..Default::default()
        };
        let merged = merge(base, user);
        assert_eq!(merged.retention.hot_days, 14);
        assert_eq!(merged.retention.warm_days, 90);
    }

    #[test]
    fn merge_storage_user_overrides() {
        let base = Config {
            storage: StorageConfig {
                hot_max_bytes: "2GB".into(),
                cold_after_days: 14,
                retention_days: 120,
                flush_hour_utc: 3,
            },
            ..Default::default()
        };
        let user = Config {
            storage: StorageConfig {
                cold_after_days: 3,
                ..StorageConfig::default()
            },
            ..Default::default()
        };
        let merged = merge(base, user);
        assert_eq!(merged.storage.hot_max_bytes, "2GB");
        assert_eq!(merged.storage.cold_after_days, 3);
        assert_eq!(merged.storage.retention_days, 120);
        assert_eq!(merged.storage.flush_hour_utc, 3);
    }

    #[test]
    fn merge_telemetry_exporters_user_wins_non_empty() {
        let base = Config {
            telemetry: TelemetryConfig {
                fail_open: true,
                query: TelemetryQueryConfig::default(),
                exporters: vec![ExporterConfig::None],
            },
            ..Default::default()
        };
        let user = Config {
            telemetry: TelemetryConfig {
                fail_open: false,
                query: TelemetryQueryConfig::default(),
                exporters: vec![ExporterConfig::Dev { enabled: true }],
            },
            ..Default::default()
        };
        let merged = merge(base, user);
        assert!(!merged.telemetry.fail_open);
        assert_eq!(merged.telemetry.exporters.len(), 1);
    }

    #[test]
    fn telemetry_query_defaults() {
        let t = TelemetryQueryConfig::default();
        assert_eq!(t.provider, QueryAuthority::None);
        assert_eq!(t.cache_ttl_seconds, 3600);
        assert!(!t.identity_allowlist.team);
        assert!(!t.has_provider_for_pull());
    }

    #[test]
    fn telemetry_query_has_provider() {
        let ph = TelemetryQueryConfig {
            provider: QueryAuthority::Posthog,
            ..Default::default()
        };
        assert!(ph.has_provider_for_pull());
        let dd = TelemetryQueryConfig {
            provider: QueryAuthority::Datadog,
            ..Default::default()
        };
        assert!(dd.has_provider_for_pull());
    }

    #[test]
    fn merge_telemetry_query_user_wins() {
        let base = Config {
            telemetry: TelemetryConfig {
                query: TelemetryQueryConfig {
                    provider: QueryAuthority::Posthog,
                    cache_ttl_seconds: 3600,
                    identity_allowlist: IdentityAllowlist {
                        team: true,
                        ..Default::default()
                    },
                },
                ..Default::default()
            },
            ..Default::default()
        };
        let user = Config {
            telemetry: TelemetryConfig {
                query: TelemetryQueryConfig {
                    cache_ttl_seconds: 7200,
                    ..Default::default()
                },
                ..Default::default()
            },
            ..Default::default()
        };
        let merged = merge(base, user);
        assert_eq!(merged.telemetry.query.provider, QueryAuthority::Posthog);
        assert_eq!(merged.telemetry.query.cache_ttl_seconds, 7200);
        assert!(merged.telemetry.query.identity_allowlist.team);
    }

    #[test]
    fn toml_telemetry_query_roundtrip() {
        let home = TempDir::new().unwrap();
        let ws = TempDir::new().unwrap();
        let toml = r#"
[telemetry.query]
provider = "datadog"
cache_ttl_seconds = 1800

[telemetry.query.identity_allowlist]
team = true
branch = true
"#;
        let cfg = with_kaizen_home(home.path(), || {
            let data_dir = crate::core::paths::project_data_dir(ws.path()).unwrap();
            std::fs::write(data_dir.join("config.toml"), toml).unwrap();
            load(ws.path()).unwrap()
        });
        assert_eq!(cfg.telemetry.query.provider, QueryAuthority::Datadog);
        assert_eq!(cfg.telemetry.query.cache_ttl_seconds, 1800);
        assert!(cfg.telemetry.query.identity_allowlist.team);
        assert!(cfg.telemetry.query.identity_allowlist.branch);
        assert!(!cfg.telemetry.query.identity_allowlist.model);
    }
}