Skip to main content

kaizen/core/
config.rs

1// SPDX-License-Identifier: AGPL-3.0-or-later
//! Config loading: project `~/.kaizen/projects/<slug>/config.toml` then `~/.kaizen/config.toml`.
3//! Missing files → defaults. User config wins on overlap.
4
5use anyhow::Result;
6use serde::{Deserialize, Serialize};
7use std::path::Path;
8
/// `[scan]` table: where and how often agent transcripts are scanned.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ScanConfig {
    /// Root directories searched for transcript trees.
    pub roots: Vec<String>,
    /// Minimum seconds between full agent transcript rescans when `--refresh` is not passed.
    #[serde(default = "default_min_rescan_seconds")]
    pub min_rescan_seconds: u64,
}

/// Serde default for `ScanConfig::min_rescan_seconds` (5 minutes).
fn default_min_rescan_seconds() -> u64 {
    300
}

impl Default for ScanConfig {
    fn default() -> Self {
        Self {
            roots: vec!["~/.cursor/projects".to_string()],
            // Reuse the serde default so missing-field and missing-table agree.
            min_rescan_seconds: default_min_rescan_seconds(),
        }
    }
}
29
/// `[sources.cursor]` table: the Cursor transcript source.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CursorSourceConfig {
    /// Whether this source is ingested at all.
    pub enabled: bool,
    /// Glob locating transcript directories (matched under the scan roots —
    /// NOTE(review): assumed relative to `ScanConfig::roots`; confirm at the scanner).
    pub transcript_glob: String,
}

impl Default for CursorSourceConfig {
    fn default() -> Self {
        Self {
            enabled: true,
            transcript_glob: "*/agent-transcripts".to_string(),
        }
    }
}
44
/// Enable tier-1 tail ingestion for agents that store data outside Cursor/Claude/Codex paths.
/// Every toggle defaults to `true`, via both serde (`default_true`) and `Default`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TailAgentToggles {
    #[serde(default = "default_true")]
    pub goose: bool,
    #[serde(default = "default_true")]
    pub openclaw: bool,
    #[serde(default = "default_true")]
    pub opencode: bool,
    #[serde(default = "default_true")]
    pub copilot_cli: bool,
    #[serde(default = "default_true")]
    pub copilot_vscode: bool,
}

impl Default for TailAgentToggles {
    // Must stay in lockstep with the per-field serde defaults above.
    fn default() -> Self {
        Self {
            goose: true,
            openclaw: true,
            opencode: true,
            copilot_cli: true,
            copilot_vscode: true,
        }
    }
}
71
/// `[sources]` table: per-source ingestion settings; each sub-table is optional.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct SourcesConfig {
    #[serde(default)]
    pub cursor: CursorSourceConfig,
    #[serde(default)]
    pub tail: TailAgentToggles,
}
79
/// `[retention]` table: age thresholds, in days, for data tiers.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct RetentionConfig {
    pub hot_days: u32,
    pub warm_days: u32,
}

impl Default for RetentionConfig {
    fn default() -> Self {
        Self {
            hot_days: 30,
            warm_days: 90,
        }
    }
}
94
/// `[storage]` table: hot-tier sizing and flush scheduling.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct StorageConfig {
    /// Human-readable size string (e.g. `"1GB"`); parsed by `hot_max_bytes_value`.
    pub hot_max_bytes: String,
    pub cold_after_days: u32,
    pub retention_days: u32,
    /// Hour of day in UTC — NOTE(review): assumed 0–23; no validation visible here.
    pub flush_hour_utc: u8,
}

impl Default for StorageConfig {
    fn default() -> Self {
        Self {
            hot_max_bytes: "1GB".into(),
            cold_after_days: 7,
            retention_days: 90,
            flush_hour_utc: 0,
        }
    }
}
113
114impl StorageConfig {
115    pub fn hot_max_bytes_value(&self) -> u64 {
116        parse_byte_size(&self.hot_max_bytes).unwrap_or(1_073_741_824)
117    }
118}
119
/// `[sync]` table: cloud sync endpoint, batching limits, and team identity/salt.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SyncConfig {
    /// When empty, sync is disabled (no outbox enqueue, `sync run` no-ops flush).
    #[serde(default)]
    pub endpoint: String,
    /// Team credential; empty = unset.
    #[serde(default)]
    pub team_token: String,
    #[serde(default)]
    pub team_id: String,
    #[serde(default = "default_events_per_batch")]
    pub events_per_batch_max: usize,
    #[serde(default = "default_max_body_bytes")]
    pub max_body_bytes: usize,
    #[serde(default = "default_flush_interval_ms")]
    pub flush_interval_ms: u64,
    /// NOTE(review): presumably the fraction of events kept (1.0 = all) — confirm at call site.
    #[serde(default = "default_sample_rate")]
    pub sample_rate: f64,
    /// 64 hex chars (32 bytes). Prefer `~/.kaizen/config.toml` only; never committed workspace secrets.
    #[serde(default)]
    pub team_salt_hex: String,
}

/// Serde default for `SyncConfig::events_per_batch_max`.
fn default_events_per_batch() -> usize {
    500
}

/// Serde default for `SyncConfig::max_body_bytes` (~1 MB, decimal).
fn default_max_body_bytes() -> usize {
    1_000_000
}

/// Serde default for `SyncConfig::flush_interval_ms` (10 s).
fn default_flush_interval_ms() -> u64 {
    10_000
}

/// Serde default for `SyncConfig::sample_rate`.
fn default_sample_rate() -> f64 {
    1.0
}

impl Default for SyncConfig {
    // Mirrors the serde defaults so a missing `[sync]` table and an empty one agree.
    fn default() -> Self {
        Self {
            endpoint: String::new(),
            team_token: String::new(),
            team_id: String::new(),
            events_per_batch_max: default_events_per_batch(),
            max_body_bytes: default_max_body_bytes(),
            flush_interval_ms: default_flush_interval_ms(),
            sample_rate: default_sample_rate(),
            team_salt_hex: String::new(),
        }
    }
}
172
173/// Parse `team_salt_hex` into 32 bytes. Returns `None` if missing or invalid.
174pub fn try_team_salt(cfg: &SyncConfig) -> Option<[u8; 32]> {
175    let h = cfg.team_salt_hex.trim();
176    if h.len() != 64 {
177        return None;
178    }
179    let bytes = hex::decode(h).ok()?;
180    bytes.try_into().ok()
181}
182
/// Resolve a 32-byte redaction salt for telemetry-only flows (push/test) when sync is not
/// configured. Order: configured `[sync].team_salt_hex` → `<kaizen_home>/local_salt.hex`
/// → freshly generated and persisted at `0o600`. Telemetry never blocks on cloud sync.
pub fn effective_redaction_salt(
    cfg: &SyncConfig,
    kaizen_home: &std::path::Path,
) -> Result<[u8; 32]> {
    // 1) Team-configured salt wins when present and valid.
    if let Some(s) = try_team_salt(cfg) {
        return Ok(s);
    }
    // 2) Previously persisted machine-local salt.
    let path = kaizen_home.join("local_salt.hex");
    if let Some(s) = read_local_salt(&path)? {
        return Ok(s);
    }
    // 3) First run: mint a salt and persist it so redacted hashes stay stable across runs.
    let bytes = generate_local_salt();
    write_local_salt(&path, &bytes)?;
    Ok(bytes)
}
201
202fn read_local_salt(path: &std::path::Path) -> Result<Option<[u8; 32]>> {
203    use std::io::ErrorKind;
204    match std::fs::read_to_string(path) {
205        Ok(s) => Ok(parse_salt_hex(s.trim())),
206        Err(e) if e.kind() == ErrorKind::NotFound => Ok(None),
207        Err(e) => Err(e.into()),
208    }
209}
210
211fn parse_salt_hex(h: &str) -> Option<[u8; 32]> {
212    if h.len() != 64 {
213        return None;
214    }
215    hex::decode(h).ok()?.try_into().ok()
216}
217
218fn generate_local_salt() -> [u8; 32] {
219    use rand::Rng;
220    let mut bytes = [0u8; 32];
221    rand::rng().fill_bytes(&mut bytes);
222    bytes
223}
224
225fn write_local_salt(path: &std::path::Path, bytes: &[u8; 32]) -> Result<()> {
226    if let Some(parent) = path.parent() {
227        std::fs::create_dir_all(parent)?;
228    }
229    let hex_s = hex::encode(bytes);
230    std::fs::write(path, hex_s.as_bytes())?;
231    set_user_only_perms(path)?;
232    Ok(())
233}
234
235#[cfg(unix)]
236fn set_user_only_perms(path: &std::path::Path) -> Result<()> {
237    use std::os::unix::fs::PermissionsExt;
238    let mut perms = std::fs::metadata(path)?.permissions();
239    perms.set_mode(0o600);
240    std::fs::set_permissions(path, perms)?;
241    Ok(())
242}
243
/// No-op on non-Unix targets: there are no Unix mode bits to set.
#[cfg(not(unix))]
fn set_user_only_perms(_path: &std::path::Path) -> Result<()> {
    Ok(())
}
248
/// Serde default helper for fields that are on unless explicitly disabled.
fn default_true() -> bool {
    true
}
252
/// Serde default for `TelemetryConfig::fail_open`.
fn default_telemetry_fail_open() -> bool {
    true
}
256
/// Serde default for `TelemetryQueryConfig::cache_ttl_seconds` (1 hour).
fn default_cache_ttl_seconds() -> u64 {
    3600
}
260
/// Which third-party system is the single source for query-back / pull; OTLP is export-only, not a pull target.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum QueryAuthority {
    /// No pull backend configured (the default).
    #[default]
    None,
    Posthog,
    Datadog,
}
270
/// Per-field allowlist: when `false` (default), the field is omitted or hashed in telemetry exports.
/// All flags default to `false` via `#[derive(Default)]` — cleartext export is strictly opt-in.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct IdentityAllowlist {
    #[serde(default)]
    pub team: bool,
    #[serde(default)]
    pub workspace_label: bool,
    #[serde(default)]
    pub runner_label: bool,
    #[serde(default)]
    pub actor_kind: bool,
    #[serde(default)]
    pub actor_label: bool,
    #[serde(default)]
    pub agent: bool,
    #[serde(default)]
    pub model: bool,
    #[serde(default)]
    pub env: bool,
    #[serde(default)]
    pub job: bool,
    #[serde(default)]
    pub branch: bool,
}
295
/// Remote pull: query authority, cache TTL, and which identity labels may leave as cleartext.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct TelemetryQueryConfig {
    /// `posthog` or `datadog` enables provider pull when implemented; `none` or unset = no query authority.
    #[serde(default)]
    pub provider: QueryAuthority,
    /// Seconds to treat remote cache rows as fresh (unless the CLI requests `--refresh`).
    #[serde(default = "default_cache_ttl_seconds")]
    pub cache_ttl_seconds: u64,
    #[serde(default)]
    pub identity_allowlist: IdentityAllowlist,
}

impl Default for TelemetryQueryConfig {
    // Manual impl because `cache_ttl_seconds` must default to 3600, not 0.
    fn default() -> Self {
        Self {
            provider: QueryAuthority::default(),
            cache_ttl_seconds: default_cache_ttl_seconds(),
            identity_allowlist: IdentityAllowlist::default(),
        }
    }
}
318
319impl TelemetryQueryConfig {
320    /// True when a PostHog or Datadog pull backend may be used (OTLP is not a pull target).
321    pub fn has_provider_for_pull(&self) -> bool {
322        matches!(
323            self.provider,
324            QueryAuthority::Posthog | QueryAuthority::Datadog
325        )
326    }
327}
328
/// How to reduce billed input to the model (opt-in; default leaves requests unchanged).
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ContextPolicy {
    /// No transformation beyond optional JSON minify (same tokens as a direct call).
    #[default]
    None,
    /// Keep the last `count` `messages` array entries; system blocks unchanged when present.
    LastMessages { count: usize },
    /// Drop oldest messages until a rough `chars/4` estimate stays at or below `max`.
    MaxInputTokens { max: u32 },
}
341
/// Anthropic API-compatible HTTP proxy: forward + local telemetry. See `docs/llm-proxy.md`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ProxyConfig {
    /// e.g. `127.0.0.1:3847` (bind address for `kaizen proxy run`).
    #[serde(default = "default_proxy_listen")]
    pub listen: String,
    /// Base URL, no trailing slash, e.g. `https://api.anthropic.com`.
    #[serde(default = "default_proxy_upstream")]
    pub upstream: String,
    /// Prefer `Accept-Encoding: gzip` to upstream (response bodies may be gzip).
    #[serde(default = "default_true")]
    pub compress_transport: bool,
    /// Re-encode JSON bodies to compact `serde_json` (no key reorder; whitespace only).
    #[serde(default = "default_true")]
    pub minify_json: bool,
    /// Slurp cap for a single upstream response (streaming not yet teed; see doc).
    #[serde(default = "default_proxy_max_body_mb")]
    pub max_response_body_mb: u32,
    /// Reject / fail incoming client bodies above this (POST bodies before forward).
    #[serde(default = "default_proxy_max_request_body_mb")]
    pub max_request_body_mb: u32,
    /// Optional token-aware truncation of `messages` in JSON bodies.
    #[serde(default)]
    pub context_policy: ContextPolicy,
}

/// Serde default for `ProxyConfig::listen` (loopback only).
fn default_proxy_listen() -> String {
    "127.0.0.1:3847".to_string()
}

/// Serde default for `ProxyConfig::upstream`.
fn default_proxy_upstream() -> String {
    "https://api.anthropic.com".to_string()
}

/// Serde default for `ProxyConfig::max_response_body_mb`.
fn default_proxy_max_body_mb() -> u32 {
    256
}

/// Serde default for `ProxyConfig::max_request_body_mb`.
fn default_proxy_max_request_body_mb() -> u32 {
    32
}

impl Default for ProxyConfig {
    // Mirrors the serde defaults above so a missing `[proxy]` table behaves like an empty one.
    fn default() -> Self {
        Self {
            listen: default_proxy_listen(),
            upstream: default_proxy_upstream(),
            compress_transport: true,
            minify_json: true,
            max_response_body_mb: default_proxy_max_body_mb(),
            max_request_body_mb: default_proxy_max_request_body_mb(),
            context_policy: ContextPolicy::default(),
        }
    }
}
397
/// Optional third-party telemetry sinks; same redacted batches as Kaizen sync.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TelemetryConfig {
    /// When `true` (default), ignore exporter errors; when `false`, `flush` fails if any secondary errors.
    #[serde(default = "default_telemetry_fail_open")]
    pub fail_open: bool,
    /// Query-back / pull API: authority, cache TTL, identity allowlist.
    #[serde(default)]
    pub query: TelemetryQueryConfig,
    /// Declarative list; `type = "none"` rows are accepted and ignored.
    #[serde(default)]
    pub exporters: Vec<ExporterConfig>,
}

impl Default for TelemetryConfig {
    // Manual impl because `fail_open` must default to `true`.
    fn default() -> Self {
        Self {
            fail_open: default_telemetry_fail_open(),
            query: TelemetryQueryConfig::default(),
            exporters: Vec::new(),
        }
    }
}
421
/// One pluggable sink; TOML `type` is the tag.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ExporterConfig {
    /// No-op row for sparse tables / templates.
    None,
    /// Append summary JSON lines to a local NDJSON file (default `<workspace>/.kaizen/telemetry.ndjson`).
    File {
        #[serde(default = "default_true")]
        enabled: bool,
        #[serde(default)]
        path: Option<String>,
    },
    /// Echo to tracing (for wiring tests; requires the `telemetry-dev` build feature).
    Dev {
        #[serde(default = "default_true")]
        enabled: bool,
    },
    PostHog {
        #[serde(default = "default_true")]
        enabled: bool,
        /// e.g. `https://us.i.posthog.com` (default when unset)
        host: Option<String>,
        /// Prefer env `POSTHOG_API_KEY` or `KAIZEN_POSTHOG_API_KEY`
        project_api_key: Option<String>,
    },
    Datadog {
        #[serde(default = "default_true")]
        enabled: bool,
        /// e.g. `datadoghq.com`; env `DD_SITE` overrides
        site: Option<String>,
        /// Prefer env `DD_API_KEY` or `KAIZEN_DD_API_KEY`
        api_key: Option<String>,
    },
    Otlp {
        #[serde(default = "default_true")]
        enabled: bool,
        /// Env `OTEL_EXPORTER_OTLP_ENDPOINT` (or KAIZEN_ prefix) when unset here
        endpoint: Option<String>,
    },
}
463
464impl ExporterConfig {
465    /// Whether this row should be considered for `load_exporters` (excludes `None`).
466    pub fn is_enabled(&self) -> bool {
467        match self {
468            ExporterConfig::None => false,
469            ExporterConfig::File { enabled, .. } => *enabled,
470            ExporterConfig::Dev { enabled, .. } => *enabled,
471            ExporterConfig::PostHog { enabled, .. } => *enabled,
472            ExporterConfig::Datadog { enabled, .. } => *enabled,
473            ExporterConfig::Otlp { enabled, .. } => *enabled,
474        }
475    }
476}
477
/// `[eval]` table: model-based evaluation of sessions; off by default.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EvalConfig {
    #[serde(default)]
    pub enabled: bool,
    #[serde(default = "default_eval_endpoint")]
    pub endpoint: String,
    /// API credential; empty = unset.
    #[serde(default)]
    pub api_key: String,
    #[serde(default = "default_eval_model")]
    pub model: String,
    #[serde(default = "default_eval_rubric")]
    pub rubric: String,
    #[serde(default = "default_eval_batch_size")]
    pub batch_size: usize,
    /// NOTE(review): presumably sessions cheaper than this are skipped — confirm at call site.
    #[serde(default = "default_eval_min_cost")]
    pub min_cost_usd: f64,
}

impl Default for EvalConfig {
    // Mirrors the per-field serde defaults; `enabled` stays opt-in.
    fn default() -> Self {
        Self {
            enabled: false,
            endpoint: default_eval_endpoint(),
            api_key: String::new(),
            model: default_eval_model(),
            rubric: default_eval_rubric(),
            batch_size: default_eval_batch_size(),
            min_cost_usd: default_eval_min_cost(),
        }
    }
}

/// Serde defaults for `EvalConfig`.
fn default_eval_endpoint() -> String {
    "https://api.anthropic.com".into()
}
fn default_eval_model() -> String {
    "claude-haiku-4-5-20251001".into()
}
fn default_eval_rubric() -> String {
    "tool-efficiency-v1".into()
}
fn default_eval_batch_size() -> usize {
    20
}
fn default_eval_min_cost() -> f64 {
    0.01
}
525
/// Opt-in post-hook outcome measurement (Tier C).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CollectOutcomesConfig {
    #[serde(default)]
    pub enabled: bool,
    /// Shell command run to judge the outcome of a session.
    #[serde(default = "default_outcomes_test_cmd")]
    pub test_cmd: String,
    #[serde(default = "default_outcomes_timeout_secs")]
    pub timeout_secs: u64,
    /// Optional additional lint command; `None` = skip linting.
    #[serde(default)]
    pub lint_cmd: Option<String>,
}

/// Serde default for `CollectOutcomesConfig::test_cmd`.
fn default_outcomes_test_cmd() -> String {
    "cargo test --quiet".to_string()
}

/// Serde default for `CollectOutcomesConfig::timeout_secs` (10 minutes).
fn default_outcomes_timeout_secs() -> u64 {
    600
}

impl Default for CollectOutcomesConfig {
    fn default() -> Self {
        Self {
            enabled: false,
            test_cmd: default_outcomes_test_cmd(),
            timeout_secs: default_outcomes_timeout_secs(),
            lint_cmd: None,
        }
    }
}
557
/// Opt-in per-process sampling (Tier D).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CollectSystemSamplerConfig {
    #[serde(default)]
    pub enabled: bool,
    /// Milliseconds between samples.
    #[serde(default = "default_sampler_sample_ms")]
    pub sample_ms: u64,
    /// Hard cap on samples per session (3600 × 2 s ≈ 2 h of sampling).
    #[serde(default = "default_sampler_max_samples")]
    pub max_samples_per_session: u32,
}

/// Serde default for `CollectSystemSamplerConfig::sample_ms` (2 s).
fn default_sampler_sample_ms() -> u64 {
    2000
}

/// Serde default for `CollectSystemSamplerConfig::max_samples_per_session`.
fn default_sampler_max_samples() -> u32 {
    3600
}

impl Default for CollectSystemSamplerConfig {
    fn default() -> Self {
        Self {
            enabled: false,
            sample_ms: default_sampler_sample_ms(),
            max_samples_per_session: default_sampler_max_samples(),
        }
    }
}
586
/// `[collect]` table: opt-in data-collection tiers.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct CollectConfig {
    #[serde(default)]
    pub outcomes: CollectOutcomesConfig,
    #[serde(default)]
    pub system_sampler: CollectSystemSamplerConfig,
}
594
/// Root configuration: one field per TOML table; every section is optional and
/// falls back to its own `Default`.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct Config {
    #[serde(default)]
    pub scan: ScanConfig,
    #[serde(default)]
    pub sources: SourcesConfig,
    #[serde(default)]
    pub retention: RetentionConfig,
    #[serde(default)]
    pub storage: StorageConfig,
    #[serde(default)]
    pub sync: SyncConfig,
    #[serde(default)]
    pub telemetry: TelemetryConfig,
    #[serde(default)]
    pub proxy: ProxyConfig,
    #[serde(default)]
    pub eval: EvalConfig,
    #[serde(default)]
    pub collect: CollectConfig,
}
616
/// Load config: `~/.kaizen/projects/<slug>/config.toml` then `~/.kaizen/config.toml`.
/// User config wins on overlap. Missing files → defaults, not error.
pub fn load(workspace: &Path) -> Result<Config> {
    // Project-scoped layer; if the per-workspace data dir can't be resolved,
    // the project layer is simply skipped.
    let project_cfg = crate::core::paths::project_data_dir(workspace)
        .ok()
        .map(|d| d.join("config.toml"));
    // The user layer must at least be locatable: with no home/KAIZEN_HOME there
    // is nowhere to anchor `~/.kaizen`.
    let user_path = crate::core::paths::kaizen_dir()
        .ok_or_else(|| anyhow::anyhow!("KAIZEN_HOME / HOME unset"))?
        .join("config.toml");

    let base = project_cfg
        .as_deref()
        .and_then(load_file)
        .unwrap_or_default();
    let user = load_file(&user_path).unwrap_or_default();
    // User values override the project-level base (see `merge`).
    Ok(merge(base, user))
}
634
635fn load_file(path: &Path) -> Option<Config> {
636    let text = std::fs::read_to_string(path).ok()?;
637    toml::from_str(&text).ok()
638}
639
/// Field-wise merge: `user` wins over `base`, section by section. The per-field
/// helpers treat "user value == compiled default" as "unset", so a user cannot
/// explicitly pin a field back to its default when the base overrides it — a
/// known limitation of default-sentinel merging.
fn merge(base: Config, user: Config) -> Config {
    Config {
        scan: merge_scan(base.scan, user.scan),
        sources: merge_sources(base.sources, user.sources),
        retention: merge_retention(base.retention, user.retention),
        storage: merge_storage(base.storage, user.storage),
        sync: merge_sync(base.sync, user.sync),
        telemetry: merge_telemetry(base.telemetry, user.telemetry),
        proxy: merge_proxy(base.proxy, user.proxy),
        eval: merge_eval(base.eval, user.eval),
        collect: merge_collect(base.collect, user.collect),
    }
}
653
654fn merge_collect(base: CollectConfig, user: CollectConfig) -> CollectConfig {
655    let def = CollectConfig::default();
656    CollectConfig {
657        outcomes: merge_collect_outcomes(base.outcomes, user.outcomes, def.outcomes),
658        system_sampler: merge_collect_sampler(
659            base.system_sampler,
660            user.system_sampler,
661            def.system_sampler,
662        ),
663    }
664}
665
666fn merge_collect_outcomes(
667    base: CollectOutcomesConfig,
668    user: CollectOutcomesConfig,
669    def: CollectOutcomesConfig,
670) -> CollectOutcomesConfig {
671    CollectOutcomesConfig {
672        enabled: if user.enabled != def.enabled {
673            user.enabled
674        } else {
675            base.enabled
676        },
677        test_cmd: if user.test_cmd != def.test_cmd {
678            user.test_cmd
679        } else {
680            base.test_cmd
681        },
682        timeout_secs: if user.timeout_secs != def.timeout_secs {
683            user.timeout_secs
684        } else {
685            base.timeout_secs
686        },
687        lint_cmd: user.lint_cmd.or(base.lint_cmd),
688    }
689}
690
691fn merge_collect_sampler(
692    base: CollectSystemSamplerConfig,
693    user: CollectSystemSamplerConfig,
694    def: CollectSystemSamplerConfig,
695) -> CollectSystemSamplerConfig {
696    CollectSystemSamplerConfig {
697        enabled: if user.enabled != def.enabled {
698            user.enabled
699        } else {
700            base.enabled
701        },
702        sample_ms: if user.sample_ms != def.sample_ms {
703            user.sample_ms
704        } else {
705            base.sample_ms
706        },
707        max_samples_per_session: if user.max_samples_per_session != def.max_samples_per_session {
708            user.max_samples_per_session
709        } else {
710            base.max_samples_per_session
711        },
712    }
713}
714
715fn merge_sources(base: SourcesConfig, user: SourcesConfig) -> SourcesConfig {
716    let def = SourcesConfig::default();
717    SourcesConfig {
718        cursor: merge_cursor_source(base.cursor, user.cursor, def.cursor),
719        tail: merge_tail_toggles(base.tail, user.tail, def.tail),
720    }
721}
722
723fn merge_cursor_source(
724    base: CursorSourceConfig,
725    user: CursorSourceConfig,
726    def: CursorSourceConfig,
727) -> CursorSourceConfig {
728    CursorSourceConfig {
729        enabled: if user.enabled != def.enabled {
730            user.enabled
731        } else {
732            base.enabled
733        },
734        transcript_glob: if user.transcript_glob != def.transcript_glob {
735            user.transcript_glob
736        } else {
737            base.transcript_glob
738        },
739    }
740}
741
742fn merge_tail_toggles(
743    base: TailAgentToggles,
744    user: TailAgentToggles,
745    def: TailAgentToggles,
746) -> TailAgentToggles {
747    TailAgentToggles {
748        goose: if user.goose != def.goose {
749            user.goose
750        } else {
751            base.goose
752        },
753        openclaw: if user.openclaw != def.openclaw {
754            user.openclaw
755        } else {
756            base.openclaw
757        },
758        opencode: if user.opencode != def.opencode {
759            user.opencode
760        } else {
761            base.opencode
762        },
763        copilot_cli: if user.copilot_cli != def.copilot_cli {
764            user.copilot_cli
765        } else {
766            base.copilot_cli
767        },
768        copilot_vscode: if user.copilot_vscode != def.copilot_vscode {
769            user.copilot_vscode
770        } else {
771            base.copilot_vscode
772        },
773    }
774}
775
776fn merge_eval(base: EvalConfig, user: EvalConfig) -> EvalConfig {
777    let def = EvalConfig::default();
778    EvalConfig {
779        enabled: if user.enabled != def.enabled {
780            user.enabled
781        } else {
782            base.enabled
783        },
784        endpoint: if user.endpoint != def.endpoint {
785            user.endpoint
786        } else {
787            base.endpoint
788        },
789        api_key: if !user.api_key.is_empty() {
790            user.api_key
791        } else {
792            base.api_key
793        },
794        model: if user.model != def.model {
795            user.model
796        } else {
797            base.model
798        },
799        rubric: if user.rubric != def.rubric {
800            user.rubric
801        } else {
802            base.rubric
803        },
804        batch_size: if user.batch_size != def.batch_size {
805            user.batch_size
806        } else {
807            base.batch_size
808        },
809        min_cost_usd: if user.min_cost_usd != def.min_cost_usd {
810            user.min_cost_usd
811        } else {
812            base.min_cost_usd
813        },
814    }
815}
816
817fn merge_scan(base: ScanConfig, user: ScanConfig) -> ScanConfig {
818    let def = ScanConfig::default();
819    ScanConfig {
820        roots: if user.roots != def.roots {
821            user.roots
822        } else {
823            base.roots
824        },
825        min_rescan_seconds: if user.min_rescan_seconds != def.min_rescan_seconds {
826            user.min_rescan_seconds
827        } else {
828            base.min_rescan_seconds
829        },
830    }
831}
832
833fn merge_retention(base: RetentionConfig, user: RetentionConfig) -> RetentionConfig {
834    let def = RetentionConfig::default();
835    RetentionConfig {
836        hot_days: if user.hot_days != def.hot_days {
837            user.hot_days
838        } else {
839            base.hot_days
840        },
841        warm_days: if user.warm_days != def.warm_days {
842            user.warm_days
843        } else {
844            base.warm_days
845        },
846    }
847}
848
849fn merge_storage(base: StorageConfig, user: StorageConfig) -> StorageConfig {
850    let def = StorageConfig::default();
851    StorageConfig {
852        hot_max_bytes: if user.hot_max_bytes != def.hot_max_bytes {
853            user.hot_max_bytes
854        } else {
855            base.hot_max_bytes
856        },
857        cold_after_days: if user.cold_after_days != def.cold_after_days {
858            user.cold_after_days
859        } else {
860            base.cold_after_days
861        },
862        retention_days: if user.retention_days != def.retention_days {
863            user.retention_days
864        } else {
865            base.retention_days
866        },
867        flush_hour_utc: if user.flush_hour_utc != def.flush_hour_utc {
868            user.flush_hour_utc
869        } else {
870            base.flush_hour_utc
871        },
872    }
873}
874
/// Parse a human-readable byte size like `"1GB"`, `"512 MiB"`, or a bare `"1024"`.
/// Units are case-insensitive; both `kb` and `kib` spellings use binary (1024-based)
/// multipliers, matching the existing `hot_max_bytes_value` fallback of 1 GiB for "1GB".
/// Returns `None` when the number is missing or the unit suffix is unknown.
/// Multiplication saturates rather than overflowing.
fn parse_byte_size(raw: &str) -> Option<u64> {
    let s = raw.trim();
    let digits: String = s.chars().take_while(|c| c.is_ascii_digit()).collect();
    // Empty `digits` (no leading number) fails the parse and returns None.
    let n = digits.parse::<u64>().ok()?;
    // `digits` is pure ASCII, so byte-slicing at its length is safe.
    let unit = s[digits.len()..].trim().to_ascii_lowercase();
    let multiplier: u64 = match unit.as_str() {
        "" | "b" => 1,
        "kb" | "kib" => 1 << 10,
        "mb" | "mib" => 1 << 20,
        "gb" | "gib" => 1 << 30,
        // Generalization: terabyte sizes were previously rejected.
        "tb" | "tib" => 1 << 40,
        _ => return None,
    };
    Some(n.saturating_mul(multiplier))
}
891
892fn merge_proxy(base: ProxyConfig, user: ProxyConfig) -> ProxyConfig {
893    let def = ProxyConfig::default();
894    ProxyConfig {
895        listen: if user.listen != def.listen {
896            user.listen
897        } else {
898            base.listen
899        },
900        upstream: if user.upstream != def.upstream {
901            user.upstream
902        } else {
903            base.upstream
904        },
905        compress_transport: if user.compress_transport != def.compress_transport {
906            user.compress_transport
907        } else {
908            base.compress_transport
909        },
910        minify_json: if user.minify_json != def.minify_json {
911            user.minify_json
912        } else {
913            base.minify_json
914        },
915        max_response_body_mb: if user.max_response_body_mb != def.max_response_body_mb {
916            user.max_response_body_mb
917        } else {
918            base.max_response_body_mb
919        },
920        max_request_body_mb: if user.max_request_body_mb != def.max_request_body_mb {
921            user.max_request_body_mb
922        } else {
923            base.max_request_body_mb
924        },
925        context_policy: if user.context_policy != def.context_policy {
926            user.context_policy
927        } else {
928            base.context_policy
929        },
930    }
931}
932
933fn merge_telemetry(base: TelemetryConfig, user: TelemetryConfig) -> TelemetryConfig {
934    let def = TelemetryConfig::default();
935    let fail_open = if user.fail_open != def.fail_open {
936        user.fail_open
937    } else {
938        base.fail_open
939    };
940    let query = merge_telemetry_query(base.query, user.query);
941    let exporters = if !user.exporters.is_empty() {
942        user.exporters
943    } else {
944        base.exporters
945    };
946    TelemetryConfig {
947        fail_open,
948        query,
949        exporters,
950    }
951}
952
953fn merge_telemetry_query(
954    base: TelemetryQueryConfig,
955    user: TelemetryQueryConfig,
956) -> TelemetryQueryConfig {
957    let def = TelemetryQueryConfig::default();
958    TelemetryQueryConfig {
959        provider: if user.provider != def.provider {
960            user.provider
961        } else {
962            base.provider
963        },
964        cache_ttl_seconds: if user.cache_ttl_seconds != def.cache_ttl_seconds {
965            user.cache_ttl_seconds
966        } else {
967            base.cache_ttl_seconds
968        },
969        identity_allowlist: merge_identity_allowlist(
970            base.identity_allowlist,
971            user.identity_allowlist,
972        ),
973    }
974}
975
976fn merge_identity_allowlist(base: IdentityAllowlist, user: IdentityAllowlist) -> IdentityAllowlist {
977    let def = IdentityAllowlist::default();
978    IdentityAllowlist {
979        team: if user.team != def.team {
980            user.team
981        } else {
982            base.team
983        },
984        workspace_label: if user.workspace_label != def.workspace_label {
985            user.workspace_label
986        } else {
987            base.workspace_label
988        },
989        runner_label: if user.runner_label != def.runner_label {
990            user.runner_label
991        } else {
992            base.runner_label
993        },
994        actor_kind: if user.actor_kind != def.actor_kind {
995            user.actor_kind
996        } else {
997            base.actor_kind
998        },
999        actor_label: if user.actor_label != def.actor_label {
1000            user.actor_label
1001        } else {
1002            base.actor_label
1003        },
1004        agent: if user.agent != def.agent {
1005            user.agent
1006        } else {
1007            base.agent
1008        },
1009        model: if user.model != def.model {
1010            user.model
1011        } else {
1012            base.model
1013        },
1014        env: if user.env != def.env {
1015            user.env
1016        } else {
1017            base.env
1018        },
1019        job: if user.job != def.job {
1020            user.job
1021        } else {
1022            base.job
1023        },
1024        branch: if user.branch != def.branch {
1025            user.branch
1026        } else {
1027            base.branch
1028        },
1029    }
1030}
1031
1032fn merge_sync(base: SyncConfig, user: SyncConfig) -> SyncConfig {
1033    let def = SyncConfig::default();
1034    SyncConfig {
1035        endpoint: if !user.endpoint.is_empty() {
1036            user.endpoint
1037        } else {
1038            base.endpoint
1039        },
1040        team_token: if !user.team_token.is_empty() {
1041            user.team_token
1042        } else {
1043            base.team_token
1044        },
1045        team_id: if !user.team_id.is_empty() {
1046            user.team_id
1047        } else {
1048            base.team_id
1049        },
1050        events_per_batch_max: if user.events_per_batch_max != def.events_per_batch_max {
1051            user.events_per_batch_max
1052        } else {
1053            base.events_per_batch_max
1054        },
1055        max_body_bytes: if user.max_body_bytes != def.max_body_bytes {
1056            user.max_body_bytes
1057        } else {
1058            base.max_body_bytes
1059        },
1060        flush_interval_ms: if user.flush_interval_ms != def.flush_interval_ms {
1061            user.flush_interval_ms
1062        } else {
1063            base.flush_interval_ms
1064        },
1065        sample_rate: if (user.sample_rate - def.sample_rate).abs() > f64::EPSILON {
1066            user.sample_rate
1067        } else {
1068            base.sample_rate
1069        },
1070        team_salt_hex: if !user.team_salt_hex.is_empty() {
1071            user.team_salt_hex
1072        } else {
1073            base.team_salt_hex
1074        },
1075    }
1076}
1077
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::TempDir;

    // With no config files present, every section should come from its
    // Default impl (values asserted here mirror the defaults defined above).
    #[test]
    fn defaults_when_no_files() {
        let dir = TempDir::new().unwrap();
        let cfg = load(dir.path()).unwrap();
        assert_eq!(cfg.scan.roots, ScanConfig::default().roots);
        assert_eq!(cfg.scan.min_rescan_seconds, 300);
        assert_eq!(cfg.retention.hot_days, 30);
        assert_eq!(cfg.storage.cold_after_days, 7);
        assert_eq!(cfg.storage.hot_max_bytes_value(), 1_073_741_824);
    }

    // A configured team salt ("ab" x 32 = 64 hex chars) must be used directly
    // and must suppress creation of a per-machine local salt file.
    #[test]
    fn effective_redaction_salt_prefers_configured_team_salt() {
        let home = TempDir::new().unwrap();
        let sync = SyncConfig {
            team_salt_hex: "ab".repeat(32),
            ..Default::default()
        };
        let salt = effective_redaction_salt(&sync, home.path()).unwrap();
        assert_eq!(salt, [0xab_u8; 32]);
        // No local file written when team salt was sufficient.
        assert!(!home.path().join("local_salt.hex").exists());
    }

    // Without a team salt, a local salt is generated once, persisted to
    // `local_salt.hex`, reused on subsequent calls, and (on Unix) written
    // with owner-only 0o600 permissions since it is secret material.
    #[test]
    fn effective_redaction_salt_generates_and_persists_local_salt() {
        let home = TempDir::new().unwrap();
        let sync = SyncConfig::default();
        let a = effective_redaction_salt(&sync, home.path()).unwrap();
        let b = effective_redaction_salt(&sync, home.path()).unwrap();
        assert_eq!(a, b, "second call must reuse the persisted local salt");
        assert!(home.path().join("local_salt.hex").exists());
        #[cfg(unix)]
        {
            use std::os::unix::fs::PermissionsExt;
            // Mask to the permission bits; file type bits are irrelevant here.
            let mode = std::fs::metadata(home.path().join("local_salt.hex"))
                .unwrap()
                .permissions()
                .mode()
                & 0o777;
            assert_eq!(mode, 0o600);
        }
    }

    // A workspace-level config.toml should be picked up by `load`.
    // Serialized via the global test lock because KAIZEN_HOME is
    // process-global state (set_var/remove_var are unsafe in Rust 2024).
    #[test]
    fn workspace_config_loaded() {
        let _guard = crate::core::paths::test_lock::global().lock().unwrap();
        let home = TempDir::new().unwrap();
        let ws = TempDir::new().unwrap();
        unsafe { std::env::set_var("KAIZEN_HOME", home.path()) };
        let data_dir = crate::core::paths::project_data_dir(ws.path()).unwrap();
        let mut f = std::fs::File::create(data_dir.join("config.toml")).unwrap();
        writeln!(f, "[scan]\nroots = [\"/custom/root\"]").unwrap();
        let cfg = load(ws.path()).unwrap();
        unsafe { std::env::remove_var("KAIZEN_HOME") };
        assert_eq!(cfg.scan.roots, vec!["/custom/root"]);
    }

    // Unparseable TOML must not fail `load`; the file is ignored and
    // defaults are used instead (matches the module-doc contract).
    #[test]
    fn invalid_toml_ignored() {
        let _guard = crate::core::paths::test_lock::global().lock().unwrap();
        let home = TempDir::new().unwrap();
        let ws = TempDir::new().unwrap();
        unsafe { std::env::set_var("KAIZEN_HOME", home.path()) };
        let data_dir = crate::core::paths::project_data_dir(ws.path()).unwrap();
        std::fs::write(data_dir.join("config.toml"), "not valid toml :::").unwrap();
        let cfg = load(ws.path()).unwrap();
        unsafe { std::env::remove_var("KAIZEN_HOME") };
        assert_eq!(cfg.scan.roots, ScanConfig::default().roots);
    }

    // Non-default user roots replace workspace roots wholesale (no append).
    #[test]
    fn merge_user_roots_win() {
        let base = Config {
            scan: ScanConfig {
                roots: vec!["/base".to_string()],
                ..ScanConfig::default()
            },
            ..Default::default()
        };
        let user = Config {
            scan: ScanConfig {
                roots: vec!["/user".to_string()],
                ..ScanConfig::default()
            },
            ..Default::default()
        };
        let merged = merge(base, user);
        assert_eq!(merged.scan.roots, vec!["/user"]);
    }

    // A user config left at defaults must not clobber workspace-level
    // cursor source settings (enabled=false and custom glob survive).
    #[test]
    fn merge_sources_user_default_keeps_workspace_cursor() {
        let base = Config {
            sources: SourcesConfig {
                cursor: CursorSourceConfig {
                    enabled: false,
                    transcript_glob: "/workspace/glob/**".into(),
                },
                ..Default::default()
            },
            ..Default::default()
        };
        let user = Config::default();
        let merged = merge(base, user);
        assert!(!merged.sources.cursor.enabled);
        assert_eq!(merged.sources.cursor.transcript_glob, "/workspace/glob/**");
    }

    // Retention merges per field: user hot_days=30 equals the default so the
    // workspace value (60) wins, while user warm_days=45 is non-default and wins.
    #[test]
    fn merge_retention_field_by_field() {
        let base = Config {
            retention: RetentionConfig {
                hot_days: 60,
                warm_days: 90,
            },
            ..Default::default()
        };
        let user = Config {
            retention: RetentionConfig {
                hot_days: 30,
                warm_days: 45,
            },
            ..Default::default()
        };
        let merged = merge(base, user);
        assert_eq!(merged.retention.hot_days, 60);
        assert_eq!(merged.retention.warm_days, 45);
    }

    // A non-default user hot_days overrides; warm_days left at the
    // workspace's (coincidentally shared) value stays at 90.
    #[test]
    fn merge_retention_user_hot_overrides() {
        let base = Config {
            retention: RetentionConfig {
                hot_days: 60,
                warm_days: 90,
            },
            ..Default::default()
        };
        let user = Config {
            retention: RetentionConfig {
                hot_days: 14,
                warm_days: 90,
            },
            ..Default::default()
        };
        let merged = merge(base, user);
        assert_eq!(merged.retention.hot_days, 14);
        assert_eq!(merged.retention.warm_days, 90);
    }

    // Storage merge: only the user's non-default field (cold_after_days=3)
    // overrides; every field the user left at default keeps the workspace value.
    #[test]
    fn merge_storage_user_overrides() {
        let base = Config {
            storage: StorageConfig {
                hot_max_bytes: "2GB".into(),
                cold_after_days: 14,
                retention_days: 120,
                flush_hour_utc: 3,
            },
            ..Default::default()
        };
        let user = Config {
            storage: StorageConfig {
                cold_after_days: 3,
                ..StorageConfig::default()
            },
            ..Default::default()
        };
        let merged = merge(base, user);
        assert_eq!(merged.storage.hot_max_bytes, "2GB");
        assert_eq!(merged.storage.cold_after_days, 3);
        assert_eq!(merged.storage.retention_days, 120);
        assert_eq!(merged.storage.flush_hour_utc, 3);
    }

    // Exporters merge as a whole list: a non-empty user list replaces the
    // workspace list; fail_open differing from default also takes effect.
    #[test]
    fn merge_telemetry_exporters_user_wins_non_empty() {
        let base = Config {
            telemetry: TelemetryConfig {
                fail_open: true,
                query: TelemetryQueryConfig::default(),
                exporters: vec![ExporterConfig::None],
            },
            ..Default::default()
        };
        let user = Config {
            telemetry: TelemetryConfig {
                fail_open: false,
                query: TelemetryQueryConfig::default(),
                exporters: vec![ExporterConfig::Dev { enabled: true }],
            },
            ..Default::default()
        };
        let merged = merge(base, user);
        assert!(!merged.telemetry.fail_open);
        assert_eq!(merged.telemetry.exporters.len(), 1);
    }

    // Query defaults: no provider, 1h cache TTL, empty allowlist, and
    // therefore no provider available for pull-based queries.
    #[test]
    fn telemetry_query_defaults() {
        let t = TelemetryQueryConfig::default();
        assert_eq!(t.provider, QueryAuthority::None);
        assert_eq!(t.cache_ttl_seconds, 3600);
        assert!(!t.identity_allowlist.team);
        assert!(!t.has_provider_for_pull());
    }

    // Any concrete provider (PostHog or Datadog) enables pull queries.
    #[test]
    fn telemetry_query_has_provider() {
        let ph = TelemetryQueryConfig {
            provider: QueryAuthority::Posthog,
            ..Default::default()
        };
        assert!(ph.has_provider_for_pull());
        let dd = TelemetryQueryConfig {
            provider: QueryAuthority::Datadog,
            ..Default::default()
        };
        assert!(dd.has_provider_for_pull());
    }

    // Query merge is per field: user's non-default TTL (7200) wins while the
    // workspace's provider and allowlist — untouched by the user — survive.
    #[test]
    fn merge_telemetry_query_user_wins() {
        let base = Config {
            telemetry: TelemetryConfig {
                query: TelemetryQueryConfig {
                    provider: QueryAuthority::Posthog,
                    cache_ttl_seconds: 3600,
                    identity_allowlist: IdentityAllowlist {
                        team: true,
                        ..Default::default()
                    },
                },
                ..Default::default()
            },
            ..Default::default()
        };
        let user = Config {
            telemetry: TelemetryConfig {
                query: TelemetryQueryConfig {
                    cache_ttl_seconds: 7200,
                    ..Default::default()
                },
                ..Default::default()
            },
            ..Default::default()
        };
        let merged = merge(base, user);
        assert_eq!(merged.telemetry.query.provider, QueryAuthority::Posthog);
        assert_eq!(merged.telemetry.query.cache_ttl_seconds, 7200);
        assert!(merged.telemetry.query.identity_allowlist.team);
    }

    // End-to-end TOML parse of the [telemetry.query] section, including the
    // nested identity_allowlist table; unset flags (model) must stay false.
    // Serialized via the global test lock because KAIZEN_HOME is process-global.
    #[test]
    fn toml_telemetry_query_roundtrip() {
        let _guard = crate::core::paths::test_lock::global().lock().unwrap();
        let home = TempDir::new().unwrap();
        let ws = TempDir::new().unwrap();
        unsafe { std::env::set_var("KAIZEN_HOME", home.path()) };
        let data_dir = crate::core::paths::project_data_dir(ws.path()).unwrap();
        let toml = r#"
[telemetry.query]
provider = "datadog"
cache_ttl_seconds = 1800

[telemetry.query.identity_allowlist]
team = true
branch = true
"#;
        std::fs::write(data_dir.join("config.toml"), toml).unwrap();
        let cfg = load(ws.path()).unwrap();
        unsafe { std::env::remove_var("KAIZEN_HOME") };
        assert_eq!(cfg.telemetry.query.provider, QueryAuthority::Datadog);
        assert_eq!(cfg.telemetry.query.cache_ttl_seconds, 1800);
        assert!(cfg.telemetry.query.identity_allowlist.team);
        assert!(cfg.telemetry.query.identity_allowlist.branch);
        assert!(!cfg.telemetry.query.identity_allowlist.model);
    }
}