Skip to main content

kaizen/core/
config.rs

1// SPDX-License-Identifier: AGPL-3.0-or-later
2//! Config loading: workspace `.kaizen/config.toml` then `~/.kaizen/config.toml`.
3//! Missing files → defaults. User config wins on overlap.
4
5use anyhow::Result;
6use serde::{Deserialize, Serialize};
7use std::path::Path;
8
9#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
10pub struct ScanConfig {
11    pub roots: Vec<String>,
12    /// Minimum seconds between full agent transcript rescans when `--refresh` is not passed.
13    #[serde(default = "default_min_rescan_seconds")]
14    pub min_rescan_seconds: u64,
15}
16
/// Serde fallback for `ScanConfig::min_rescan_seconds`: five minutes.
fn default_min_rescan_seconds() -> u64 {
    5 * 60
}
20
21impl Default for ScanConfig {
22    fn default() -> Self {
23        Self {
24            roots: vec!["~/.cursor/projects".to_string()],
25            min_rescan_seconds: default_min_rescan_seconds(),
26        }
27    }
28}
29
30#[derive(Debug, Clone, Serialize, Deserialize)]
31pub struct CursorSourceConfig {
32    pub enabled: bool,
33    pub transcript_glob: String,
34}
35
36impl Default for CursorSourceConfig {
37    fn default() -> Self {
38        Self {
39            enabled: true,
40            transcript_glob: "*/agent-transcripts".to_string(),
41        }
42    }
43}
44
/// Enable tier-1 tail ingestion for agents that store data outside Cursor/Claude/Codex paths.
///
/// Every toggle falls back to `true` (via `default_true`) when its key is omitted.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TailAgentToggles {
    /// Toggle for the `goose` agent.
    #[serde(default = "default_true")]
    pub goose: bool,
    /// Toggle for the `openclaw` agent.
    #[serde(default = "default_true")]
    pub openclaw: bool,
    /// Toggle for the `opencode` agent.
    #[serde(default = "default_true")]
    pub opencode: bool,
    /// Toggle for the `copilot_cli` agent.
    #[serde(default = "default_true")]
    pub copilot_cli: bool,
    /// Toggle for the `copilot_vscode` agent.
    #[serde(default = "default_true")]
    pub copilot_vscode: bool,
}
59
60impl Default for TailAgentToggles {
61    fn default() -> Self {
62        Self {
63            goose: true,
64            openclaw: true,
65            opencode: true,
66            copilot_cli: true,
67            copilot_vscode: true,
68        }
69    }
70}
71
/// Per-source ingestion settings; each sub-table falls back to its own `Default` when omitted.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct SourcesConfig {
    /// Cursor transcript source settings.
    #[serde(default)]
    pub cursor: CursorSourceConfig,
    /// On/off switches for the tail-ingested agents.
    #[serde(default)]
    pub tail: TailAgentToggles,
}
79
80#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
81pub struct RetentionConfig {
82    pub hot_days: u32,
83    pub warm_days: u32,
84}
85
86impl Default for RetentionConfig {
87    fn default() -> Self {
88        Self {
89            hot_days: 30,
90            warm_days: 90,
91        }
92    }
93}
94
/// Settings for the `[sync]` table. Every key is optional; string keys default to empty.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SyncConfig {
    /// When empty, sync is disabled (no outbox enqueue, `sync run` no-ops flush).
    #[serde(default)]
    pub endpoint: String,
    /// Team token; empty when unset.
    #[serde(default)]
    pub team_token: String,
    /// Team identifier; empty when unset.
    #[serde(default)]
    pub team_id: String,
    /// Upper bound on events per batch; defaults to 500.
    #[serde(default = "default_events_per_batch")]
    pub events_per_batch_max: usize,
    /// Upper bound on body size in bytes; defaults to 1_000_000.
    #[serde(default = "default_max_body_bytes")]
    pub max_body_bytes: usize,
    /// Flush interval in milliseconds; defaults to 10_000.
    #[serde(default = "default_flush_interval_ms")]
    pub flush_interval_ms: u64,
    /// Sampling rate; defaults to 1.0.
    #[serde(default = "default_sample_rate")]
    pub sample_rate: f64,
    /// 64 hex chars (32 bytes). Prefer `~/.kaizen/config.toml` only; never committed workspace secrets.
    #[serde(default)]
    pub team_salt_hex: String,
}
116
/// Serde fallback for `SyncConfig::events_per_batch_max`.
fn default_events_per_batch() -> usize {
    const EVENTS_PER_BATCH: usize = 500;
    EVENTS_PER_BATCH
}
120
/// Serde fallback for `SyncConfig::max_body_bytes`: one million bytes.
fn default_max_body_bytes() -> usize {
    1_000 * 1_000
}
124
/// Serde fallback for `SyncConfig::flush_interval_ms`: ten seconds, in milliseconds.
fn default_flush_interval_ms() -> u64 {
    10 * 1_000
}
128
/// Serde fallback for `SyncConfig::sample_rate`.
fn default_sample_rate() -> f64 {
    1.0_f64
}
132
133impl Default for SyncConfig {
134    fn default() -> Self {
135        Self {
136            endpoint: String::new(),
137            team_token: String::new(),
138            team_id: String::new(),
139            events_per_batch_max: default_events_per_batch(),
140            max_body_bytes: default_max_body_bytes(),
141            flush_interval_ms: default_flush_interval_ms(),
142            sample_rate: default_sample_rate(),
143            team_salt_hex: String::new(),
144        }
145    }
146}
147
148/// Parse `team_salt_hex` into 32 bytes. Returns `None` if missing or invalid.
149pub fn try_team_salt(cfg: &SyncConfig) -> Option<[u8; 32]> {
150    let h = cfg.team_salt_hex.trim();
151    if h.len() != 64 {
152        return None;
153    }
154    let bytes = hex::decode(h).ok()?;
155    bytes.try_into().ok()
156}
157
/// Serde fallback for boolean settings that are enabled unless turned off.
fn default_true() -> bool {
    const ENABLED: bool = true;
    ENABLED
}
161
/// Serde fallback for `TelemetryConfig::fail_open`.
fn default_telemetry_fail_open() -> bool {
    const FAIL_OPEN: bool = true;
    FAIL_OPEN
}
165
/// Serde fallback for `TelemetryQueryConfig::cache_ttl_seconds`: one hour.
fn default_cache_ttl_seconds() -> u64 {
    60 * 60
}
169
/// Which third-party system is the single source for query-back / pull; OTLP is export-only, not a pull target.
///
/// Variant names are (de)serialized in lowercase (`none`, `posthog`, `datadog`).
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum QueryAuthority {
    /// No query authority; this is the default.
    #[default]
    None,
    /// PostHog is the query authority.
    Posthog,
    /// Datadog is the query authority.
    Datadog,
}
179
/// Per-field allowlist: when `false` (default), the field is omitted or hashed in telemetry exports.
///
/// Every flag is optional in the config file and deserializes to `false` when absent.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct IdentityAllowlist {
    /// Allow the `team` identity field.
    #[serde(default)]
    pub team: bool,
    /// Allow the `workspace_label` identity field.
    #[serde(default)]
    pub workspace_label: bool,
    /// Allow the `runner_label` identity field.
    #[serde(default)]
    pub runner_label: bool,
    /// Allow the `actor_kind` identity field.
    #[serde(default)]
    pub actor_kind: bool,
    /// Allow the `actor_label` identity field.
    #[serde(default)]
    pub actor_label: bool,
    /// Allow the `agent` identity field.
    #[serde(default)]
    pub agent: bool,
    /// Allow the `model` identity field.
    #[serde(default)]
    pub model: bool,
    /// Allow the `env` identity field.
    #[serde(default)]
    pub env: bool,
    /// Allow the `job` identity field.
    #[serde(default)]
    pub job: bool,
    /// Allow the `branch` identity field.
    #[serde(default)]
    pub branch: bool,
}
204
/// Remote pull: query authority, cache TTL, and which identity labels may leave as cleartext.
///
/// All keys are optional; omitted keys take the values produced by the `Default` impl.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct TelemetryQueryConfig {
    /// `posthog` or `datadog` enables provider pull when implemented; `none` or unset = no query authority.
    #[serde(default)]
    pub provider: QueryAuthority,
    /// Seconds to treat remote cache rows as fresh (unless the CLI requests `--refresh`).
    /// Defaults to 3600.
    #[serde(default = "default_cache_ttl_seconds")]
    pub cache_ttl_seconds: u64,
    /// Per-field cleartext allowlist; every flag defaults to `false`.
    #[serde(default)]
    pub identity_allowlist: IdentityAllowlist,
}
217
218impl Default for TelemetryQueryConfig {
219    fn default() -> Self {
220        Self {
221            provider: QueryAuthority::default(),
222            cache_ttl_seconds: default_cache_ttl_seconds(),
223            identity_allowlist: IdentityAllowlist::default(),
224        }
225    }
226}
227
228impl TelemetryQueryConfig {
229    /// True when a PostHog or Datadog pull backend may be used (OTLP is not a pull target).
230    pub fn has_provider_for_pull(&self) -> bool {
231        matches!(
232            self.provider,
233            QueryAuthority::Posthog | QueryAuthority::Datadog
234        )
235    }
236}
237
/// How to reduce billed input to the model (opt-in; default leaves requests unchanged).
///
/// Serialized as an internally tagged value: the `type` key holds the variant name in
/// `snake_case` (`none`, `last_messages`, `max_input_tokens`) next to the variant's fields.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ContextPolicy {
    /// No transformation beyond optional JSON minify (same tokens as a direct call).
    #[default]
    None,
    /// Keep the last `count` `messages` array entries; system blocks unchanged when present.
    LastMessages { count: usize },
    /// Drop oldest messages until a rough `chars/4` estimate stays at or below `max`.
    MaxInputTokens { max: u32 },
}
250
/// Anthropic API-compatible HTTP proxy: forward + local telemetry. See `docs/llm-proxy.md`.
///
/// All keys are optional; omitted keys take the defaults noted on each field.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ProxyConfig {
    /// e.g. `127.0.0.1:3847` (bind address for `kaizen proxy run`). Defaults to `127.0.0.1:3847`.
    #[serde(default = "default_proxy_listen")]
    pub listen: String,
    /// Base URL, no trailing slash, e.g. `https://api.anthropic.com` (which is also the default).
    #[serde(default = "default_proxy_upstream")]
    pub upstream: String,
    /// Prefer `Accept-Encoding: gzip` to upstream (response bodies may be gzip). Defaults to `true`.
    #[serde(default = "default_true")]
    pub compress_transport: bool,
    /// Re-encode JSON bodies to compact `serde_json` (no key reorder; whitespace only).
    /// Defaults to `true`.
    #[serde(default = "default_true")]
    pub minify_json: bool,
    /// Slurp cap for a single upstream response (streaming not yet teed; see doc). Defaults to 256.
    #[serde(default = "default_proxy_max_body_mb")]
    pub max_response_body_mb: u32,
    /// Reject / fail incoming client bodies above this (POST bodies before forward). Defaults to 32.
    #[serde(default = "default_proxy_max_request_body_mb")]
    pub max_request_body_mb: u32,
    /// Optional token-aware truncation of `messages` in JSON bodies. Defaults to `ContextPolicy::None`.
    #[serde(default)]
    pub context_policy: ContextPolicy,
}
276
/// Serde fallback for `ProxyConfig::listen`: loopback on port 3847.
fn default_proxy_listen() -> String {
    String::from("127.0.0.1:3847")
}
280
/// Serde fallback for `ProxyConfig::upstream`.
fn default_proxy_upstream() -> String {
    String::from("https://api.anthropic.com")
}
284
/// Serde fallback for `ProxyConfig::max_response_body_mb`.
fn default_proxy_max_body_mb() -> u32 {
    const MAX_RESPONSE_MB: u32 = 256;
    MAX_RESPONSE_MB
}
288
/// Serde fallback for `ProxyConfig::max_request_body_mb`.
fn default_proxy_max_request_body_mb() -> u32 {
    const MAX_REQUEST_MB: u32 = 32;
    MAX_REQUEST_MB
}
292
293impl Default for ProxyConfig {
294    fn default() -> Self {
295        Self {
296            listen: default_proxy_listen(),
297            upstream: default_proxy_upstream(),
298            compress_transport: true,
299            minify_json: true,
300            max_response_body_mb: default_proxy_max_body_mb(),
301            max_request_body_mb: default_proxy_max_request_body_mb(),
302            context_policy: ContextPolicy::default(),
303        }
304    }
305}
306
/// Optional third-party telemetry sinks; same redacted batches as Kaizen sync.
///
/// All keys are optional; an omitted `exporters` list deserializes as empty.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TelemetryConfig {
    /// When `true` (default), ignore exporter errors; when `false`, `flush` fails if any secondary errors.
    #[serde(default = "default_telemetry_fail_open")]
    pub fail_open: bool,
    /// Query-back / pull API: authority, cache TTL, identity allowlist.
    #[serde(default)]
    pub query: TelemetryQueryConfig,
    /// Declarative list; `type = "none"` rows are accepted and ignored.
    #[serde(default)]
    pub exporters: Vec<ExporterConfig>,
}
320
321impl Default for TelemetryConfig {
322    fn default() -> Self {
323        Self {
324            fail_open: default_telemetry_fail_open(),
325            query: TelemetryQueryConfig::default(),
326            exporters: Vec::new(),
327        }
328    }
329}
330
/// One pluggable sink; TOML `type` is the tag.
///
/// Variant names are matched in lowercase (`none`, `dev`, `posthog`, `datadog`, `otlp`).
/// Each non-`None` variant carries an `enabled` flag that defaults to `true` when omitted.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ExporterConfig {
    /// No-op row for sparse tables / templates.
    None,
    /// Echo to tracing (for wiring tests; requires the `telemetry-dev` build feature).
    Dev {
        /// Whether this row is active.
        #[serde(default = "default_true")]
        enabled: bool,
    },
    /// PostHog sink.
    PostHog {
        /// Whether this row is active.
        #[serde(default = "default_true")]
        enabled: bool,
        /// e.g. `https://us.i.posthog.com` (default when unset)
        host: Option<String>,
        /// Prefer env `POSTHOG_API_KEY` or `KAIZEN_POSTHOG_API_KEY`
        project_api_key: Option<String>,
    },
    /// Datadog sink.
    Datadog {
        /// Whether this row is active.
        #[serde(default = "default_true")]
        enabled: bool,
        /// e.g. `datadoghq.com`; env `DD_SITE` overrides
        site: Option<String>,
        /// Prefer env `DD_API_KEY` or `KAIZEN_DD_API_KEY`
        api_key: Option<String>,
    },
    /// OTLP sink.
    Otlp {
        /// Whether this row is active.
        #[serde(default = "default_true")]
        enabled: bool,
        /// Env `OTEL_EXPORTER_OTLP_ENDPOINT` (or KAIZEN_ prefix) when unset here
        endpoint: Option<String>,
    },
}
365
366impl ExporterConfig {
367    /// Whether this row should be considered for `load_exporters` (excludes `None`).
368    pub fn is_enabled(&self) -> bool {
369        match self {
370            ExporterConfig::None => false,
371            ExporterConfig::Dev { enabled, .. } => *enabled,
372            ExporterConfig::PostHog { enabled, .. } => *enabled,
373            ExporterConfig::Datadog { enabled, .. } => *enabled,
374            ExporterConfig::Otlp { enabled, .. } => *enabled,
375        }
376    }
377}
378
/// Settings for the `[eval]` table. All keys are optional.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EvalConfig {
    /// Opt-in switch; defaults to `false`.
    #[serde(default)]
    pub enabled: bool,
    /// Endpoint base URL; defaults to `https://api.anthropic.com`.
    #[serde(default = "default_eval_endpoint")]
    pub endpoint: String,
    /// API key; empty when unset.
    #[serde(default)]
    pub api_key: String,
    /// Model identifier; defaults to `claude-haiku-4-5-20251001`.
    #[serde(default = "default_eval_model")]
    pub model: String,
    /// Rubric identifier; defaults to `tool-efficiency-v1`.
    #[serde(default = "default_eval_rubric")]
    pub rubric: String,
    /// Batch size; defaults to 20.
    #[serde(default = "default_eval_batch_size")]
    pub batch_size: usize,
    /// Cost threshold in USD; defaults to 0.01.
    // NOTE(review): presumably a lower bound on what gets evaluated — confirm against the consumer.
    #[serde(default = "default_eval_min_cost")]
    pub min_cost_usd: f64,
}
396
397impl Default for EvalConfig {
398    fn default() -> Self {
399        Self {
400            enabled: false,
401            endpoint: default_eval_endpoint(),
402            api_key: String::new(),
403            model: default_eval_model(),
404            rubric: default_eval_rubric(),
405            batch_size: default_eval_batch_size(),
406            min_cost_usd: default_eval_min_cost(),
407        }
408    }
409}
410
/// Serde fallback for `EvalConfig::endpoint`.
fn default_eval_endpoint() -> String {
    String::from("https://api.anthropic.com")
}
/// Serde fallback for `EvalConfig::model`.
fn default_eval_model() -> String {
    String::from("claude-haiku-4-5-20251001")
}
/// Serde fallback for `EvalConfig::rubric`.
fn default_eval_rubric() -> String {
    String::from("tool-efficiency-v1")
}
/// Serde fallback for `EvalConfig::batch_size`.
fn default_eval_batch_size() -> usize {
    const BATCH_SIZE: usize = 20;
    BATCH_SIZE
}
/// Serde fallback for `EvalConfig::min_cost_usd`.
fn default_eval_min_cost() -> f64 {
    0.01_f64
}
426
/// Opt-in post-hook outcome measurement (Tier C).
///
/// All keys are optional; omitted keys take the defaults noted on each field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CollectOutcomesConfig {
    /// Opt-in switch; defaults to `false`.
    #[serde(default)]
    pub enabled: bool,
    /// Test command line; defaults to `cargo test --quiet`.
    #[serde(default = "default_outcomes_test_cmd")]
    pub test_cmd: String,
    /// Timeout in seconds; defaults to 600.
    #[serde(default = "default_outcomes_timeout_secs")]
    pub timeout_secs: u64,
    /// Optional lint command line; `None` when unset.
    #[serde(default)]
    pub lint_cmd: Option<String>,
}
439
/// Serde fallback for `CollectOutcomesConfig::test_cmd`.
fn default_outcomes_test_cmd() -> String {
    String::from("cargo test --quiet")
}
443
/// Serde fallback for `CollectOutcomesConfig::timeout_secs`: ten minutes.
fn default_outcomes_timeout_secs() -> u64 {
    10 * 60
}
447
448impl Default for CollectOutcomesConfig {
449    fn default() -> Self {
450        Self {
451            enabled: false,
452            test_cmd: default_outcomes_test_cmd(),
453            timeout_secs: default_outcomes_timeout_secs(),
454            lint_cmd: None,
455        }
456    }
457}
458
/// Opt-in per-process sampling (Tier D).
///
/// All keys are optional; omitted keys take the defaults noted on each field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CollectSystemSamplerConfig {
    /// Opt-in switch; defaults to `false`.
    #[serde(default)]
    pub enabled: bool,
    /// Sampling period in milliseconds; defaults to 2000.
    #[serde(default = "default_sampler_sample_ms")]
    pub sample_ms: u64,
    /// Cap on samples per session; defaults to 3600.
    #[serde(default = "default_sampler_max_samples")]
    pub max_samples_per_session: u32,
}
469
/// Serde fallback for `CollectSystemSamplerConfig::sample_ms`: two seconds, in milliseconds.
fn default_sampler_sample_ms() -> u64 {
    2 * 1_000
}
473
/// Serde fallback for `CollectSystemSamplerConfig::max_samples_per_session`.
fn default_sampler_max_samples() -> u32 {
    const MAX_SAMPLES: u32 = 3_600;
    MAX_SAMPLES
}
477
478impl Default for CollectSystemSamplerConfig {
479    fn default() -> Self {
480        Self {
481            enabled: false,
482            sample_ms: default_sampler_sample_ms(),
483            max_samples_per_session: default_sampler_max_samples(),
484        }
485    }
486}
487
/// Settings for the `[collect]` table; each sub-table falls back to its own `Default` when omitted.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct CollectConfig {
    /// Post-hook outcome measurement settings.
    #[serde(default)]
    pub outcomes: CollectOutcomesConfig,
    /// Per-process sampling settings.
    #[serde(default)]
    pub system_sampler: CollectSystemSamplerConfig,
}
495
/// Root of a `config.toml` file. Every section is optional and falls back to
/// that section's `Default` when the table is absent.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct Config {
    /// `[scan]`: scan roots and rescan throttle.
    #[serde(default)]
    pub scan: ScanConfig,
    /// `[sources]`: per-source ingestion settings.
    #[serde(default)]
    pub sources: SourcesConfig,
    /// `[retention]`: hot / warm retention windows.
    #[serde(default)]
    pub retention: RetentionConfig,
    /// `[sync]`: sync endpoint, credentials, and batching limits.
    #[serde(default)]
    pub sync: SyncConfig,
    /// `[telemetry]`: third-party exporters and query-back settings.
    #[serde(default)]
    pub telemetry: TelemetryConfig,
    /// `[proxy]`: HTTP proxy settings.
    #[serde(default)]
    pub proxy: ProxyConfig,
    /// `[eval]`: evaluation settings.
    #[serde(default)]
    pub eval: EvalConfig,
    /// `[collect]`: opt-in outcome and sampling collection.
    #[serde(default)]
    pub collect: CollectConfig,
}
515
516/// Load config: workspace `.kaizen/config.toml` then `~/.kaizen/config.toml`.
517/// User config wins on overlap. Missing files → defaults, not error.
518pub fn load(workspace: &Path) -> Result<Config> {
519    let workspace_path = workspace.join(".kaizen/config.toml");
520    let user_path = home_dir()?.join(".kaizen/config.toml");
521
522    let base = load_file(&workspace_path).unwrap_or_default();
523    let user = load_file(&user_path).unwrap_or_default();
524    Ok(merge(base, user))
525}
526
527fn home_dir() -> Result<std::path::PathBuf> {
528    std::env::var("HOME")
529        .map(std::path::PathBuf::from)
530        .map_err(|e| anyhow::anyhow!("HOME not set: {e}"))
531}
532
533fn load_file(path: &Path) -> Option<Config> {
534    let text = std::fs::read_to_string(path).ok()?;
535    toml::from_str(&text).ok()
536}
537
538fn merge(base: Config, user: Config) -> Config {
539    Config {
540        scan: merge_scan(base.scan, user.scan),
541        sources: merge_sources(base.sources, user.sources),
542        retention: merge_retention(base.retention, user.retention),
543        sync: merge_sync(base.sync, user.sync),
544        telemetry: merge_telemetry(base.telemetry, user.telemetry),
545        proxy: merge_proxy(base.proxy, user.proxy),
546        eval: merge_eval(base.eval, user.eval),
547        collect: merge_collect(base.collect, user.collect),
548    }
549}
550
551fn merge_collect(base: CollectConfig, user: CollectConfig) -> CollectConfig {
552    let def = CollectConfig::default();
553    CollectConfig {
554        outcomes: merge_collect_outcomes(base.outcomes, user.outcomes, def.outcomes),
555        system_sampler: merge_collect_sampler(
556            base.system_sampler,
557            user.system_sampler,
558            def.system_sampler,
559        ),
560    }
561}
562
563fn merge_collect_outcomes(
564    base: CollectOutcomesConfig,
565    user: CollectOutcomesConfig,
566    def: CollectOutcomesConfig,
567) -> CollectOutcomesConfig {
568    CollectOutcomesConfig {
569        enabled: if user.enabled != def.enabled {
570            user.enabled
571        } else {
572            base.enabled
573        },
574        test_cmd: if user.test_cmd != def.test_cmd {
575            user.test_cmd
576        } else {
577            base.test_cmd
578        },
579        timeout_secs: if user.timeout_secs != def.timeout_secs {
580            user.timeout_secs
581        } else {
582            base.timeout_secs
583        },
584        lint_cmd: user.lint_cmd.or(base.lint_cmd),
585    }
586}
587
588fn merge_collect_sampler(
589    base: CollectSystemSamplerConfig,
590    user: CollectSystemSamplerConfig,
591    def: CollectSystemSamplerConfig,
592) -> CollectSystemSamplerConfig {
593    CollectSystemSamplerConfig {
594        enabled: if user.enabled != def.enabled {
595            user.enabled
596        } else {
597            base.enabled
598        },
599        sample_ms: if user.sample_ms != def.sample_ms {
600            user.sample_ms
601        } else {
602            base.sample_ms
603        },
604        max_samples_per_session: if user.max_samples_per_session != def.max_samples_per_session {
605            user.max_samples_per_session
606        } else {
607            base.max_samples_per_session
608        },
609    }
610}
611
612fn merge_sources(base: SourcesConfig, user: SourcesConfig) -> SourcesConfig {
613    let def = SourcesConfig::default();
614    SourcesConfig {
615        cursor: merge_cursor_source(base.cursor, user.cursor, def.cursor),
616        tail: merge_tail_toggles(base.tail, user.tail, def.tail),
617    }
618}
619
620fn merge_cursor_source(
621    base: CursorSourceConfig,
622    user: CursorSourceConfig,
623    def: CursorSourceConfig,
624) -> CursorSourceConfig {
625    CursorSourceConfig {
626        enabled: if user.enabled != def.enabled {
627            user.enabled
628        } else {
629            base.enabled
630        },
631        transcript_glob: if user.transcript_glob != def.transcript_glob {
632            user.transcript_glob
633        } else {
634            base.transcript_glob
635        },
636    }
637}
638
639fn merge_tail_toggles(
640    base: TailAgentToggles,
641    user: TailAgentToggles,
642    def: TailAgentToggles,
643) -> TailAgentToggles {
644    TailAgentToggles {
645        goose: if user.goose != def.goose {
646            user.goose
647        } else {
648            base.goose
649        },
650        openclaw: if user.openclaw != def.openclaw {
651            user.openclaw
652        } else {
653            base.openclaw
654        },
655        opencode: if user.opencode != def.opencode {
656            user.opencode
657        } else {
658            base.opencode
659        },
660        copilot_cli: if user.copilot_cli != def.copilot_cli {
661            user.copilot_cli
662        } else {
663            base.copilot_cli
664        },
665        copilot_vscode: if user.copilot_vscode != def.copilot_vscode {
666            user.copilot_vscode
667        } else {
668            base.copilot_vscode
669        },
670    }
671}
672
673fn merge_eval(base: EvalConfig, user: EvalConfig) -> EvalConfig {
674    let def = EvalConfig::default();
675    EvalConfig {
676        enabled: if user.enabled != def.enabled {
677            user.enabled
678        } else {
679            base.enabled
680        },
681        endpoint: if user.endpoint != def.endpoint {
682            user.endpoint
683        } else {
684            base.endpoint
685        },
686        api_key: if !user.api_key.is_empty() {
687            user.api_key
688        } else {
689            base.api_key
690        },
691        model: if user.model != def.model {
692            user.model
693        } else {
694            base.model
695        },
696        rubric: if user.rubric != def.rubric {
697            user.rubric
698        } else {
699            base.rubric
700        },
701        batch_size: if user.batch_size != def.batch_size {
702            user.batch_size
703        } else {
704            base.batch_size
705        },
706        min_cost_usd: if user.min_cost_usd != def.min_cost_usd {
707            user.min_cost_usd
708        } else {
709            base.min_cost_usd
710        },
711    }
712}
713
714fn merge_scan(base: ScanConfig, user: ScanConfig) -> ScanConfig {
715    let def = ScanConfig::default();
716    ScanConfig {
717        roots: if user.roots != def.roots {
718            user.roots
719        } else {
720            base.roots
721        },
722        min_rescan_seconds: if user.min_rescan_seconds != def.min_rescan_seconds {
723            user.min_rescan_seconds
724        } else {
725            base.min_rescan_seconds
726        },
727    }
728}
729
730fn merge_retention(base: RetentionConfig, user: RetentionConfig) -> RetentionConfig {
731    let def = RetentionConfig::default();
732    RetentionConfig {
733        hot_days: if user.hot_days != def.hot_days {
734            user.hot_days
735        } else {
736            base.hot_days
737        },
738        warm_days: if user.warm_days != def.warm_days {
739            user.warm_days
740        } else {
741            base.warm_days
742        },
743    }
744}
745
746fn merge_proxy(base: ProxyConfig, user: ProxyConfig) -> ProxyConfig {
747    let def = ProxyConfig::default();
748    ProxyConfig {
749        listen: if user.listen != def.listen {
750            user.listen
751        } else {
752            base.listen
753        },
754        upstream: if user.upstream != def.upstream {
755            user.upstream
756        } else {
757            base.upstream
758        },
759        compress_transport: if user.compress_transport != def.compress_transport {
760            user.compress_transport
761        } else {
762            base.compress_transport
763        },
764        minify_json: if user.minify_json != def.minify_json {
765            user.minify_json
766        } else {
767            base.minify_json
768        },
769        max_response_body_mb: if user.max_response_body_mb != def.max_response_body_mb {
770            user.max_response_body_mb
771        } else {
772            base.max_response_body_mb
773        },
774        max_request_body_mb: if user.max_request_body_mb != def.max_request_body_mb {
775            user.max_request_body_mb
776        } else {
777            base.max_request_body_mb
778        },
779        context_policy: if user.context_policy != def.context_policy {
780            user.context_policy
781        } else {
782            base.context_policy
783        },
784    }
785}
786
787fn merge_telemetry(base: TelemetryConfig, user: TelemetryConfig) -> TelemetryConfig {
788    let def = TelemetryConfig::default();
789    let fail_open = if user.fail_open != def.fail_open {
790        user.fail_open
791    } else {
792        base.fail_open
793    };
794    let query = merge_telemetry_query(base.query, user.query);
795    let exporters = if !user.exporters.is_empty() {
796        user.exporters
797    } else {
798        base.exporters
799    };
800    TelemetryConfig {
801        fail_open,
802        query,
803        exporters,
804    }
805}
806
807fn merge_telemetry_query(
808    base: TelemetryQueryConfig,
809    user: TelemetryQueryConfig,
810) -> TelemetryQueryConfig {
811    let def = TelemetryQueryConfig::default();
812    TelemetryQueryConfig {
813        provider: if user.provider != def.provider {
814            user.provider
815        } else {
816            base.provider
817        },
818        cache_ttl_seconds: if user.cache_ttl_seconds != def.cache_ttl_seconds {
819            user.cache_ttl_seconds
820        } else {
821            base.cache_ttl_seconds
822        },
823        identity_allowlist: merge_identity_allowlist(
824            base.identity_allowlist,
825            user.identity_allowlist,
826        ),
827    }
828}
829
830fn merge_identity_allowlist(base: IdentityAllowlist, user: IdentityAllowlist) -> IdentityAllowlist {
831    let def = IdentityAllowlist::default();
832    IdentityAllowlist {
833        team: if user.team != def.team {
834            user.team
835        } else {
836            base.team
837        },
838        workspace_label: if user.workspace_label != def.workspace_label {
839            user.workspace_label
840        } else {
841            base.workspace_label
842        },
843        runner_label: if user.runner_label != def.runner_label {
844            user.runner_label
845        } else {
846            base.runner_label
847        },
848        actor_kind: if user.actor_kind != def.actor_kind {
849            user.actor_kind
850        } else {
851            base.actor_kind
852        },
853        actor_label: if user.actor_label != def.actor_label {
854            user.actor_label
855        } else {
856            base.actor_label
857        },
858        agent: if user.agent != def.agent {
859            user.agent
860        } else {
861            base.agent
862        },
863        model: if user.model != def.model {
864            user.model
865        } else {
866            base.model
867        },
868        env: if user.env != def.env {
869            user.env
870        } else {
871            base.env
872        },
873        job: if user.job != def.job {
874            user.job
875        } else {
876            base.job
877        },
878        branch: if user.branch != def.branch {
879            user.branch
880        } else {
881            base.branch
882        },
883    }
884}
885
886fn merge_sync(base: SyncConfig, user: SyncConfig) -> SyncConfig {
887    let def = SyncConfig::default();
888    SyncConfig {
889        endpoint: if !user.endpoint.is_empty() {
890            user.endpoint
891        } else {
892            base.endpoint
893        },
894        team_token: if !user.team_token.is_empty() {
895            user.team_token
896        } else {
897            base.team_token
898        },
899        team_id: if !user.team_id.is_empty() {
900            user.team_id
901        } else {
902            base.team_id
903        },
904        events_per_batch_max: if user.events_per_batch_max != def.events_per_batch_max {
905            user.events_per_batch_max
906        } else {
907            base.events_per_batch_max
908        },
909        max_body_bytes: if user.max_body_bytes != def.max_body_bytes {
910            user.max_body_bytes
911        } else {
912            base.max_body_bytes
913        },
914        flush_interval_ms: if user.flush_interval_ms != def.flush_interval_ms {
915            user.flush_interval_ms
916        } else {
917            base.flush_interval_ms
918        },
919        sample_rate: if (user.sample_rate - def.sample_rate).abs() > f64::EPSILON {
920            user.sample_rate
921        } else {
922            base.sample_rate
923        },
924        team_salt_hex: if !user.team_salt_hex.is_empty() {
925            user.team_salt_hex
926        } else {
927            base.team_salt_hex
928        },
929    }
930}
931
932#[cfg(test)]
933mod tests {
934    use super::*;
935    use std::io::Write;
936    use tempfile::TempDir;
937
938    #[test]
939    fn defaults_when_no_files() {
940        let dir = TempDir::new().unwrap();
941        let cfg = load(dir.path()).unwrap();
942        assert_eq!(cfg.scan.roots, ScanConfig::default().roots);
943        assert_eq!(cfg.scan.min_rescan_seconds, 300);
944        assert_eq!(cfg.retention.hot_days, 30);
945    }
946
947    #[test]
948    fn workspace_config_loaded() {
949        let dir = TempDir::new().unwrap();
950        std::fs::create_dir_all(dir.path().join(".kaizen")).unwrap();
951        let mut f = std::fs::File::create(dir.path().join(".kaizen/config.toml")).unwrap();
952        writeln!(f, "[scan]\nroots = [\"/custom/root\"]").unwrap();
953
954        let cfg = load(dir.path()).unwrap();
955        assert_eq!(cfg.scan.roots, vec!["/custom/root"]);
956    }
957
958    #[test]
959    fn invalid_toml_ignored() {
960        let dir = TempDir::new().unwrap();
961        std::fs::create_dir_all(dir.path().join(".kaizen")).unwrap();
962        std::fs::write(dir.path().join(".kaizen/config.toml"), "not valid toml :::").unwrap();
963
964        let cfg = load(dir.path()).unwrap();
965        assert_eq!(cfg.scan.roots, ScanConfig::default().roots);
966    }
967
968    #[test]
969    fn merge_user_roots_win() {
970        let base = Config {
971            scan: ScanConfig {
972                roots: vec!["/base".to_string()],
973                ..ScanConfig::default()
974            },
975            ..Default::default()
976        };
977        let user = Config {
978            scan: ScanConfig {
979                roots: vec!["/user".to_string()],
980                ..ScanConfig::default()
981            },
982            ..Default::default()
983        };
984        let merged = merge(base, user);
985        assert_eq!(merged.scan.roots, vec!["/user"]);
986    }
987
988    #[test]
989    fn merge_sources_user_default_keeps_workspace_cursor() {
990        let base = Config {
991            sources: SourcesConfig {
992                cursor: CursorSourceConfig {
993                    enabled: false,
994                    transcript_glob: "/workspace/glob/**".into(),
995                },
996                ..Default::default()
997            },
998            ..Default::default()
999        };
1000        let user = Config::default();
1001        let merged = merge(base, user);
1002        assert!(!merged.sources.cursor.enabled);
1003        assert_eq!(merged.sources.cursor.transcript_glob, "/workspace/glob/**");
1004    }
1005
1006    #[test]
1007    fn merge_retention_field_by_field() {
1008        let base = Config {
1009            retention: RetentionConfig {
1010                hot_days: 60,
1011                warm_days: 90,
1012            },
1013            ..Default::default()
1014        };
1015        let user = Config {
1016            retention: RetentionConfig {
1017                hot_days: 30,
1018                warm_days: 45,
1019            },
1020            ..Default::default()
1021        };
1022        let merged = merge(base, user);
1023        assert_eq!(merged.retention.hot_days, 60);
1024        assert_eq!(merged.retention.warm_days, 45);
1025    }
1026
1027    #[test]
1028    fn merge_retention_user_hot_overrides() {
1029        let base = Config {
1030            retention: RetentionConfig {
1031                hot_days: 60,
1032                warm_days: 90,
1033            },
1034            ..Default::default()
1035        };
1036        let user = Config {
1037            retention: RetentionConfig {
1038                hot_days: 14,
1039                warm_days: 90,
1040            },
1041            ..Default::default()
1042        };
1043        let merged = merge(base, user);
1044        assert_eq!(merged.retention.hot_days, 14);
1045        assert_eq!(merged.retention.warm_days, 90);
1046    }
1047
1048    #[test]
1049    fn merge_telemetry_exporters_user_wins_non_empty() {
1050        let base = Config {
1051            telemetry: TelemetryConfig {
1052                fail_open: true,
1053                query: TelemetryQueryConfig::default(),
1054                exporters: vec![ExporterConfig::None],
1055            },
1056            ..Default::default()
1057        };
1058        let user = Config {
1059            telemetry: TelemetryConfig {
1060                fail_open: false,
1061                query: TelemetryQueryConfig::default(),
1062                exporters: vec![ExporterConfig::Dev { enabled: true }],
1063            },
1064            ..Default::default()
1065        };
1066        let merged = merge(base, user);
1067        assert!(!merged.telemetry.fail_open);
1068        assert_eq!(merged.telemetry.exporters.len(), 1);
1069    }
1070
1071    #[test]
1072    fn telemetry_query_defaults() {
1073        let t = TelemetryQueryConfig::default();
1074        assert_eq!(t.provider, QueryAuthority::None);
1075        assert_eq!(t.cache_ttl_seconds, 3600);
1076        assert!(!t.identity_allowlist.team);
1077        assert!(!t.has_provider_for_pull());
1078    }
1079
1080    #[test]
1081    fn telemetry_query_has_provider() {
1082        let ph = TelemetryQueryConfig {
1083            provider: QueryAuthority::Posthog,
1084            ..Default::default()
1085        };
1086        assert!(ph.has_provider_for_pull());
1087        let dd = TelemetryQueryConfig {
1088            provider: QueryAuthority::Datadog,
1089            ..Default::default()
1090        };
1091        assert!(dd.has_provider_for_pull());
1092    }
1093
1094    #[test]
1095    fn merge_telemetry_query_user_wins() {
1096        let base = Config {
1097            telemetry: TelemetryConfig {
1098                query: TelemetryQueryConfig {
1099                    provider: QueryAuthority::Posthog,
1100                    cache_ttl_seconds: 3600,
1101                    identity_allowlist: IdentityAllowlist {
1102                        team: true,
1103                        ..Default::default()
1104                    },
1105                },
1106                ..Default::default()
1107            },
1108            ..Default::default()
1109        };
1110        let user = Config {
1111            telemetry: TelemetryConfig {
1112                query: TelemetryQueryConfig {
1113                    cache_ttl_seconds: 7200,
1114                    ..Default::default()
1115                },
1116                ..Default::default()
1117            },
1118            ..Default::default()
1119        };
1120        let merged = merge(base, user);
1121        assert_eq!(merged.telemetry.query.provider, QueryAuthority::Posthog);
1122        assert_eq!(merged.telemetry.query.cache_ttl_seconds, 7200);
1123        assert!(merged.telemetry.query.identity_allowlist.team);
1124    }
1125
1126    #[test]
1127    fn toml_telemetry_query_roundtrip() {
1128        let dir = TempDir::new().unwrap();
1129        std::fs::create_dir_all(dir.path().join(".kaizen")).unwrap();
1130        let toml = r#"
1131[telemetry.query]
1132provider = "datadog"
1133cache_ttl_seconds = 1800
1134
1135[telemetry.query.identity_allowlist]
1136team = true
1137branch = true
1138"#;
1139        std::fs::write(dir.path().join(".kaizen/config.toml"), toml).unwrap();
1140        let cfg = load(dir.path()).unwrap();
1141        assert_eq!(cfg.telemetry.query.provider, QueryAuthority::Datadog);
1142        assert_eq!(cfg.telemetry.query.cache_ttl_seconds, 1800);
1143        assert!(cfg.telemetry.query.identity_allowlist.team);
1144        assert!(cfg.telemetry.query.identity_allowlist.branch);
1145        assert!(!cfg.telemetry.query.identity_allowlist.model);
1146    }
1147}