Skip to main content

kaizen/core/
config.rs

1// SPDX-License-Identifier: AGPL-3.0-or-later
2//! Config loading: workspace `.kaizen/config.toml` then `~/.kaizen/config.toml`.
3//! Missing files → defaults. User config wins on overlap.
4
5use anyhow::Result;
6use serde::{Deserialize, Serialize};
7use std::path::Path;
8
9#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
10pub struct ScanConfig {
11    pub roots: Vec<String>,
12    /// Minimum seconds between full agent transcript rescans when `--refresh` is not passed.
13    #[serde(default = "default_min_rescan_seconds")]
14    pub min_rescan_seconds: u64,
15}
16
/// Serde fallback for `ScanConfig::min_rescan_seconds`: five minutes, in seconds.
fn default_min_rescan_seconds() -> u64 {
    const FIVE_MINUTES_SECS: u64 = 5 * 60;
    FIVE_MINUTES_SECS
}
20
21impl Default for ScanConfig {
22    fn default() -> Self {
23        Self {
24            roots: vec!["~/.cursor/projects".to_string()],
25            min_rescan_seconds: default_min_rescan_seconds(),
26        }
27    }
28}
29
30#[derive(Debug, Clone, Serialize, Deserialize)]
31pub struct CursorSourceConfig {
32    pub enabled: bool,
33    pub transcript_glob: String,
34}
35
36impl Default for CursorSourceConfig {
37    fn default() -> Self {
38        Self {
39            enabled: true,
40            transcript_glob: "*/agent-transcripts".to_string(),
41        }
42    }
43}
44
/// Enable tier-1 tail ingestion for agents that store data outside Cursor/Claude/Codex paths.
///
/// Every toggle is optional in TOML: a missing key deserializes to `true` through
/// `default_true`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TailAgentToggles {
    /// Toggle read from the `goose` key.
    #[serde(default = "default_true")]
    pub goose: bool,
    /// Toggle read from the `openclaw` key.
    #[serde(default = "default_true")]
    pub openclaw: bool,
    /// Toggle read from the `opencode` key.
    #[serde(default = "default_true")]
    pub opencode: bool,
    /// Toggle read from the `copilot_cli` key.
    #[serde(default = "default_true")]
    pub copilot_cli: bool,
    /// Toggle read from the `copilot_vscode` key.
    #[serde(default = "default_true")]
    pub copilot_vscode: bool,
}
59
60impl Default for TailAgentToggles {
61    fn default() -> Self {
62        Self {
63            goose: true,
64            openclaw: true,
65            opencode: true,
66            copilot_cli: true,
67            copilot_vscode: true,
68        }
69    }
70}
71
/// Source settings (the `[sources]` table); each sub-table is optional and falls back
/// to its own `Default`.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct SourcesConfig {
    /// Cursor source settings (`[sources.cursor]`).
    #[serde(default)]
    pub cursor: CursorSourceConfig,
    /// Per-agent tail ingestion toggles (`[sources.tail]`).
    #[serde(default)]
    pub tail: TailAgentToggles,
}
79
80#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
81pub struct RetentionConfig {
82    pub hot_days: u32,
83    pub warm_days: u32,
84}
85
86impl Default for RetentionConfig {
87    fn default() -> Self {
88        Self {
89            hot_days: 30,
90            warm_days: 90,
91        }
92    }
93}
94
/// Sync settings (the `[sync]` table). Every key is optional; string keys default to
/// empty and numeric keys to the values returned by the `default_*` helpers.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SyncConfig {
    /// When empty, sync is disabled (no outbox enqueue, `sync run` no-ops flush).
    #[serde(default)]
    pub endpoint: String,
    /// Defaults to the empty string.
    #[serde(default)]
    pub team_token: String,
    /// Defaults to the empty string.
    #[serde(default)]
    pub team_id: String,
    /// Defaults to 500.
    #[serde(default = "default_events_per_batch")]
    pub events_per_batch_max: usize,
    /// Defaults to 1_000_000.
    #[serde(default = "default_max_body_bytes")]
    pub max_body_bytes: usize,
    /// Defaults to 10_000.
    #[serde(default = "default_flush_interval_ms")]
    pub flush_interval_ms: u64,
    /// Defaults to 1.0.
    #[serde(default = "default_sample_rate")]
    pub sample_rate: f64,
    /// 64 hex chars (32 bytes). Prefer `~/.kaizen/config.toml` only; never committed workspace secrets.
    #[serde(default)]
    pub team_salt_hex: String,
}
116
/// Serde fallback for `SyncConfig::events_per_batch_max`.
fn default_events_per_batch() -> usize {
    const EVENTS_PER_BATCH: usize = 500;
    EVENTS_PER_BATCH
}
120
/// Serde fallback for `SyncConfig::max_body_bytes`: one million bytes.
fn default_max_body_bytes() -> usize {
    const MAX_BODY_BYTES: usize = 1_000 * 1_000;
    MAX_BODY_BYTES
}
124
/// Serde fallback for `SyncConfig::flush_interval_ms`: ten seconds, in milliseconds.
fn default_flush_interval_ms() -> u64 {
    const TEN_SECONDS_MS: u64 = 10 * 1_000;
    TEN_SECONDS_MS
}
128
/// Serde fallback for `SyncConfig::sample_rate`.
fn default_sample_rate() -> f64 {
    const FULL_RATE: f64 = 1.0;
    FULL_RATE
}
132
133impl Default for SyncConfig {
134    fn default() -> Self {
135        Self {
136            endpoint: String::new(),
137            team_token: String::new(),
138            team_id: String::new(),
139            events_per_batch_max: default_events_per_batch(),
140            max_body_bytes: default_max_body_bytes(),
141            flush_interval_ms: default_flush_interval_ms(),
142            sample_rate: default_sample_rate(),
143            team_salt_hex: String::new(),
144        }
145    }
146}
147
148/// Parse `team_salt_hex` into 32 bytes. Returns `None` if missing or invalid.
149pub fn try_team_salt(cfg: &SyncConfig) -> Option<[u8; 32]> {
150    let h = cfg.team_salt_hex.trim();
151    if h.len() != 64 {
152        return None;
153    }
154    let bytes = hex::decode(h).ok()?;
155    bytes.try_into().ok()
156}
157
/// Serde fallback for boolean keys that are on unless explicitly disabled.
fn default_true() -> bool {
    const ON_BY_DEFAULT: bool = true;
    ON_BY_DEFAULT
}
161
/// Serde fallback for `TelemetryConfig::fail_open`.
fn default_telemetry_fail_open() -> bool {
    const FAIL_OPEN: bool = true;
    FAIL_OPEN
}
165
/// Serde fallback for `TelemetryQueryConfig::cache_ttl_seconds`: one hour, in seconds.
fn default_cache_ttl_seconds() -> u64 {
    const ONE_HOUR_SECS: u64 = 60 * 60;
    ONE_HOUR_SECS
}
169
/// Which third-party system is the single source for query-back / pull; OTLP is export-only, not a pull target.
///
/// Variant names are (de)serialized in lowercase: `none`, `posthog`, `datadog`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum QueryAuthority {
    /// No query authority; this is the `Default`.
    #[default]
    None,
    /// PostHog is the query authority.
    Posthog,
    /// Datadog is the query authority.
    Datadog,
}
179
/// Per-field allowlist: when `false` (default), the field is omitted or hashed in telemetry exports.
///
/// Every key is optional in TOML and deserializes to `false` when absent.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct IdentityAllowlist {
    #[serde(default)]
    pub team: bool,
    #[serde(default)]
    pub workspace_label: bool,
    #[serde(default)]
    pub runner_label: bool,
    #[serde(default)]
    pub actor_kind: bool,
    #[serde(default)]
    pub actor_label: bool,
    #[serde(default)]
    pub agent: bool,
    #[serde(default)]
    pub model: bool,
    #[serde(default)]
    pub env: bool,
    #[serde(default)]
    pub job: bool,
    #[serde(default)]
    pub branch: bool,
}
204
/// Remote pull: query authority, cache TTL, and which identity labels may leave as cleartext.
///
/// Every key is optional in TOML; absent keys take the values used by `Default`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct TelemetryQueryConfig {
    /// `posthog` or `datadog` enables provider pull when implemented; `none` or unset = no query authority.
    #[serde(default)]
    pub provider: QueryAuthority,
    /// Seconds to treat remote cache rows as fresh (unless the CLI requests `--refresh`).
    #[serde(default = "default_cache_ttl_seconds")]
    pub cache_ttl_seconds: u64,
    /// Per-field cleartext allowlist; all flags are `false` when the table is absent.
    #[serde(default)]
    pub identity_allowlist: IdentityAllowlist,
}
217
218impl Default for TelemetryQueryConfig {
219    fn default() -> Self {
220        Self {
221            provider: QueryAuthority::default(),
222            cache_ttl_seconds: default_cache_ttl_seconds(),
223            identity_allowlist: IdentityAllowlist::default(),
224        }
225    }
226}
227
228impl TelemetryQueryConfig {
229    /// True when a PostHog or Datadog pull backend may be used (OTLP is not a pull target).
230    pub fn has_provider_for_pull(&self) -> bool {
231        matches!(
232            self.provider,
233            QueryAuthority::Posthog | QueryAuthority::Datadog
234        )
235    }
236}
237
/// How to reduce billed input to the model (opt-in; default leaves requests unchanged).
///
/// Internally tagged: the `type` key selects the variant, written in snake_case
/// (`none`, `last_messages`, `max_input_tokens`).
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ContextPolicy {
    /// No transformation beyond optional JSON minify (same tokens as a direct call).
    #[default]
    None,
    /// Keep the last `count` `messages` array entries; system blocks unchanged when present.
    LastMessages { count: usize },
    /// Drop oldest messages until a rough `chars/4` estimate stays at or below `max`.
    MaxInputTokens { max: u32 },
}
250
/// Anthropic API-compatible HTTP proxy: forward + local telemetry. See `docs/llm-proxy.md`.
///
/// Every key is optional in TOML; absent keys take the values returned by the
/// `default_proxy_*` helpers, `default_true`, or `ContextPolicy::default()`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ProxyConfig {
    /// e.g. `127.0.0.1:3847` (bind address for `kaizen proxy run`).
    #[serde(default = "default_proxy_listen")]
    pub listen: String,
    /// Base URL, no trailing slash, e.g. `https://api.anthropic.com`.
    #[serde(default = "default_proxy_upstream")]
    pub upstream: String,
    /// Prefer `Accept-Encoding: gzip` to upstream (response bodies may be gzip).
    #[serde(default = "default_true")]
    pub compress_transport: bool,
    /// Re-encode JSON bodies to compact `serde_json` (no key reorder; whitespace only).
    #[serde(default = "default_true")]
    pub minify_json: bool,
    /// Slurp cap for a single upstream response (streaming not yet teed; see doc).
    #[serde(default = "default_proxy_max_body_mb")]
    pub max_response_body_mb: u32,
    /// Reject / fail incoming client bodies above this (POST bodies before forward).
    #[serde(default = "default_proxy_max_request_body_mb")]
    pub max_request_body_mb: u32,
    /// Optional token-aware truncation of `messages` in JSON bodies.
    #[serde(default)]
    pub context_policy: ContextPolicy,
}
276
/// Serde fallback for `ProxyConfig::listen`.
fn default_proxy_listen() -> String {
    String::from("127.0.0.1:3847")
}
280
/// Serde fallback for `ProxyConfig::upstream`.
fn default_proxy_upstream() -> String {
    String::from("https://api.anthropic.com")
}
284
/// Serde fallback for `ProxyConfig::max_response_body_mb`.
fn default_proxy_max_body_mb() -> u32 {
    const MAX_RESPONSE_MB: u32 = 256;
    MAX_RESPONSE_MB
}
288
/// Serde fallback for `ProxyConfig::max_request_body_mb`.
fn default_proxy_max_request_body_mb() -> u32 {
    const MAX_REQUEST_MB: u32 = 32;
    MAX_REQUEST_MB
}
292
293impl Default for ProxyConfig {
294    fn default() -> Self {
295        Self {
296            listen: default_proxy_listen(),
297            upstream: default_proxy_upstream(),
298            compress_transport: true,
299            minify_json: true,
300            max_response_body_mb: default_proxy_max_body_mb(),
301            max_request_body_mb: default_proxy_max_request_body_mb(),
302            context_policy: ContextPolicy::default(),
303        }
304    }
305}
306
/// Optional third-party telemetry sinks; same redacted batches as Kaizen sync.
///
/// Every key is optional in TOML: `fail_open` falls back to `true`, `query` to its
/// `Default`, and `exporters` to an empty list.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TelemetryConfig {
    /// When `true` (default), ignore exporter errors; when `false`, `flush` fails if any secondary errors.
    #[serde(default = "default_telemetry_fail_open")]
    pub fail_open: bool,
    /// Query-back / pull API: authority, cache TTL, identity allowlist.
    #[serde(default)]
    pub query: TelemetryQueryConfig,
    /// Declarative list; `type = "none"` rows are accepted and ignored.
    #[serde(default)]
    pub exporters: Vec<ExporterConfig>,
}
320
321impl Default for TelemetryConfig {
322    fn default() -> Self {
323        Self {
324            fail_open: default_telemetry_fail_open(),
325            query: TelemetryQueryConfig::default(),
326            exporters: Vec::new(),
327        }
328    }
329}
330
/// One pluggable sink; TOML `type` is the tag.
///
/// Tag values are the lowercase variant names: `none`, `file`, `dev`, `posthog`,
/// `datadog`, `otlp`. In every variant that has one, `enabled` defaults to `true`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ExporterConfig {
    /// No-op row for sparse tables / templates.
    None,
    /// Append summary JSON lines to a local NDJSON file (default `<workspace>/.kaizen/telemetry.ndjson`).
    File {
        #[serde(default = "default_true")]
        enabled: bool,
        #[serde(default)]
        path: Option<String>,
    },
    /// Echo to tracing (for wiring tests; requires the `telemetry-dev` build feature).
    Dev {
        #[serde(default = "default_true")]
        enabled: bool,
    },
    /// PostHog sink (tag `posthog`).
    PostHog {
        #[serde(default = "default_true")]
        enabled: bool,
        /// e.g. `https://us.i.posthog.com` (default when unset)
        host: Option<String>,
        /// Prefer env `POSTHOG_API_KEY` or `KAIZEN_POSTHOG_API_KEY`
        project_api_key: Option<String>,
    },
    /// Datadog sink (tag `datadog`).
    Datadog {
        #[serde(default = "default_true")]
        enabled: bool,
        /// e.g. `datadoghq.com`; env `DD_SITE` overrides
        site: Option<String>,
        /// Prefer env `DD_API_KEY` or `KAIZEN_DD_API_KEY`
        api_key: Option<String>,
    },
    /// OTLP sink (tag `otlp`).
    Otlp {
        #[serde(default = "default_true")]
        enabled: bool,
        /// Env `OTEL_EXPORTER_OTLP_ENDPOINT` (or KAIZEN_ prefix) when unset here
        endpoint: Option<String>,
    },
}
372
373impl ExporterConfig {
374    /// Whether this row should be considered for `load_exporters` (excludes `None`).
375    pub fn is_enabled(&self) -> bool {
376        match self {
377            ExporterConfig::None => false,
378            ExporterConfig::File { enabled, .. } => *enabled,
379            ExporterConfig::Dev { enabled, .. } => *enabled,
380            ExporterConfig::PostHog { enabled, .. } => *enabled,
381            ExporterConfig::Datadog { enabled, .. } => *enabled,
382            ExporterConfig::Otlp { enabled, .. } => *enabled,
383        }
384    }
385}
386
/// Eval settings (the `[eval]` table). Every key is optional; absent keys take the
/// values returned by the `default_eval_*` helpers.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EvalConfig {
    /// Defaults to `false`.
    #[serde(default)]
    pub enabled: bool,
    /// Defaults to `https://api.anthropic.com`.
    #[serde(default = "default_eval_endpoint")]
    pub endpoint: String,
    /// Defaults to the empty string.
    #[serde(default)]
    pub api_key: String,
    /// Defaults to `claude-haiku-4-5-20251001`.
    #[serde(default = "default_eval_model")]
    pub model: String,
    /// Defaults to `tool-efficiency-v1`.
    #[serde(default = "default_eval_rubric")]
    pub rubric: String,
    /// Defaults to 20.
    #[serde(default = "default_eval_batch_size")]
    pub batch_size: usize,
    /// Defaults to 0.01.
    #[serde(default = "default_eval_min_cost")]
    pub min_cost_usd: f64,
}
404
405impl Default for EvalConfig {
406    fn default() -> Self {
407        Self {
408            enabled: false,
409            endpoint: default_eval_endpoint(),
410            api_key: String::new(),
411            model: default_eval_model(),
412            rubric: default_eval_rubric(),
413            batch_size: default_eval_batch_size(),
414            min_cost_usd: default_eval_min_cost(),
415        }
416    }
417}
418
/// Serde fallback for `EvalConfig::endpoint`.
fn default_eval_endpoint() -> String {
    String::from("https://api.anthropic.com")
}
/// Serde fallback for `EvalConfig::model`.
fn default_eval_model() -> String {
    String::from("claude-haiku-4-5-20251001")
}
/// Serde fallback for `EvalConfig::rubric`.
fn default_eval_rubric() -> String {
    String::from("tool-efficiency-v1")
}
/// Serde fallback for `EvalConfig::batch_size`.
fn default_eval_batch_size() -> usize {
    const BATCH_SIZE: usize = 20;
    BATCH_SIZE
}
/// Serde fallback for `EvalConfig::min_cost_usd`.
fn default_eval_min_cost() -> f64 {
    const MIN_COST_USD: f64 = 0.01;
    MIN_COST_USD
}
434
/// Opt-in post-hook outcome measurement (Tier C).
///
/// Every key is optional in TOML; absent keys take the values used by `Default`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CollectOutcomesConfig {
    /// Defaults to `false` (opt-in).
    #[serde(default)]
    pub enabled: bool,
    /// Defaults to `cargo test --quiet`.
    #[serde(default = "default_outcomes_test_cmd")]
    pub test_cmd: String,
    /// Defaults to 600.
    #[serde(default = "default_outcomes_timeout_secs")]
    pub timeout_secs: u64,
    /// Defaults to `None`.
    #[serde(default)]
    pub lint_cmd: Option<String>,
}
447
/// Serde fallback for `CollectOutcomesConfig::test_cmd`.
fn default_outcomes_test_cmd() -> String {
    String::from("cargo test --quiet")
}
451
/// Serde fallback for `CollectOutcomesConfig::timeout_secs`: ten minutes, in seconds.
fn default_outcomes_timeout_secs() -> u64 {
    const TEN_MINUTES_SECS: u64 = 10 * 60;
    TEN_MINUTES_SECS
}
455
456impl Default for CollectOutcomesConfig {
457    fn default() -> Self {
458        Self {
459            enabled: false,
460            test_cmd: default_outcomes_test_cmd(),
461            timeout_secs: default_outcomes_timeout_secs(),
462            lint_cmd: None,
463        }
464    }
465}
466
/// Opt-in per-process sampling (Tier D).
///
/// Every key is optional in TOML; absent keys take the values used by `Default`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CollectSystemSamplerConfig {
    /// Defaults to `false` (opt-in).
    #[serde(default)]
    pub enabled: bool,
    /// Defaults to 2000.
    #[serde(default = "default_sampler_sample_ms")]
    pub sample_ms: u64,
    /// Defaults to 3600.
    #[serde(default = "default_sampler_max_samples")]
    pub max_samples_per_session: u32,
}
477
/// Serde fallback for `CollectSystemSamplerConfig::sample_ms`: two seconds, in milliseconds.
fn default_sampler_sample_ms() -> u64 {
    const TWO_SECONDS_MS: u64 = 2 * 1_000;
    TWO_SECONDS_MS
}
481
/// Serde fallback for `CollectSystemSamplerConfig::max_samples_per_session`.
fn default_sampler_max_samples() -> u32 {
    const MAX_SAMPLES: u32 = 3600;
    MAX_SAMPLES
}
485
486impl Default for CollectSystemSamplerConfig {
487    fn default() -> Self {
488        Self {
489            enabled: false,
490            sample_ms: default_sampler_sample_ms(),
491            max_samples_per_session: default_sampler_max_samples(),
492        }
493    }
494}
495
/// Collection settings (the `[collect]` table); each sub-table is optional and falls
/// back to its own `Default`.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct CollectConfig {
    /// Outcome measurement settings (`[collect.outcomes]`).
    #[serde(default)]
    pub outcomes: CollectOutcomesConfig,
    /// Process sampler settings (`[collect.system_sampler]`).
    #[serde(default)]
    pub system_sampler: CollectSystemSamplerConfig,
}
503
/// Root of a `config.toml` file. Every top-level table is optional; a missing table
/// deserializes to that section's `Default`.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct Config {
    /// `[scan]` table.
    #[serde(default)]
    pub scan: ScanConfig,
    /// `[sources]` table.
    #[serde(default)]
    pub sources: SourcesConfig,
    /// `[retention]` table.
    #[serde(default)]
    pub retention: RetentionConfig,
    /// `[sync]` table.
    #[serde(default)]
    pub sync: SyncConfig,
    /// `[telemetry]` table.
    #[serde(default)]
    pub telemetry: TelemetryConfig,
    /// `[proxy]` table.
    #[serde(default)]
    pub proxy: ProxyConfig,
    /// `[eval]` table.
    #[serde(default)]
    pub eval: EvalConfig,
    /// `[collect]` table.
    #[serde(default)]
    pub collect: CollectConfig,
}
523
524/// Load config: workspace `.kaizen/config.toml` then `~/.kaizen/config.toml`.
525/// User config wins on overlap. Missing files → defaults, not error.
526pub fn load(workspace: &Path) -> Result<Config> {
527    let workspace_path = workspace.join(".kaizen/config.toml");
528    let user_path = home_dir()?.join(".kaizen/config.toml");
529
530    let base = load_file(&workspace_path).unwrap_or_default();
531    let user = load_file(&user_path).unwrap_or_default();
532    Ok(merge(base, user))
533}
534
535fn home_dir() -> Result<std::path::PathBuf> {
536    std::env::var("HOME")
537        .map(std::path::PathBuf::from)
538        .map_err(|e| anyhow::anyhow!("HOME not set: {e}"))
539}
540
541fn load_file(path: &Path) -> Option<Config> {
542    let text = std::fs::read_to_string(path).ok()?;
543    toml::from_str(&text).ok()
544}
545
546fn merge(base: Config, user: Config) -> Config {
547    Config {
548        scan: merge_scan(base.scan, user.scan),
549        sources: merge_sources(base.sources, user.sources),
550        retention: merge_retention(base.retention, user.retention),
551        sync: merge_sync(base.sync, user.sync),
552        telemetry: merge_telemetry(base.telemetry, user.telemetry),
553        proxy: merge_proxy(base.proxy, user.proxy),
554        eval: merge_eval(base.eval, user.eval),
555        collect: merge_collect(base.collect, user.collect),
556    }
557}
558
559fn merge_collect(base: CollectConfig, user: CollectConfig) -> CollectConfig {
560    let def = CollectConfig::default();
561    CollectConfig {
562        outcomes: merge_collect_outcomes(base.outcomes, user.outcomes, def.outcomes),
563        system_sampler: merge_collect_sampler(
564            base.system_sampler,
565            user.system_sampler,
566            def.system_sampler,
567        ),
568    }
569}
570
571fn merge_collect_outcomes(
572    base: CollectOutcomesConfig,
573    user: CollectOutcomesConfig,
574    def: CollectOutcomesConfig,
575) -> CollectOutcomesConfig {
576    CollectOutcomesConfig {
577        enabled: if user.enabled != def.enabled {
578            user.enabled
579        } else {
580            base.enabled
581        },
582        test_cmd: if user.test_cmd != def.test_cmd {
583            user.test_cmd
584        } else {
585            base.test_cmd
586        },
587        timeout_secs: if user.timeout_secs != def.timeout_secs {
588            user.timeout_secs
589        } else {
590            base.timeout_secs
591        },
592        lint_cmd: user.lint_cmd.or(base.lint_cmd),
593    }
594}
595
596fn merge_collect_sampler(
597    base: CollectSystemSamplerConfig,
598    user: CollectSystemSamplerConfig,
599    def: CollectSystemSamplerConfig,
600) -> CollectSystemSamplerConfig {
601    CollectSystemSamplerConfig {
602        enabled: if user.enabled != def.enabled {
603            user.enabled
604        } else {
605            base.enabled
606        },
607        sample_ms: if user.sample_ms != def.sample_ms {
608            user.sample_ms
609        } else {
610            base.sample_ms
611        },
612        max_samples_per_session: if user.max_samples_per_session != def.max_samples_per_session {
613            user.max_samples_per_session
614        } else {
615            base.max_samples_per_session
616        },
617    }
618}
619
620fn merge_sources(base: SourcesConfig, user: SourcesConfig) -> SourcesConfig {
621    let def = SourcesConfig::default();
622    SourcesConfig {
623        cursor: merge_cursor_source(base.cursor, user.cursor, def.cursor),
624        tail: merge_tail_toggles(base.tail, user.tail, def.tail),
625    }
626}
627
628fn merge_cursor_source(
629    base: CursorSourceConfig,
630    user: CursorSourceConfig,
631    def: CursorSourceConfig,
632) -> CursorSourceConfig {
633    CursorSourceConfig {
634        enabled: if user.enabled != def.enabled {
635            user.enabled
636        } else {
637            base.enabled
638        },
639        transcript_glob: if user.transcript_glob != def.transcript_glob {
640            user.transcript_glob
641        } else {
642            base.transcript_glob
643        },
644    }
645}
646
647fn merge_tail_toggles(
648    base: TailAgentToggles,
649    user: TailAgentToggles,
650    def: TailAgentToggles,
651) -> TailAgentToggles {
652    TailAgentToggles {
653        goose: if user.goose != def.goose {
654            user.goose
655        } else {
656            base.goose
657        },
658        openclaw: if user.openclaw != def.openclaw {
659            user.openclaw
660        } else {
661            base.openclaw
662        },
663        opencode: if user.opencode != def.opencode {
664            user.opencode
665        } else {
666            base.opencode
667        },
668        copilot_cli: if user.copilot_cli != def.copilot_cli {
669            user.copilot_cli
670        } else {
671            base.copilot_cli
672        },
673        copilot_vscode: if user.copilot_vscode != def.copilot_vscode {
674            user.copilot_vscode
675        } else {
676            base.copilot_vscode
677        },
678    }
679}
680
681fn merge_eval(base: EvalConfig, user: EvalConfig) -> EvalConfig {
682    let def = EvalConfig::default();
683    EvalConfig {
684        enabled: if user.enabled != def.enabled {
685            user.enabled
686        } else {
687            base.enabled
688        },
689        endpoint: if user.endpoint != def.endpoint {
690            user.endpoint
691        } else {
692            base.endpoint
693        },
694        api_key: if !user.api_key.is_empty() {
695            user.api_key
696        } else {
697            base.api_key
698        },
699        model: if user.model != def.model {
700            user.model
701        } else {
702            base.model
703        },
704        rubric: if user.rubric != def.rubric {
705            user.rubric
706        } else {
707            base.rubric
708        },
709        batch_size: if user.batch_size != def.batch_size {
710            user.batch_size
711        } else {
712            base.batch_size
713        },
714        min_cost_usd: if user.min_cost_usd != def.min_cost_usd {
715            user.min_cost_usd
716        } else {
717            base.min_cost_usd
718        },
719    }
720}
721
722fn merge_scan(base: ScanConfig, user: ScanConfig) -> ScanConfig {
723    let def = ScanConfig::default();
724    ScanConfig {
725        roots: if user.roots != def.roots {
726            user.roots
727        } else {
728            base.roots
729        },
730        min_rescan_seconds: if user.min_rescan_seconds != def.min_rescan_seconds {
731            user.min_rescan_seconds
732        } else {
733            base.min_rescan_seconds
734        },
735    }
736}
737
738fn merge_retention(base: RetentionConfig, user: RetentionConfig) -> RetentionConfig {
739    let def = RetentionConfig::default();
740    RetentionConfig {
741        hot_days: if user.hot_days != def.hot_days {
742            user.hot_days
743        } else {
744            base.hot_days
745        },
746        warm_days: if user.warm_days != def.warm_days {
747            user.warm_days
748        } else {
749            base.warm_days
750        },
751    }
752}
753
754fn merge_proxy(base: ProxyConfig, user: ProxyConfig) -> ProxyConfig {
755    let def = ProxyConfig::default();
756    ProxyConfig {
757        listen: if user.listen != def.listen {
758            user.listen
759        } else {
760            base.listen
761        },
762        upstream: if user.upstream != def.upstream {
763            user.upstream
764        } else {
765            base.upstream
766        },
767        compress_transport: if user.compress_transport != def.compress_transport {
768            user.compress_transport
769        } else {
770            base.compress_transport
771        },
772        minify_json: if user.minify_json != def.minify_json {
773            user.minify_json
774        } else {
775            base.minify_json
776        },
777        max_response_body_mb: if user.max_response_body_mb != def.max_response_body_mb {
778            user.max_response_body_mb
779        } else {
780            base.max_response_body_mb
781        },
782        max_request_body_mb: if user.max_request_body_mb != def.max_request_body_mb {
783            user.max_request_body_mb
784        } else {
785            base.max_request_body_mb
786        },
787        context_policy: if user.context_policy != def.context_policy {
788            user.context_policy
789        } else {
790            base.context_policy
791        },
792    }
793}
794
795fn merge_telemetry(base: TelemetryConfig, user: TelemetryConfig) -> TelemetryConfig {
796    let def = TelemetryConfig::default();
797    let fail_open = if user.fail_open != def.fail_open {
798        user.fail_open
799    } else {
800        base.fail_open
801    };
802    let query = merge_telemetry_query(base.query, user.query);
803    let exporters = if !user.exporters.is_empty() {
804        user.exporters
805    } else {
806        base.exporters
807    };
808    TelemetryConfig {
809        fail_open,
810        query,
811        exporters,
812    }
813}
814
815fn merge_telemetry_query(
816    base: TelemetryQueryConfig,
817    user: TelemetryQueryConfig,
818) -> TelemetryQueryConfig {
819    let def = TelemetryQueryConfig::default();
820    TelemetryQueryConfig {
821        provider: if user.provider != def.provider {
822            user.provider
823        } else {
824            base.provider
825        },
826        cache_ttl_seconds: if user.cache_ttl_seconds != def.cache_ttl_seconds {
827            user.cache_ttl_seconds
828        } else {
829            base.cache_ttl_seconds
830        },
831        identity_allowlist: merge_identity_allowlist(
832            base.identity_allowlist,
833            user.identity_allowlist,
834        ),
835    }
836}
837
838fn merge_identity_allowlist(base: IdentityAllowlist, user: IdentityAllowlist) -> IdentityAllowlist {
839    let def = IdentityAllowlist::default();
840    IdentityAllowlist {
841        team: if user.team != def.team {
842            user.team
843        } else {
844            base.team
845        },
846        workspace_label: if user.workspace_label != def.workspace_label {
847            user.workspace_label
848        } else {
849            base.workspace_label
850        },
851        runner_label: if user.runner_label != def.runner_label {
852            user.runner_label
853        } else {
854            base.runner_label
855        },
856        actor_kind: if user.actor_kind != def.actor_kind {
857            user.actor_kind
858        } else {
859            base.actor_kind
860        },
861        actor_label: if user.actor_label != def.actor_label {
862            user.actor_label
863        } else {
864            base.actor_label
865        },
866        agent: if user.agent != def.agent {
867            user.agent
868        } else {
869            base.agent
870        },
871        model: if user.model != def.model {
872            user.model
873        } else {
874            base.model
875        },
876        env: if user.env != def.env {
877            user.env
878        } else {
879            base.env
880        },
881        job: if user.job != def.job {
882            user.job
883        } else {
884            base.job
885        },
886        branch: if user.branch != def.branch {
887            user.branch
888        } else {
889            base.branch
890        },
891    }
892}
893
894fn merge_sync(base: SyncConfig, user: SyncConfig) -> SyncConfig {
895    let def = SyncConfig::default();
896    SyncConfig {
897        endpoint: if !user.endpoint.is_empty() {
898            user.endpoint
899        } else {
900            base.endpoint
901        },
902        team_token: if !user.team_token.is_empty() {
903            user.team_token
904        } else {
905            base.team_token
906        },
907        team_id: if !user.team_id.is_empty() {
908            user.team_id
909        } else {
910            base.team_id
911        },
912        events_per_batch_max: if user.events_per_batch_max != def.events_per_batch_max {
913            user.events_per_batch_max
914        } else {
915            base.events_per_batch_max
916        },
917        max_body_bytes: if user.max_body_bytes != def.max_body_bytes {
918            user.max_body_bytes
919        } else {
920            base.max_body_bytes
921        },
922        flush_interval_ms: if user.flush_interval_ms != def.flush_interval_ms {
923            user.flush_interval_ms
924        } else {
925            base.flush_interval_ms
926        },
927        sample_rate: if (user.sample_rate - def.sample_rate).abs() > f64::EPSILON {
928            user.sample_rate
929        } else {
930            base.sample_rate
931        },
932        team_salt_hex: if !user.team_salt_hex.is_empty() {
933            user.team_salt_hex
934        } else {
935            base.team_salt_hex
936        },
937    }
938}
939
940#[cfg(test)]
941mod tests {
942    use super::*;
943    use std::io::Write;
944    use tempfile::TempDir;
945
946    #[test]
947    fn defaults_when_no_files() {
948        let dir = TempDir::new().unwrap();
949        let cfg = load(dir.path()).unwrap();
950        assert_eq!(cfg.scan.roots, ScanConfig::default().roots);
951        assert_eq!(cfg.scan.min_rescan_seconds, 300);
952        assert_eq!(cfg.retention.hot_days, 30);
953    }
954
955    #[test]
956    fn workspace_config_loaded() {
957        let dir = TempDir::new().unwrap();
958        std::fs::create_dir_all(dir.path().join(".kaizen")).unwrap();
959        let mut f = std::fs::File::create(dir.path().join(".kaizen/config.toml")).unwrap();
960        writeln!(f, "[scan]\nroots = [\"/custom/root\"]").unwrap();
961
962        let cfg = load(dir.path()).unwrap();
963        assert_eq!(cfg.scan.roots, vec!["/custom/root"]);
964    }
965
966    #[test]
967    fn invalid_toml_ignored() {
968        let dir = TempDir::new().unwrap();
969        std::fs::create_dir_all(dir.path().join(".kaizen")).unwrap();
970        std::fs::write(dir.path().join(".kaizen/config.toml"), "not valid toml :::").unwrap();
971
972        let cfg = load(dir.path()).unwrap();
973        assert_eq!(cfg.scan.roots, ScanConfig::default().roots);
974    }
975
976    #[test]
977    fn merge_user_roots_win() {
978        let base = Config {
979            scan: ScanConfig {
980                roots: vec!["/base".to_string()],
981                ..ScanConfig::default()
982            },
983            ..Default::default()
984        };
985        let user = Config {
986            scan: ScanConfig {
987                roots: vec!["/user".to_string()],
988                ..ScanConfig::default()
989            },
990            ..Default::default()
991        };
992        let merged = merge(base, user);
993        assert_eq!(merged.scan.roots, vec!["/user"]);
994    }
995
996    #[test]
997    fn merge_sources_user_default_keeps_workspace_cursor() {
998        let base = Config {
999            sources: SourcesConfig {
1000                cursor: CursorSourceConfig {
1001                    enabled: false,
1002                    transcript_glob: "/workspace/glob/**".into(),
1003                },
1004                ..Default::default()
1005            },
1006            ..Default::default()
1007        };
1008        let user = Config::default();
1009        let merged = merge(base, user);
1010        assert!(!merged.sources.cursor.enabled);
1011        assert_eq!(merged.sources.cursor.transcript_glob, "/workspace/glob/**");
1012    }
1013
1014    #[test]
1015    fn merge_retention_field_by_field() {
1016        let base = Config {
1017            retention: RetentionConfig {
1018                hot_days: 60,
1019                warm_days: 90,
1020            },
1021            ..Default::default()
1022        };
1023        let user = Config {
1024            retention: RetentionConfig {
1025                hot_days: 30,
1026                warm_days: 45,
1027            },
1028            ..Default::default()
1029        };
1030        let merged = merge(base, user);
1031        assert_eq!(merged.retention.hot_days, 60);
1032        assert_eq!(merged.retention.warm_days, 45);
1033    }
1034
1035    #[test]
1036    fn merge_retention_user_hot_overrides() {
1037        let base = Config {
1038            retention: RetentionConfig {
1039                hot_days: 60,
1040                warm_days: 90,
1041            },
1042            ..Default::default()
1043        };
1044        let user = Config {
1045            retention: RetentionConfig {
1046                hot_days: 14,
1047                warm_days: 90,
1048            },
1049            ..Default::default()
1050        };
1051        let merged = merge(base, user);
1052        assert_eq!(merged.retention.hot_days, 14);
1053        assert_eq!(merged.retention.warm_days, 90);
1054    }
1055
1056    #[test]
1057    fn merge_telemetry_exporters_user_wins_non_empty() {
1058        let base = Config {
1059            telemetry: TelemetryConfig {
1060                fail_open: true,
1061                query: TelemetryQueryConfig::default(),
1062                exporters: vec![ExporterConfig::None],
1063            },
1064            ..Default::default()
1065        };
1066        let user = Config {
1067            telemetry: TelemetryConfig {
1068                fail_open: false,
1069                query: TelemetryQueryConfig::default(),
1070                exporters: vec![ExporterConfig::Dev { enabled: true }],
1071            },
1072            ..Default::default()
1073        };
1074        let merged = merge(base, user);
1075        assert!(!merged.telemetry.fail_open);
1076        assert_eq!(merged.telemetry.exporters.len(), 1);
1077    }
1078
1079    #[test]
1080    fn telemetry_query_defaults() {
1081        let t = TelemetryQueryConfig::default();
1082        assert_eq!(t.provider, QueryAuthority::None);
1083        assert_eq!(t.cache_ttl_seconds, 3600);
1084        assert!(!t.identity_allowlist.team);
1085        assert!(!t.has_provider_for_pull());
1086    }
1087
1088    #[test]
1089    fn telemetry_query_has_provider() {
1090        let ph = TelemetryQueryConfig {
1091            provider: QueryAuthority::Posthog,
1092            ..Default::default()
1093        };
1094        assert!(ph.has_provider_for_pull());
1095        let dd = TelemetryQueryConfig {
1096            provider: QueryAuthority::Datadog,
1097            ..Default::default()
1098        };
1099        assert!(dd.has_provider_for_pull());
1100    }
1101
1102    #[test]
1103    fn merge_telemetry_query_user_wins() {
1104        let base = Config {
1105            telemetry: TelemetryConfig {
1106                query: TelemetryQueryConfig {
1107                    provider: QueryAuthority::Posthog,
1108                    cache_ttl_seconds: 3600,
1109                    identity_allowlist: IdentityAllowlist {
1110                        team: true,
1111                        ..Default::default()
1112                    },
1113                },
1114                ..Default::default()
1115            },
1116            ..Default::default()
1117        };
1118        let user = Config {
1119            telemetry: TelemetryConfig {
1120                query: TelemetryQueryConfig {
1121                    cache_ttl_seconds: 7200,
1122                    ..Default::default()
1123                },
1124                ..Default::default()
1125            },
1126            ..Default::default()
1127        };
1128        let merged = merge(base, user);
1129        assert_eq!(merged.telemetry.query.provider, QueryAuthority::Posthog);
1130        assert_eq!(merged.telemetry.query.cache_ttl_seconds, 7200);
1131        assert!(merged.telemetry.query.identity_allowlist.team);
1132    }
1133
1134    #[test]
1135    fn toml_telemetry_query_roundtrip() {
1136        let dir = TempDir::new().unwrap();
1137        std::fs::create_dir_all(dir.path().join(".kaizen")).unwrap();
1138        let toml = r#"
1139[telemetry.query]
1140provider = "datadog"
1141cache_ttl_seconds = 1800
1142
1143[telemetry.query.identity_allowlist]
1144team = true
1145branch = true
1146"#;
1147        std::fs::write(dir.path().join(".kaizen/config.toml"), toml).unwrap();
1148        let cfg = load(dir.path()).unwrap();
1149        assert_eq!(cfg.telemetry.query.provider, QueryAuthority::Datadog);
1150        assert_eq!(cfg.telemetry.query.cache_ttl_seconds, 1800);
1151        assert!(cfg.telemetry.query.identity_allowlist.team);
1152        assert!(cfg.telemetry.query.identity_allowlist.branch);
1153        assert!(!cfg.telemetry.query.identity_allowlist.model);
1154    }
1155}