Skip to main content

kaizen/core/
config.rs

1// SPDX-License-Identifier: AGPL-3.0-or-later
//! Config loading: project `~/.kaizen/projects/<slug>/config.toml` then `~/.kaizen/config.toml`.
3//! Missing files → defaults. User config wins on overlap.
4
5use anyhow::Result;
6use serde::{Deserialize, Serialize};
7use std::path::Path;
8
9#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
10pub struct ScanConfig {
11    pub roots: Vec<String>,
12    /// Minimum seconds between full agent transcript rescans when `--refresh` is not passed.
13    #[serde(default = "default_min_rescan_seconds")]
14    pub min_rescan_seconds: u64,
15}
16
/// Serde default for `ScanConfig::min_rescan_seconds`: five minutes, in seconds.
fn default_min_rescan_seconds() -> u64 {
    5 * 60
}
20
21impl Default for ScanConfig {
22    fn default() -> Self {
23        Self {
24            roots: vec!["~/.cursor/projects".to_string()],
25            min_rescan_seconds: default_min_rescan_seconds(),
26        }
27    }
28}
29
30#[derive(Debug, Clone, Serialize, Deserialize)]
31pub struct CursorSourceConfig {
32    pub enabled: bool,
33    pub transcript_glob: String,
34}
35
36impl Default for CursorSourceConfig {
37    fn default() -> Self {
38        Self {
39            enabled: true,
40            transcript_glob: "*/agent-transcripts".to_string(),
41        }
42    }
43}
44
45/// Enable tier-1 tail ingestion for agents that store data outside Cursor/Claude/Codex paths.
46#[derive(Debug, Clone, Serialize, Deserialize)]
47pub struct TailAgentToggles {
48    #[serde(default = "default_true")]
49    pub goose: bool,
50    #[serde(default = "default_true")]
51    pub openclaw: bool,
52    #[serde(default = "default_true")]
53    pub opencode: bool,
54    #[serde(default = "default_true")]
55    pub copilot_cli: bool,
56    #[serde(default = "default_true")]
57    pub copilot_vscode: bool,
58}
59
60impl Default for TailAgentToggles {
61    fn default() -> Self {
62        Self {
63            goose: true,
64            openclaw: true,
65            opencode: true,
66            copilot_cli: true,
67            copilot_vscode: true,
68        }
69    }
70}
71
72#[derive(Debug, Clone, Serialize, Deserialize, Default)]
73pub struct SourcesConfig {
74    #[serde(default)]
75    pub cursor: CursorSourceConfig,
76    #[serde(default)]
77    pub tail: TailAgentToggles,
78}
79
80#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
81pub struct RetentionConfig {
82    pub hot_days: u32,
83    pub warm_days: u32,
84}
85
86impl Default for RetentionConfig {
87    fn default() -> Self {
88        Self {
89            hot_days: 30,
90            warm_days: 90,
91        }
92    }
93}
94
95#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
96pub struct StorageConfig {
97    pub hot_max_bytes: String,
98    pub cold_after_days: u32,
99    pub retention_days: u32,
100    pub flush_hour_utc: u8,
101}
102
103impl Default for StorageConfig {
104    fn default() -> Self {
105        Self {
106            hot_max_bytes: "1GB".into(),
107            cold_after_days: 7,
108            retention_days: 90,
109            flush_hour_utc: 0,
110        }
111    }
112}
113
114impl StorageConfig {
115    pub fn hot_max_bytes_value(&self) -> u64 {
116        parse_byte_size(&self.hot_max_bytes).unwrap_or(1_073_741_824)
117    }
118}
119
120#[derive(Debug, Clone, Serialize, Deserialize)]
121pub struct SyncConfig {
122    /// When empty, sync is disabled (no outbox enqueue, `sync run` no-ops flush).
123    #[serde(default)]
124    pub endpoint: String,
125    #[serde(default)]
126    pub team_token: String,
127    #[serde(default)]
128    pub team_id: String,
129    #[serde(default = "default_events_per_batch")]
130    pub events_per_batch_max: usize,
131    #[serde(default = "default_max_body_bytes")]
132    pub max_body_bytes: usize,
133    #[serde(default = "default_flush_interval_ms")]
134    pub flush_interval_ms: u64,
135    #[serde(default = "default_sample_rate")]
136    pub sample_rate: f64,
137    /// 64 hex chars (32 bytes). Prefer `~/.kaizen/config.toml` only; never committed workspace secrets.
138    #[serde(default)]
139    pub team_salt_hex: String,
140}
141
/// Serde default for `SyncConfig::events_per_batch_max`.
fn default_events_per_batch() -> usize {
    500
}

/// Serde default for `SyncConfig::max_body_bytes`.
fn default_max_body_bytes() -> usize {
    1_000 * 1_000
}

/// Serde default for `SyncConfig::flush_interval_ms`: ten seconds, in milliseconds.
fn default_flush_interval_ms() -> u64 {
    10 * 1_000
}

/// Serde default for `SyncConfig::sample_rate`.
fn default_sample_rate() -> f64 {
    1.0_f64
}
157
158impl Default for SyncConfig {
159    fn default() -> Self {
160        Self {
161            endpoint: String::new(),
162            team_token: String::new(),
163            team_id: String::new(),
164            events_per_batch_max: default_events_per_batch(),
165            max_body_bytes: default_max_body_bytes(),
166            flush_interval_ms: default_flush_interval_ms(),
167            sample_rate: default_sample_rate(),
168            team_salt_hex: String::new(),
169        }
170    }
171}
172
173/// Parse `team_salt_hex` into 32 bytes. Returns `None` if missing or invalid.
174pub fn try_team_salt(cfg: &SyncConfig) -> Option<[u8; 32]> {
175    let h = cfg.team_salt_hex.trim();
176    if h.len() != 64 {
177        return None;
178    }
179    let bytes = hex::decode(h).ok()?;
180    bytes.try_into().ok()
181}
182
/// Shared serde default for flags that are on unless the config turns them off.
fn default_true() -> bool {
    !false
}

/// Serde default for `TelemetryConfig::fail_open`.
fn default_telemetry_fail_open() -> bool {
    !false
}

/// Serde default for `TelemetryQueryConfig::cache_ttl_seconds`: one hour, in seconds.
fn default_cache_ttl_seconds() -> u64 {
    60 * 60
}
194
195/// Which third-party system is the single source for query-back / pull; OTLP is export-only, not a pull target.
196#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
197#[serde(rename_all = "lowercase")]
198pub enum QueryAuthority {
199    #[default]
200    None,
201    Posthog,
202    Datadog,
203}
204
205/// Per-field allowlist: when `false` (default), the field is omitted or hashed in telemetry exports.
206#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
207pub struct IdentityAllowlist {
208    #[serde(default)]
209    pub team: bool,
210    #[serde(default)]
211    pub workspace_label: bool,
212    #[serde(default)]
213    pub runner_label: bool,
214    #[serde(default)]
215    pub actor_kind: bool,
216    #[serde(default)]
217    pub actor_label: bool,
218    #[serde(default)]
219    pub agent: bool,
220    #[serde(default)]
221    pub model: bool,
222    #[serde(default)]
223    pub env: bool,
224    #[serde(default)]
225    pub job: bool,
226    #[serde(default)]
227    pub branch: bool,
228}
229
230/// Remote pull: query authority, cache TTL, and which identity labels may leave as cleartext.
231#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
232pub struct TelemetryQueryConfig {
233    /// `posthog` or `datadog` enables provider pull when implemented; `none` or unset = no query authority.
234    #[serde(default)]
235    pub provider: QueryAuthority,
236    /// Seconds to treat remote cache rows as fresh (unless the CLI requests `--refresh`).
237    #[serde(default = "default_cache_ttl_seconds")]
238    pub cache_ttl_seconds: u64,
239    #[serde(default)]
240    pub identity_allowlist: IdentityAllowlist,
241}
242
243impl Default for TelemetryQueryConfig {
244    fn default() -> Self {
245        Self {
246            provider: QueryAuthority::default(),
247            cache_ttl_seconds: default_cache_ttl_seconds(),
248            identity_allowlist: IdentityAllowlist::default(),
249        }
250    }
251}
252
253impl TelemetryQueryConfig {
254    /// True when a PostHog or Datadog pull backend may be used (OTLP is not a pull target).
255    pub fn has_provider_for_pull(&self) -> bool {
256        matches!(
257            self.provider,
258            QueryAuthority::Posthog | QueryAuthority::Datadog
259        )
260    }
261}
262
263/// How to reduce billed input to the model (opt-in; default leaves requests unchanged).
264#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
265#[serde(tag = "type", rename_all = "snake_case")]
266pub enum ContextPolicy {
267    /// No transformation beyond optional JSON minify (same tokens as a direct call).
268    #[default]
269    None,
270    /// Keep the last `count` `messages` array entries; system blocks unchanged when present.
271    LastMessages { count: usize },
272    /// Drop oldest messages until a rough `chars/4` estimate stays at or below `max`.
273    MaxInputTokens { max: u32 },
274}
275
276/// Anthropic API-compatible HTTP proxy: forward + local telemetry. See `docs/llm-proxy.md`.
277#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
278pub struct ProxyConfig {
279    /// e.g. `127.0.0.1:3847` (bind address for `kaizen proxy run`).
280    #[serde(default = "default_proxy_listen")]
281    pub listen: String,
282    /// Base URL, no trailing slash, e.g. `https://api.anthropic.com`.
283    #[serde(default = "default_proxy_upstream")]
284    pub upstream: String,
285    /// Prefer `Accept-Encoding: gzip` to upstream (response bodies may be gzip).
286    #[serde(default = "default_true")]
287    pub compress_transport: bool,
288    /// Re-encode JSON bodies to compact `serde_json` (no key reorder; whitespace only).
289    #[serde(default = "default_true")]
290    pub minify_json: bool,
291    /// Slurp cap for a single upstream response (streaming not yet teed; see doc).
292    #[serde(default = "default_proxy_max_body_mb")]
293    pub max_response_body_mb: u32,
294    /// Reject / fail incoming client bodies above this (POST bodies before forward).
295    #[serde(default = "default_proxy_max_request_body_mb")]
296    pub max_request_body_mb: u32,
297    /// Optional token-aware truncation of `messages` in JSON bodies.
298    #[serde(default)]
299    pub context_policy: ContextPolicy,
300}
301
/// Serde default for `ProxyConfig::listen`.
fn default_proxy_listen() -> String {
    String::from("127.0.0.1:3847")
}

/// Serde default for `ProxyConfig::upstream`.
fn default_proxy_upstream() -> String {
    String::from("https://api.anthropic.com")
}

/// Serde default for `ProxyConfig::max_response_body_mb`.
fn default_proxy_max_body_mb() -> u32 {
    256
}

/// Serde default for `ProxyConfig::max_request_body_mb`.
fn default_proxy_max_request_body_mb() -> u32 {
    32
}
317
318impl Default for ProxyConfig {
319    fn default() -> Self {
320        Self {
321            listen: default_proxy_listen(),
322            upstream: default_proxy_upstream(),
323            compress_transport: true,
324            minify_json: true,
325            max_response_body_mb: default_proxy_max_body_mb(),
326            max_request_body_mb: default_proxy_max_request_body_mb(),
327            context_policy: ContextPolicy::default(),
328        }
329    }
330}
331
332/// Optional third-party telemetry sinks; same redacted batches as Kaizen sync.
333#[derive(Debug, Clone, Serialize, Deserialize)]
334pub struct TelemetryConfig {
335    /// When `true` (default), ignore exporter errors; when `false`, `flush` fails if any secondary errors.
336    #[serde(default = "default_telemetry_fail_open")]
337    pub fail_open: bool,
338    /// Query-back / pull API: authority, cache TTL, identity allowlist.
339    #[serde(default)]
340    pub query: TelemetryQueryConfig,
341    /// Declarative list; `type = "none"` rows are accepted and ignored.
342    #[serde(default)]
343    pub exporters: Vec<ExporterConfig>,
344}
345
346impl Default for TelemetryConfig {
347    fn default() -> Self {
348        Self {
349            fail_open: default_telemetry_fail_open(),
350            query: TelemetryQueryConfig::default(),
351            exporters: Vec::new(),
352        }
353    }
354}
355
356/// One pluggable sink; TOML `type` is the tag.
357#[derive(Debug, Clone, Serialize, Deserialize)]
358#[serde(tag = "type", rename_all = "lowercase")]
359pub enum ExporterConfig {
360    /// No-op row for sparse tables / templates.
361    None,
362    /// Append summary JSON lines to a local NDJSON file (default `<workspace>/.kaizen/telemetry.ndjson`).
363    File {
364        #[serde(default = "default_true")]
365        enabled: bool,
366        #[serde(default)]
367        path: Option<String>,
368    },
369    /// Echo to tracing (for wiring tests; requires the `telemetry-dev` build feature).
370    Dev {
371        #[serde(default = "default_true")]
372        enabled: bool,
373    },
374    PostHog {
375        #[serde(default = "default_true")]
376        enabled: bool,
377        /// e.g. `https://us.i.posthog.com` (default when unset)
378        host: Option<String>,
379        /// Prefer env `POSTHOG_API_KEY` or `KAIZEN_POSTHOG_API_KEY`
380        project_api_key: Option<String>,
381    },
382    Datadog {
383        #[serde(default = "default_true")]
384        enabled: bool,
385        /// e.g. `datadoghq.com`; env `DD_SITE` overrides
386        site: Option<String>,
387        /// Prefer env `DD_API_KEY` or `KAIZEN_DD_API_KEY`
388        api_key: Option<String>,
389    },
390    Otlp {
391        #[serde(default = "default_true")]
392        enabled: bool,
393        /// Env `OTEL_EXPORTER_OTLP_ENDPOINT` (or KAIZEN_ prefix) when unset here
394        endpoint: Option<String>,
395    },
396}
397
398impl ExporterConfig {
399    /// Whether this row should be considered for `load_exporters` (excludes `None`).
400    pub fn is_enabled(&self) -> bool {
401        match self {
402            ExporterConfig::None => false,
403            ExporterConfig::File { enabled, .. } => *enabled,
404            ExporterConfig::Dev { enabled, .. } => *enabled,
405            ExporterConfig::PostHog { enabled, .. } => *enabled,
406            ExporterConfig::Datadog { enabled, .. } => *enabled,
407            ExporterConfig::Otlp { enabled, .. } => *enabled,
408        }
409    }
410}
411
412#[derive(Debug, Clone, Serialize, Deserialize)]
413pub struct EvalConfig {
414    #[serde(default)]
415    pub enabled: bool,
416    #[serde(default = "default_eval_endpoint")]
417    pub endpoint: String,
418    #[serde(default)]
419    pub api_key: String,
420    #[serde(default = "default_eval_model")]
421    pub model: String,
422    #[serde(default = "default_eval_rubric")]
423    pub rubric: String,
424    #[serde(default = "default_eval_batch_size")]
425    pub batch_size: usize,
426    #[serde(default = "default_eval_min_cost")]
427    pub min_cost_usd: f64,
428}
429
430impl Default for EvalConfig {
431    fn default() -> Self {
432        Self {
433            enabled: false,
434            endpoint: default_eval_endpoint(),
435            api_key: String::new(),
436            model: default_eval_model(),
437            rubric: default_eval_rubric(),
438            batch_size: default_eval_batch_size(),
439            min_cost_usd: default_eval_min_cost(),
440        }
441    }
442}
443
/// Serde default for `EvalConfig::endpoint`.
fn default_eval_endpoint() -> String {
    String::from("https://api.anthropic.com")
}

/// Serde default for `EvalConfig::model`.
fn default_eval_model() -> String {
    String::from("claude-haiku-4-5-20251001")
}

/// Serde default for `EvalConfig::rubric`.
fn default_eval_rubric() -> String {
    String::from("tool-efficiency-v1")
}

/// Serde default for `EvalConfig::batch_size`.
fn default_eval_batch_size() -> usize {
    20
}

/// Serde default for `EvalConfig::min_cost_usd`.
fn default_eval_min_cost() -> f64 {
    0.01_f64
}
459
460/// Opt-in post-hook outcome measurement (Tier C).
461#[derive(Debug, Clone, Serialize, Deserialize)]
462pub struct CollectOutcomesConfig {
463    #[serde(default)]
464    pub enabled: bool,
465    #[serde(default = "default_outcomes_test_cmd")]
466    pub test_cmd: String,
467    #[serde(default = "default_outcomes_timeout_secs")]
468    pub timeout_secs: u64,
469    #[serde(default)]
470    pub lint_cmd: Option<String>,
471}
472
/// Serde default for `CollectOutcomesConfig::test_cmd`.
fn default_outcomes_test_cmd() -> String {
    String::from("cargo test --quiet")
}

/// Serde default for `CollectOutcomesConfig::timeout_secs`: ten minutes, in seconds.
fn default_outcomes_timeout_secs() -> u64 {
    10 * 60
}
480
481impl Default for CollectOutcomesConfig {
482    fn default() -> Self {
483        Self {
484            enabled: false,
485            test_cmd: default_outcomes_test_cmd(),
486            timeout_secs: default_outcomes_timeout_secs(),
487            lint_cmd: None,
488        }
489    }
490}
491
492/// Opt-in per-process sampling (Tier D).
493#[derive(Debug, Clone, Serialize, Deserialize)]
494pub struct CollectSystemSamplerConfig {
495    #[serde(default)]
496    pub enabled: bool,
497    #[serde(default = "default_sampler_sample_ms")]
498    pub sample_ms: u64,
499    #[serde(default = "default_sampler_max_samples")]
500    pub max_samples_per_session: u32,
501}
502
/// Serde default for `CollectSystemSamplerConfig::sample_ms`: two seconds, in milliseconds.
fn default_sampler_sample_ms() -> u64 {
    2 * 1_000
}

/// Serde default for `CollectSystemSamplerConfig::max_samples_per_session`.
fn default_sampler_max_samples() -> u32 {
    3_600
}
510
511impl Default for CollectSystemSamplerConfig {
512    fn default() -> Self {
513        Self {
514            enabled: false,
515            sample_ms: default_sampler_sample_ms(),
516            max_samples_per_session: default_sampler_max_samples(),
517        }
518    }
519}
520
521#[derive(Debug, Clone, Serialize, Deserialize, Default)]
522pub struct CollectConfig {
523    #[serde(default)]
524    pub outcomes: CollectOutcomesConfig,
525    #[serde(default)]
526    pub system_sampler: CollectSystemSamplerConfig,
527}
528
529#[derive(Debug, Clone, Serialize, Deserialize, Default)]
530pub struct Config {
531    #[serde(default)]
532    pub scan: ScanConfig,
533    #[serde(default)]
534    pub sources: SourcesConfig,
535    #[serde(default)]
536    pub retention: RetentionConfig,
537    #[serde(default)]
538    pub storage: StorageConfig,
539    #[serde(default)]
540    pub sync: SyncConfig,
541    #[serde(default)]
542    pub telemetry: TelemetryConfig,
543    #[serde(default)]
544    pub proxy: ProxyConfig,
545    #[serde(default)]
546    pub eval: EvalConfig,
547    #[serde(default)]
548    pub collect: CollectConfig,
549}
550
551/// Load config: `~/.kaizen/projects/<slug>/config.toml` then `~/.kaizen/config.toml`.
552/// User config wins on overlap. Missing files → defaults, not error.
553pub fn load(workspace: &Path) -> Result<Config> {
554    let project_cfg = crate::core::paths::project_data_dir(workspace)
555        .ok()
556        .map(|d| d.join("config.toml"));
557    let user_path = crate::core::paths::kaizen_dir()
558        .ok_or_else(|| anyhow::anyhow!("KAIZEN_HOME / HOME unset"))?
559        .join("config.toml");
560
561    let base = project_cfg
562        .as_deref()
563        .and_then(load_file)
564        .unwrap_or_default();
565    let user = load_file(&user_path).unwrap_or_default();
566    Ok(merge(base, user))
567}
568
569fn load_file(path: &Path) -> Option<Config> {
570    let text = std::fs::read_to_string(path).ok()?;
571    toml::from_str(&text).ok()
572}
573
574fn merge(base: Config, user: Config) -> Config {
575    Config {
576        scan: merge_scan(base.scan, user.scan),
577        sources: merge_sources(base.sources, user.sources),
578        retention: merge_retention(base.retention, user.retention),
579        storage: merge_storage(base.storage, user.storage),
580        sync: merge_sync(base.sync, user.sync),
581        telemetry: merge_telemetry(base.telemetry, user.telemetry),
582        proxy: merge_proxy(base.proxy, user.proxy),
583        eval: merge_eval(base.eval, user.eval),
584        collect: merge_collect(base.collect, user.collect),
585    }
586}
587
588fn merge_collect(base: CollectConfig, user: CollectConfig) -> CollectConfig {
589    let def = CollectConfig::default();
590    CollectConfig {
591        outcomes: merge_collect_outcomes(base.outcomes, user.outcomes, def.outcomes),
592        system_sampler: merge_collect_sampler(
593            base.system_sampler,
594            user.system_sampler,
595            def.system_sampler,
596        ),
597    }
598}
599
600fn merge_collect_outcomes(
601    base: CollectOutcomesConfig,
602    user: CollectOutcomesConfig,
603    def: CollectOutcomesConfig,
604) -> CollectOutcomesConfig {
605    CollectOutcomesConfig {
606        enabled: if user.enabled != def.enabled {
607            user.enabled
608        } else {
609            base.enabled
610        },
611        test_cmd: if user.test_cmd != def.test_cmd {
612            user.test_cmd
613        } else {
614            base.test_cmd
615        },
616        timeout_secs: if user.timeout_secs != def.timeout_secs {
617            user.timeout_secs
618        } else {
619            base.timeout_secs
620        },
621        lint_cmd: user.lint_cmd.or(base.lint_cmd),
622    }
623}
624
625fn merge_collect_sampler(
626    base: CollectSystemSamplerConfig,
627    user: CollectSystemSamplerConfig,
628    def: CollectSystemSamplerConfig,
629) -> CollectSystemSamplerConfig {
630    CollectSystemSamplerConfig {
631        enabled: if user.enabled != def.enabled {
632            user.enabled
633        } else {
634            base.enabled
635        },
636        sample_ms: if user.sample_ms != def.sample_ms {
637            user.sample_ms
638        } else {
639            base.sample_ms
640        },
641        max_samples_per_session: if user.max_samples_per_session != def.max_samples_per_session {
642            user.max_samples_per_session
643        } else {
644            base.max_samples_per_session
645        },
646    }
647}
648
649fn merge_sources(base: SourcesConfig, user: SourcesConfig) -> SourcesConfig {
650    let def = SourcesConfig::default();
651    SourcesConfig {
652        cursor: merge_cursor_source(base.cursor, user.cursor, def.cursor),
653        tail: merge_tail_toggles(base.tail, user.tail, def.tail),
654    }
655}
656
657fn merge_cursor_source(
658    base: CursorSourceConfig,
659    user: CursorSourceConfig,
660    def: CursorSourceConfig,
661) -> CursorSourceConfig {
662    CursorSourceConfig {
663        enabled: if user.enabled != def.enabled {
664            user.enabled
665        } else {
666            base.enabled
667        },
668        transcript_glob: if user.transcript_glob != def.transcript_glob {
669            user.transcript_glob
670        } else {
671            base.transcript_glob
672        },
673    }
674}
675
676fn merge_tail_toggles(
677    base: TailAgentToggles,
678    user: TailAgentToggles,
679    def: TailAgentToggles,
680) -> TailAgentToggles {
681    TailAgentToggles {
682        goose: if user.goose != def.goose {
683            user.goose
684        } else {
685            base.goose
686        },
687        openclaw: if user.openclaw != def.openclaw {
688            user.openclaw
689        } else {
690            base.openclaw
691        },
692        opencode: if user.opencode != def.opencode {
693            user.opencode
694        } else {
695            base.opencode
696        },
697        copilot_cli: if user.copilot_cli != def.copilot_cli {
698            user.copilot_cli
699        } else {
700            base.copilot_cli
701        },
702        copilot_vscode: if user.copilot_vscode != def.copilot_vscode {
703            user.copilot_vscode
704        } else {
705            base.copilot_vscode
706        },
707    }
708}
709
710fn merge_eval(base: EvalConfig, user: EvalConfig) -> EvalConfig {
711    let def = EvalConfig::default();
712    EvalConfig {
713        enabled: if user.enabled != def.enabled {
714            user.enabled
715        } else {
716            base.enabled
717        },
718        endpoint: if user.endpoint != def.endpoint {
719            user.endpoint
720        } else {
721            base.endpoint
722        },
723        api_key: if !user.api_key.is_empty() {
724            user.api_key
725        } else {
726            base.api_key
727        },
728        model: if user.model != def.model {
729            user.model
730        } else {
731            base.model
732        },
733        rubric: if user.rubric != def.rubric {
734            user.rubric
735        } else {
736            base.rubric
737        },
738        batch_size: if user.batch_size != def.batch_size {
739            user.batch_size
740        } else {
741            base.batch_size
742        },
743        min_cost_usd: if user.min_cost_usd != def.min_cost_usd {
744            user.min_cost_usd
745        } else {
746            base.min_cost_usd
747        },
748    }
749}
750
751fn merge_scan(base: ScanConfig, user: ScanConfig) -> ScanConfig {
752    let def = ScanConfig::default();
753    ScanConfig {
754        roots: if user.roots != def.roots {
755            user.roots
756        } else {
757            base.roots
758        },
759        min_rescan_seconds: if user.min_rescan_seconds != def.min_rescan_seconds {
760            user.min_rescan_seconds
761        } else {
762            base.min_rescan_seconds
763        },
764    }
765}
766
767fn merge_retention(base: RetentionConfig, user: RetentionConfig) -> RetentionConfig {
768    let def = RetentionConfig::default();
769    RetentionConfig {
770        hot_days: if user.hot_days != def.hot_days {
771            user.hot_days
772        } else {
773            base.hot_days
774        },
775        warm_days: if user.warm_days != def.warm_days {
776            user.warm_days
777        } else {
778            base.warm_days
779        },
780    }
781}
782
783fn merge_storage(base: StorageConfig, user: StorageConfig) -> StorageConfig {
784    let def = StorageConfig::default();
785    StorageConfig {
786        hot_max_bytes: if user.hot_max_bytes != def.hot_max_bytes {
787            user.hot_max_bytes
788        } else {
789            base.hot_max_bytes
790        },
791        cold_after_days: if user.cold_after_days != def.cold_after_days {
792            user.cold_after_days
793        } else {
794            base.cold_after_days
795        },
796        retention_days: if user.retention_days != def.retention_days {
797            user.retention_days
798        } else {
799            base.retention_days
800        },
801        flush_hour_utc: if user.flush_hour_utc != def.flush_hour_utc {
802            user.flush_hour_utc
803        } else {
804            base.flush_hour_utc
805        },
806    }
807}
808
/// Parse a human-readable byte size such as `512`, `64KB`, `1 GiB`, `3g`, or `2TB`.
///
/// The trimmed input must start with one or more ASCII digits, optionally followed by a
/// case-insensitive unit, which may be separated from the number by whitespace. Every unit is
/// a binary multiple: `k`/`kb`/`kib` = 1024, and likewise `m`, `g`, `t` for each further
/// power of 1024. No unit, or `b`, means bytes.
///
/// Returns `None` when the number is missing or does not fit in `u64`, or when the unit is
/// not recognised. The multiplication saturates at `u64::MAX` instead of overflowing.
fn parse_byte_size(raw: &str) -> Option<u64> {
    let s = raw.trim();
    // Split at the first non-digit; ASCII digits are one byte each, so this is a char boundary.
    let split = s.find(|c: char| !c.is_ascii_digit()).unwrap_or(s.len());
    let (digits, unit) = s.split_at(split);
    let n = digits.parse::<u64>().ok()?;
    let shift = match unit.trim().to_ascii_lowercase().as_str() {
        "" | "b" => 0,
        "k" | "kb" | "kib" => 10,
        "m" | "mb" | "mib" => 20,
        "g" | "gb" | "gib" => 30,
        "t" | "tb" | "tib" => 40,
        _ => return None,
    };
    Some(n.saturating_mul(1u64 << shift))
}
825
826fn merge_proxy(base: ProxyConfig, user: ProxyConfig) -> ProxyConfig {
827    let def = ProxyConfig::default();
828    ProxyConfig {
829        listen: if user.listen != def.listen {
830            user.listen
831        } else {
832            base.listen
833        },
834        upstream: if user.upstream != def.upstream {
835            user.upstream
836        } else {
837            base.upstream
838        },
839        compress_transport: if user.compress_transport != def.compress_transport {
840            user.compress_transport
841        } else {
842            base.compress_transport
843        },
844        minify_json: if user.minify_json != def.minify_json {
845            user.minify_json
846        } else {
847            base.minify_json
848        },
849        max_response_body_mb: if user.max_response_body_mb != def.max_response_body_mb {
850            user.max_response_body_mb
851        } else {
852            base.max_response_body_mb
853        },
854        max_request_body_mb: if user.max_request_body_mb != def.max_request_body_mb {
855            user.max_request_body_mb
856        } else {
857            base.max_request_body_mb
858        },
859        context_policy: if user.context_policy != def.context_policy {
860            user.context_policy
861        } else {
862            base.context_policy
863        },
864    }
865}
866
867fn merge_telemetry(base: TelemetryConfig, user: TelemetryConfig) -> TelemetryConfig {
868    let def = TelemetryConfig::default();
869    let fail_open = if user.fail_open != def.fail_open {
870        user.fail_open
871    } else {
872        base.fail_open
873    };
874    let query = merge_telemetry_query(base.query, user.query);
875    let exporters = if !user.exporters.is_empty() {
876        user.exporters
877    } else {
878        base.exporters
879    };
880    TelemetryConfig {
881        fail_open,
882        query,
883        exporters,
884    }
885}
886
887fn merge_telemetry_query(
888    base: TelemetryQueryConfig,
889    user: TelemetryQueryConfig,
890) -> TelemetryQueryConfig {
891    let def = TelemetryQueryConfig::default();
892    TelemetryQueryConfig {
893        provider: if user.provider != def.provider {
894            user.provider
895        } else {
896            base.provider
897        },
898        cache_ttl_seconds: if user.cache_ttl_seconds != def.cache_ttl_seconds {
899            user.cache_ttl_seconds
900        } else {
901            base.cache_ttl_seconds
902        },
903        identity_allowlist: merge_identity_allowlist(
904            base.identity_allowlist,
905            user.identity_allowlist,
906        ),
907    }
908}
909
910fn merge_identity_allowlist(base: IdentityAllowlist, user: IdentityAllowlist) -> IdentityAllowlist {
911    let def = IdentityAllowlist::default();
912    IdentityAllowlist {
913        team: if user.team != def.team {
914            user.team
915        } else {
916            base.team
917        },
918        workspace_label: if user.workspace_label != def.workspace_label {
919            user.workspace_label
920        } else {
921            base.workspace_label
922        },
923        runner_label: if user.runner_label != def.runner_label {
924            user.runner_label
925        } else {
926            base.runner_label
927        },
928        actor_kind: if user.actor_kind != def.actor_kind {
929            user.actor_kind
930        } else {
931            base.actor_kind
932        },
933        actor_label: if user.actor_label != def.actor_label {
934            user.actor_label
935        } else {
936            base.actor_label
937        },
938        agent: if user.agent != def.agent {
939            user.agent
940        } else {
941            base.agent
942        },
943        model: if user.model != def.model {
944            user.model
945        } else {
946            base.model
947        },
948        env: if user.env != def.env {
949            user.env
950        } else {
951            base.env
952        },
953        job: if user.job != def.job {
954            user.job
955        } else {
956            base.job
957        },
958        branch: if user.branch != def.branch {
959            user.branch
960        } else {
961            base.branch
962        },
963    }
964}
965
966fn merge_sync(base: SyncConfig, user: SyncConfig) -> SyncConfig {
967    let def = SyncConfig::default();
968    SyncConfig {
969        endpoint: if !user.endpoint.is_empty() {
970            user.endpoint
971        } else {
972            base.endpoint
973        },
974        team_token: if !user.team_token.is_empty() {
975            user.team_token
976        } else {
977            base.team_token
978        },
979        team_id: if !user.team_id.is_empty() {
980            user.team_id
981        } else {
982            base.team_id
983        },
984        events_per_batch_max: if user.events_per_batch_max != def.events_per_batch_max {
985            user.events_per_batch_max
986        } else {
987            base.events_per_batch_max
988        },
989        max_body_bytes: if user.max_body_bytes != def.max_body_bytes {
990            user.max_body_bytes
991        } else {
992            base.max_body_bytes
993        },
994        flush_interval_ms: if user.flush_interval_ms != def.flush_interval_ms {
995            user.flush_interval_ms
996        } else {
997            base.flush_interval_ms
998        },
999        sample_rate: if (user.sample_rate - def.sample_rate).abs() > f64::EPSILON {
1000            user.sample_rate
1001        } else {
1002            base.sample_rate
1003        },
1004        team_salt_hex: if !user.team_salt_hex.is_empty() {
1005            user.team_salt_hex
1006        } else {
1007            base.team_salt_hex
1008        },
1009    }
1010}
1011
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Points `KAIZEN_HOME` at a directory for as long as the value lives and removes the
    /// variable again on drop, so it cannot leak into later tests even if a test panics.
    ///
    /// Only create this while the global test lock is held (see [`load_isolated`]).
    struct KaizenHomeOverride;

    impl KaizenHomeOverride {
        fn set(home: &std::path::Path) -> Self {
            // SAFETY: the caller holds the process-wide test lock, so no other test that
            // takes that lock touches the environment while we mutate it.
            unsafe { std::env::set_var("KAIZEN_HOME", home) };
            Self
        }
    }

    impl Drop for KaizenHomeOverride {
        fn drop(&mut self) {
            // SAFETY: the override is dropped before the test lock guard (locals drop in
            // reverse declaration order), so the lock is still held here.
            unsafe { std::env::remove_var("KAIZEN_HOME") };
        }
    }

    /// Runs `load` against a fresh temporary workspace with `KAIZEN_HOME` pointing at a
    /// fresh, empty temporary directory.
    ///
    /// When `workspace_config` is `Some`, its contents are first written to `config.toml`
    /// inside the workspace's project data directory.
    ///
    /// The global test lock is held for the whole call. A poisoned lock is tolerated: the
    /// lock guards no data of its own and the environment is restored by
    /// [`KaizenHomeOverride`], so one failing test must not cascade into the others.
    /// NOTE(review): assumes `test_lock::global()` is a `std::sync::Mutex` — confirm.
    fn load_isolated(workspace_config: Option<&str>) -> Config {
        let _lock = crate::core::paths::test_lock::global()
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner());
        let home = TempDir::new().unwrap();
        let ws = TempDir::new().unwrap();
        let _home_override = KaizenHomeOverride::set(home.path());
        if let Some(contents) = workspace_config {
            let data_dir = crate::core::paths::project_data_dir(ws.path()).unwrap();
            std::fs::write(data_dir.join("config.toml"), contents).unwrap();
        }
        load(ws.path()).unwrap()
    }

    /// With no config files anywhere, `load` yields the built-in defaults.
    #[test]
    fn defaults_when_no_files() {
        let cfg = load_isolated(None);
        assert_eq!(cfg.scan.roots, ScanConfig::default().roots);
        assert_eq!(cfg.scan.min_rescan_seconds, 300);
        assert_eq!(cfg.retention.hot_days, 30);
        assert_eq!(cfg.storage.cold_after_days, 7);
        assert_eq!(cfg.storage.hot_max_bytes_value(), 1_073_741_824);
    }

    /// A `config.toml` in the project data directory is picked up by `load`.
    #[test]
    fn workspace_config_loaded() {
        let cfg = load_isolated(Some("[scan]\nroots = [\"/custom/root\"]\n"));
        assert_eq!(cfg.scan.roots, vec!["/custom/root"]);
    }

    /// An unparsable workspace config does not fail `load`; defaults are used instead.
    #[test]
    fn invalid_toml_ignored() {
        let cfg = load_isolated(Some("not valid toml :::"));
        assert_eq!(cfg.scan.roots, ScanConfig::default().roots);
    }

    /// Non-default user scan roots replace the base roots.
    #[test]
    fn merge_user_roots_win() {
        let base = Config {
            scan: ScanConfig {
                roots: vec!["/base".to_string()],
                ..ScanConfig::default()
            },
            ..Default::default()
        };
        let user = Config {
            scan: ScanConfig {
                roots: vec!["/user".to_string()],
                ..ScanConfig::default()
            },
            ..Default::default()
        };
        let merged = merge(base, user);
        assert_eq!(merged.scan.roots, vec!["/user"]);
    }

    /// An all-default user config must not clobber workspace cursor-source settings.
    #[test]
    fn merge_sources_user_default_keeps_workspace_cursor() {
        let base = Config {
            sources: SourcesConfig {
                cursor: CursorSourceConfig {
                    enabled: false,
                    transcript_glob: "/workspace/glob/**".into(),
                },
                ..Default::default()
            },
            ..Default::default()
        };
        let user = Config::default();
        let merged = merge(base, user);
        assert!(!merged.sources.cursor.enabled);
        assert_eq!(merged.sources.cursor.transcript_glob, "/workspace/glob/**");
    }

    /// Retention merges per field: the user's `hot_days` of 30 matches the default asserted
    /// in `defaults_when_no_files`, so the base value survives, while `warm_days` is taken
    /// from the user.
    #[test]
    fn merge_retention_field_by_field() {
        let base = Config {
            retention: RetentionConfig {
                hot_days: 60,
                warm_days: 90,
            },
            ..Default::default()
        };
        let user = Config {
            retention: RetentionConfig {
                hot_days: 30,
                warm_days: 45,
            },
            ..Default::default()
        };
        let merged = merge(base, user);
        assert_eq!(merged.retention.hot_days, 60);
        assert_eq!(merged.retention.warm_days, 45);
    }

    /// A non-default user `hot_days` overrides the base value.
    #[test]
    fn merge_retention_user_hot_overrides() {
        let base = Config {
            retention: RetentionConfig {
                hot_days: 60,
                warm_days: 90,
            },
            ..Default::default()
        };
        let user = Config {
            retention: RetentionConfig {
                hot_days: 14,
                warm_days: 90,
            },
            ..Default::default()
        };
        let merged = merge(base, user);
        assert_eq!(merged.retention.hot_days, 14);
        assert_eq!(merged.retention.warm_days, 90);
    }

    /// Only the storage field the user changed is overridden; the rest stay from base.
    #[test]
    fn merge_storage_user_overrides() {
        let base = Config {
            storage: StorageConfig {
                hot_max_bytes: "2GB".into(),
                cold_after_days: 14,
                retention_days: 120,
                flush_hour_utc: 3,
            },
            ..Default::default()
        };
        let user = Config {
            storage: StorageConfig {
                cold_after_days: 3,
                ..StorageConfig::default()
            },
            ..Default::default()
        };
        let merged = merge(base, user);
        assert_eq!(merged.storage.hot_max_bytes, "2GB");
        assert_eq!(merged.storage.cold_after_days, 3);
        assert_eq!(merged.storage.retention_days, 120);
        assert_eq!(merged.storage.flush_hour_utc, 3);
    }

    /// A non-empty user exporter list replaces the base list.
    #[test]
    fn merge_telemetry_exporters_user_wins_non_empty() {
        let base = Config {
            telemetry: TelemetryConfig {
                fail_open: true,
                query: TelemetryQueryConfig::default(),
                exporters: vec![ExporterConfig::None],
            },
            ..Default::default()
        };
        let user = Config {
            telemetry: TelemetryConfig {
                fail_open: false,
                query: TelemetryQueryConfig::default(),
                exporters: vec![ExporterConfig::Dev { enabled: true }],
            },
            ..Default::default()
        };
        let merged = merge(base, user);
        assert!(!merged.telemetry.fail_open);
        assert_eq!(merged.telemetry.exporters.len(), 1);
        // Both inputs carry exactly one exporter, so the length alone cannot tell which
        // side won; check that the surviving entry is the user's.
        assert!(matches!(
            merged.telemetry.exporters.as_slice(),
            [ExporterConfig::Dev { enabled: true }]
        ));
    }

    /// Default query config has no provider, a one-hour cache TTL and no team identity.
    #[test]
    fn telemetry_query_defaults() {
        let t = TelemetryQueryConfig::default();
        assert_eq!(t.provider, QueryAuthority::None);
        assert_eq!(t.cache_ttl_seconds, 3600);
        assert!(!t.identity_allowlist.team);
        assert!(!t.has_provider_for_pull());
    }

    /// Both PostHog and Datadog count as a usable provider for pulls.
    #[test]
    fn telemetry_query_has_provider() {
        let ph = TelemetryQueryConfig {
            provider: QueryAuthority::Posthog,
            ..Default::default()
        };
        assert!(ph.has_provider_for_pull());
        let dd = TelemetryQueryConfig {
            provider: QueryAuthority::Datadog,
            ..Default::default()
        };
        assert!(dd.has_provider_for_pull());
    }

    /// The user overrides only the cache TTL; provider and allowlist stay from base.
    #[test]
    fn merge_telemetry_query_user_wins() {
        let base = Config {
            telemetry: TelemetryConfig {
                query: TelemetryQueryConfig {
                    provider: QueryAuthority::Posthog,
                    cache_ttl_seconds: 3600,
                    identity_allowlist: IdentityAllowlist {
                        team: true,
                        ..Default::default()
                    },
                },
                ..Default::default()
            },
            ..Default::default()
        };
        let user = Config {
            telemetry: TelemetryConfig {
                query: TelemetryQueryConfig {
                    cache_ttl_seconds: 7200,
                    ..Default::default()
                },
                ..Default::default()
            },
            ..Default::default()
        };
        let merged = merge(base, user);
        assert_eq!(merged.telemetry.query.provider, QueryAuthority::Posthog);
        assert_eq!(merged.telemetry.query.cache_ttl_seconds, 7200);
        assert!(merged.telemetry.query.identity_allowlist.team);
    }

    /// `[telemetry.query]` and its nested allowlist table parse from TOML; allowlist keys
    /// that are not mentioned (`model`) stay false.
    #[test]
    fn toml_telemetry_query_roundtrip() {
        let toml = r#"
[telemetry.query]
provider = "datadog"
cache_ttl_seconds = 1800

[telemetry.query.identity_allowlist]
team = true
branch = true
"#;
        let cfg = load_isolated(Some(toml));
        assert_eq!(cfg.telemetry.query.provider, QueryAuthority::Datadog);
        assert_eq!(cfg.telemetry.query.cache_ttl_seconds, 1800);
        assert!(cfg.telemetry.query.identity_allowlist.team);
        assert!(cfg.telemetry.query.identity_allowlist.branch);
        assert!(!cfg.telemetry.query.identity_allowlist.model);
    }
}
1264}