Skip to main content

kaizen/shell/
telemetry.rs

1// SPDX-License-Identifier: AGPL-3.0-or-later
//! `kaizen telemetry` subcommands: configure, print-effective, doctor, pull, push, test, schema.
3
4use crate::core::config::{self, ExporterConfig, effective_redaction_salt};
5use crate::core::project_identity::project_name;
6#[cfg(any(feature = "telemetry-datadog", feature = "telemetry-posthog"))]
7use crate::provider::TelemetryQueryProvider;
8use crate::provider::{PullWindow, from_config as provider_from_config};
9use crate::shell::cli::workspace_path;
10use crate::shell::scope;
11use crate::store::Store;
12use crate::store::remote_cache::{RemoteCacheStore, RemotePullState};
13use crate::sync::IngestExportBatch;
14use crate::sync::canonical::KAIZEN_SCHEMA_VERSION;
15use crate::sync::outbound::{EventsBatchBody, OutboundEvent, outbound_event_from_row};
16use crate::sync::redact::redact_payload;
17use crate::sync::smart::outbound_tool_span;
18use crate::sync::workspace_hash;
19use crate::sync::{chunk_events_into_ingest_batches, chunk_tool_spans_into_ingest_batches};
20use crate::telemetry::{self, DatadogResolved, OtlpResolved, PostHogResolved};
21use anyhow::{Context, Result};
22use std::io::{BufRead, Write};
23use std::path::{Path, PathBuf};
24
/// Flag-supplied inputs for [`cmd_telemetry_configure`].
#[derive(Debug, Clone, Default)]
pub struct ConfigureOptions {
    /// Exporter kind (`file`, `posthog`, `datadog`, `otlp`, `dev`); prompted for when `None`
    /// unless `non_interactive` is set, in which case it is required.
    pub exporter_type: Option<String>,
    /// Output path written into the `file` exporter row; a commented-out placeholder is
    /// emitted when `None`.
    pub path: Option<PathBuf>,
    /// Datadog API key or PostHog project API key; falls back to the `DD_API_KEY` /
    /// `POSTHOG_API_KEY` environment variable, then to a prompt.
    pub api_key: Option<String>,
    /// Datadog site; falls back to `DD_SITE`, then to `datadoghq.com`.
    pub site: Option<String>,
    /// PostHog host; falls back to `POSTHOG_HOST`, then to `https://us.i.posthog.com`.
    pub host: Option<String>,
    /// OTLP endpoint; falls back to `OTEL_EXPORTER_OTLP_ENDPOINT`, then to
    /// `http://127.0.0.1:4318`.
    pub endpoint: Option<String>,
    /// When true, never read from stdin: missing values resolve to their default or fail.
    pub non_interactive: bool,
}
35
36/// Validating wizard: prompt for missing creds (or read from env / flags), `health`-check the
37/// resolved provider before touching `~/.kaizen/config.toml`, then append the exporter,
38/// idempotently set `[telemetry.query].provider` so `pull` works without extra config, and
39/// ensure a redaction salt exists. Failure to validate aborts with a clear error and writes
40/// nothing. Re-running for the same exporter type + key field is a no-op (no duplicate row).
41pub fn cmd_telemetry_configure(workspace: Option<&Path>, options: ConfigureOptions) -> Result<()> {
42    let ws = workspace_path(workspace)?;
43    let cfg_path = crate::core::home_paths::file_for_write(&ws, Path::new("config.toml"))?;
44    let home = cfg_path.parent().unwrap().to_path_buf();
45
46    println!("Kaizen telemetry — optional sinks fan-out alongside Kaizen sync.");
47    let t = resolve_exporter_type(&options)?;
48    if t.is_empty() {
49        println!("Aborted.");
50        return Ok(());
51    }
52
53    let block = match t.as_str() {
54        "file" => file_exporter_block(options.path.as_deref()),
55        "dev" => "\n[[telemetry.exporters]]\ntype = \"dev\"\n".to_string(),
56        "datadog" => configure_datadog(&options)?,
57        "posthog" => configure_posthog(&options)?,
58        "otlp" => configure_otlp(&options)?,
59        _ => anyhow::bail!("unknown type (use file, posthog, datadog, otlp, dev)"),
60    };
61
62    let existing = std::fs::read_to_string(&cfg_path).unwrap_or_default();
63    if exporter_already_present(&existing, &t) {
64        println!(
65            "Skipped: a `[[telemetry.exporters]]` row of type `{t}` already exists in {}. \
66             Edit the file directly to change credentials.",
67            cfg_path.display()
68        );
69    } else {
70        append_block(&cfg_path, &block)?;
71    }
72    ensure_query_authority(&cfg_path, &t)?;
73
74    let cfg = config::load(&ws)?;
75    let _ = effective_redaction_salt(&cfg.sync, &home).context(
76        "ensure redaction salt (configured `[sync].team_salt_hex` or auto-generated `local_salt.hex`)",
77    )?;
78    println!("Wrote {}.", cfg_path.display());
79    println!("Next: `kaizen telemetry test` to send one synthetic event to every configured sink.");
80    Ok(())
81}
82
/// True when the file already contains a `[[telemetry.exporters]]` row whose `type` is `t`.
///
/// Cheap line scan rather than full TOML parse: keeps the wizard side-effect-free if a user
/// hand-edited the file with comments/whitespace we cannot round-trip. The scan tolerates
/// arbitrary spacing around `=`, a trailing `# comment`, and single-quoted literal strings, so
/// hand-edited rows are still recognised and not duplicated.
///
/// * `toml_text` — full text of the config file.
/// * `t` — exporter type to look for (e.g. `"datadog"`).
pub(crate) fn exporter_already_present(toml_text: &str, t: &str) -> bool {
    let mut in_exporter_block = false;
    for line in toml_text.lines() {
        let l = line.trim();
        if l.starts_with("[[telemetry.exporters]]") {
            in_exporter_block = true;
            continue;
        }
        // Any other table header ends the current exporter row.
        if l.starts_with('[') {
            in_exporter_block = false;
            continue;
        }
        if in_exporter_block && is_type_assignment(l, t) {
            return true;
        }
    }
    false
}

/// True when `line` is a `type = "<t>"` (or `type = '<t>'`) assignment, optionally followed by
/// a comment. Commented-out lines never match because their key is not exactly `type`.
fn is_type_assignment(line: &str, t: &str) -> bool {
    line.split_once('=').is_some_and(|(key, value)| {
        if key.trim() != "type" {
            return false;
        }
        let value = value.trim_start();
        ['"', '\''].iter().any(|&quote| {
            value
                .strip_prefix(quote)
                .and_then(|rest| rest.strip_prefix(t))
                .and_then(|rest| rest.strip_prefix(quote))
                .map(str::trim_start)
                .is_some_and(|tail| tail.is_empty() || tail.starts_with('#'))
        })
    })
}
105
106/// Append `[telemetry.query] provider = "<authority>"` only if the file has no `[telemetry.query]`
107/// table yet. Never overrides an existing user choice; only sets one for `posthog` / `datadog`.
108fn ensure_query_authority(path: &Path, t: &str) -> Result<()> {
109    let authority = match t {
110        "datadog" => "datadog",
111        "posthog" => "posthog",
112        _ => return Ok(()),
113    };
114    let existing = std::fs::read_to_string(path).unwrap_or_default();
115    if existing.lines().any(|l| l.trim() == "[telemetry.query]") {
116        return Ok(());
117    }
118    let block = format!("\n[telemetry.query]\nprovider = \"{authority}\"\n");
119    append_block(path, &block)
120}
121
122fn resolve_exporter_type(opts: &ConfigureOptions) -> Result<String> {
123    if let Some(t) = &opts.exporter_type {
124        return Ok(t.trim().to_lowercase());
125    }
126    if opts.non_interactive {
127        anyhow::bail!("--non-interactive requires --type=<file|posthog|datadog|otlp|dev>");
128    }
129    print!("Type `file`, `posthog`, `datadog`, `otlp`, or `dev` (empty to abort): ");
130    std::io::stdout().flush()?;
131    let mut line = String::new();
132    std::io::stdin().lock().read_line(&mut line)?;
133    Ok(line.trim().to_lowercase())
134}
135
136fn configure_datadog(opts: &ConfigureOptions) -> Result<String> {
137    let api_key = read_secret(
138        "Datadog API key (DD_API_KEY, 32 hex chars — NOT the `ddapp_*` Application Key; \
139         create one at Org Settings > API Keys)",
140        opts.api_key.clone(),
141        "DD_API_KEY",
142        opts.non_interactive,
143    )?;
144    if let Some(rejected) = reject_obvious_app_key(&api_key) {
145        anyhow::bail!("{rejected}");
146    }
147    let site = read_value(
148        "Datadog site",
149        opts.site.clone(),
150        "DD_SITE",
151        Some("datadoghq.com".into()),
152        opts.non_interactive,
153    )?;
154    health_check_datadog(&api_key, &site).context(
155        "Datadog credentials rejected (DD-API-KEY /api/v1/validate failed); not writing TOML",
156    )?;
157    Ok(datadog_block(&api_key, &site))
158}
159
160fn configure_posthog(opts: &ConfigureOptions) -> Result<String> {
161    let key = read_secret(
162        "PostHog project API key (phc_...)",
163        opts.api_key.clone(),
164        "POSTHOG_API_KEY",
165        opts.non_interactive,
166    )?;
167    let host = read_value(
168        "PostHog host",
169        opts.host.clone(),
170        "POSTHOG_HOST",
171        Some("https://us.i.posthog.com".into()),
172        opts.non_interactive,
173    )?;
174    health_check_posthog(&host).context("PostHog host unreachable; not writing TOML")?;
175    Ok(format!(
176        "\n[[telemetry.exporters]]\ntype = \"posthog\"\nproject_api_key = \"{}\"\nhost = \"{}\"\n",
177        key.replace('\\', "\\\\").replace('"', "\\\""),
178        host.replace('\\', "\\\\").replace('"', "\\\""),
179    ))
180}
181
182fn configure_otlp(opts: &ConfigureOptions) -> Result<String> {
183    let endpoint = read_value(
184        "OTLP endpoint",
185        opts.endpoint.clone(),
186        "OTEL_EXPORTER_OTLP_ENDPOINT",
187        Some("http://127.0.0.1:4318".into()),
188        opts.non_interactive,
189    )?;
190    Ok(format!(
191        "\n[[telemetry.exporters]]\ntype = \"otlp\"\nendpoint = \"{}\"\n",
192        endpoint.replace('\\', "\\\\").replace('"', "\\\""),
193    ))
194}
195
/// Cheap local guard against pasting a Datadog Application Key where an API Key is needed.
///
/// Application Keys carry a `ddapp_` prefix. Returns a hint naming both key types when
/// `value` has that prefix, and `None` for anything else.
pub(crate) fn reject_obvious_app_key(value: &str) -> Option<&'static str> {
    const HINT: &str = "looks like a Datadog Application Key (`ddapp_*`); the wizard needs the API Key \
             (32 hex chars). Generate one at Org Settings > API Keys, then rerun.";
    value.starts_with("ddapp_").then_some(HINT)
}
209
/// Render the `[[telemetry.exporters]]` row for Datadog.
///
/// Both values are written as TOML basic strings: backslashes, double quotes and control
/// characters are escaped, so a value containing e.g. a newline cannot produce invalid TOML
/// or spill onto another line of the config.
fn datadog_block(api_key: &str, site: &str) -> String {
    /// Escape `raw` for use inside a double-quoted TOML basic string.
    fn toml_escape(raw: &str) -> String {
        let mut out = String::with_capacity(raw.len());
        for ch in raw.chars() {
            match ch {
                '\\' => out.push_str("\\\\"),
                '"' => out.push_str("\\\""),
                '\n' => out.push_str("\\n"),
                '\r' => out.push_str("\\r"),
                '\t' => out.push_str("\\t"),
                // Remaining control characters are not allowed raw in basic strings.
                c if c.is_control() => out.push_str(&format!("\\u{:04X}", c as u32)),
                c => out.push(c),
            }
        }
        out
    }

    format!(
        "\n[[telemetry.exporters]]\ntype = \"datadog\"\napi_key = \"{}\"\nsite = \"{}\"\n",
        toml_escape(api_key),
        toml_escape(site),
    )
}
217
218fn append_block(path: &Path, block: &str) -> Result<()> {
219    let mut content = std::fs::read_to_string(path).unwrap_or_default();
220    if !content.is_empty() {
221        content.push('\n');
222    }
223    content.push_str(block);
224    crate::core::safe_fs::write_atomic(path, content.as_bytes())?;
225    Ok(())
226}
227
228fn read_secret(
229    prompt: &str,
230    flag: Option<String>,
231    env_key: &str,
232    non_interactive: bool,
233) -> Result<String> {
234    if let Some(v) = flag.filter(|s| !s.is_empty()) {
235        return Ok(v);
236    }
237    if let Ok(v) = std::env::var(env_key)
238        && !v.is_empty()
239    {
240        return Ok(v);
241    }
242    if non_interactive {
243        anyhow::bail!("missing {env_key}: set the env var or pass --api-key");
244    }
245    print!("{prompt}: ");
246    std::io::stdout().flush()?;
247    let mut line = String::new();
248    std::io::stdin().lock().read_line(&mut line)?;
249    let v = line.trim().to_string();
250    if v.is_empty() {
251        anyhow::bail!("{env_key} is required");
252    }
253    Ok(v)
254}
255
256fn read_value(
257    prompt: &str,
258    flag: Option<String>,
259    env_key: &str,
260    default: Option<String>,
261    non_interactive: bool,
262) -> Result<String> {
263    if let Some(v) = flag.filter(|s| !s.is_empty()) {
264        return Ok(v);
265    }
266    if let Ok(v) = std::env::var(env_key)
267        && !v.is_empty()
268    {
269        return Ok(v);
270    }
271    if non_interactive {
272        return default.ok_or_else(|| anyhow::anyhow!("missing {env_key}; set env or pass flag"));
273    }
274    let hint = default
275        .as_deref()
276        .map(|d| format!(" [{d}]"))
277        .unwrap_or_default();
278    print!("{prompt}{hint}: ");
279    std::io::stdout().flush()?;
280    let mut line = String::new();
281    std::io::stdin().lock().read_line(&mut line)?;
282    let v = line.trim().to_string();
283    if v.is_empty() {
284        return default.ok_or_else(|| anyhow::anyhow!("{env_key} is required"));
285    }
286    Ok(v)
287}
288
289fn health_check_datadog(api_key: &str, site: &str) -> Result<()> {
290    let r = DatadogResolved {
291        site: site.to_string(),
292        api_key: api_key.to_string(),
293        app_key: None,
294    };
295    #[cfg(feature = "telemetry-datadog")]
296    {
297        let c = crate::provider::datadog::DatadogQueryClient::new(&r);
298        c.health()
299    }
300    #[cfg(not(feature = "telemetry-datadog"))]
301    {
302        let _ = &r;
303        anyhow::bail!("rebuild with `--features telemetry-datadog` to validate Datadog");
304    }
305}
306
307fn health_check_posthog(host: &str) -> Result<()> {
308    let r = PostHogResolved {
309        host: host.to_string(),
310        project_api_key: String::new(),
311    };
312    #[cfg(feature = "telemetry-posthog")]
313    {
314        let c = crate::provider::posthog::PostHogQueryClient::new(&r);
315        c.health()
316    }
317    #[cfg(not(feature = "telemetry-posthog"))]
318    {
319        let _ = &r;
320        anyhow::bail!("rebuild with `--features telemetry-posthog` to validate PostHog");
321    }
322}
323
/// Render the `[[telemetry.exporters]]` row for the `file` exporter.
///
/// With a `path`, a `path = "..."` key is emitted (backslashes and double quotes escaped);
/// without one, a commented-out placeholder line is emitted instead.
fn file_exporter_block(path: Option<&Path>) -> String {
    const HEADER: &str = "\n[[telemetry.exporters]]\ntype = \"file\"\nenabled = true\n";

    let path_line = match path {
        Some(p) => {
            // Escape backslashes before quotes so escaped quotes are not re-escaped.
            let escaped = p
                .to_string_lossy()
                .replace('\\', "\\\\")
                .replace('"', "\\\"");
            format!("path = \"{escaped}\"\n")
        }
        None => {
            String::from("# path = \"telemetry.ndjson\"   # optional; relative to project data\n")
        }
    };
    format!("{HEADER}{path_line}")
}
344
345/// Redacted: show which env/Toml fields are visible for `telemetry` sinks.
346pub fn print_effective_config_text(workspace: Option<&Path>) -> Result<String> {
347    let ws = workspace_path(workspace)?;
348    let cfg = config::load(&ws)?;
349    use std::fmt::Write;
350    let mut s = String::new();
351    writeln!(&mut s, "telemetry.fail_open: {}", cfg.telemetry.fail_open).unwrap();
352    for (i, e) in cfg.telemetry.exporters.iter().enumerate() {
353        match e {
354            ExporterConfig::None => writeln!(&mut s, "[{i}] type=none (ignored)").unwrap(),
355            ExporterConfig::File { enabled, path } => {
356                let p = path
357                    .as_deref()
358                    .map(|p| p.to_string())
359                    .unwrap_or_else(|| "$KAIZEN_HOME/projects/<slug>/telemetry.ndjson".into());
360                writeln!(&mut s, "[{i}] type=file enabled={enabled} path={p}").unwrap();
361            }
362            ExporterConfig::Dev { enabled } => {
363                writeln!(&mut s, "[{i}] type=dev enabled={enabled}").unwrap();
364            }
365            ExporterConfig::PostHog { .. } => {
366                let line = if let Some(r) = PostHogResolved::from_config(e) {
367                    format!(
368                        "[{i}] type=posthog host={} key=<redacted len {}>",
369                        r.host,
370                        r.project_api_key.len()
371                    )
372                } else {
373                    format!(
374                        "[{i}] type=posthog (unresolved: set POSTHOG_API_KEY or project_api_key)"
375                    )
376                };
377                writeln!(&mut s, "{line}").unwrap();
378            }
379            ExporterConfig::Datadog { .. } => {
380                let line = if let Some(r) = DatadogResolved::from_config(e) {
381                    format!(
382                        "[{i}] type=datadog site={} key=<redacted len {}>",
383                        r.site,
384                        r.api_key.len()
385                    )
386                } else {
387                    format!("[{i}] type=datadog (unresolved: set DD_API_KEY or api_key in TOML)")
388                };
389                writeln!(&mut s, "{line}").unwrap();
390            }
391            ExporterConfig::Otlp { .. } => {
392                let line = if let Some(r) = OtlpResolved::from_config(e) {
393                    format!("[{i}] type=otlp endpoint={}", r.endpoint)
394                } else {
395                    format!("[{i}] type=otlp (unresolved: OTEL_EXPORTER_OTLP_ENDPOINT)")
396                };
397                writeln!(&mut s, "{line}").unwrap();
398            }
399        }
400    }
401    if cfg.telemetry.exporters.is_empty() {
402        writeln!(&mut s, "(no [[telemetry.exporters]] rows)").unwrap();
403    }
404    Ok(s)
405}
406
407pub fn cmd_telemetry_print_effective(workspace: Option<&Path>) -> Result<()> {
408    print!("{}", print_effective_config_text(workspace)?);
409    Ok(())
410}
411
/// Alias of [`cmd_telemetry_configure`]: forwards both arguments unchanged and returns its
/// result.
pub fn cmd_telemetry_init(workspace: Option<&Path>, options: ConfigureOptions) -> Result<()> {
    cmd_telemetry_configure(workspace, options)
}
416
417/// Resolve config, run provider `health` when available, show redacted exporter view.
418pub fn cmd_telemetry_doctor(workspace: Option<&Path>) -> Result<()> {
419    let ws = workspace_path(workspace)?;
420    let cfg = config::load(&ws)?;
421    println!("telemetry.fail_open: {}", cfg.telemetry.fail_open);
422    println!(
423        "telemetry.query.cache_ttl_seconds: {}",
424        cfg.telemetry.query.cache_ttl_seconds
425    );
426    match cfg.telemetry.query.provider {
427        crate::core::config::QueryAuthority::None => println!("telemetry.query.provider: none"),
428        crate::core::config::QueryAuthority::Posthog => {
429            println!("telemetry.query.provider: posthog");
430        }
431        crate::core::config::QueryAuthority::Datadog => {
432            println!("telemetry.query.provider: datadog");
433        }
434    }
435    if let Some(p) = provider_from_config(&cfg.telemetry) {
436        match p.health() {
437            Ok(()) => println!("provider health: ok (schema: {})", p.schema_version()),
438            Err(e) => eprintln!("provider health: {e}"),
439        }
440    } else {
441        println!("query provider: (not configured or features disabled; pull disabled)");
442    }
443    println!("\n{}", print_effective_config_text(Some(&ws))?);
444    println!("\nOTLP: export only — no query/pull in v1.");
445    Ok(())
446}
447
448/// Run one page of `pull` and refresh `remote_pull_state` (payload import when APIs are wired).
449pub fn cmd_telemetry_pull(workspace: Option<&Path>, days: u32) -> Result<()> {
450    let ws = workspace_path(workspace)?;
451    let cfg = config::load(&ws)?;
452    let p = provider_from_config(&cfg.telemetry).ok_or_else(|| {
453        anyhow::anyhow!(
454            "no query provider resolved. Either:\n  \
455             1. Run `kaizen telemetry configure --type=datadog` (or `posthog`) so the wizard \
456             writes both `[[telemetry.exporters]]` and `[telemetry.query]`, OR\n  \
457             2. Set `[telemetry.query].provider = \"datadog\"` in `~/.kaizen/config.toml` and \
458             ensure DD_API_KEY is reachable (TOML row or env)."
459        )
460    })?;
461    let store = Store::open(&crate::core::workspace::db_path(&ws)?)?;
462    let page = p.pull(PullWindow { days }, None)?;
463    if !cfg.sync.team_id.trim().is_empty()
464        && let Some(ctx) = crate::sync::ingest_ctx(&cfg, ws.to_path_buf())
465        && let Some(wh) = crate::sync::smart::workspace_hash_for(&ctx)
466    {
467        match crate::provider::import_pull_page_to_remote(&store, &cfg.sync.team_id, &wh, &page) {
468            Ok(n) if n > 0 => {
469                tracing::debug!(n, "remote_events: imported from provider pull (cmd)")
470            }
471            _ => {}
472        }
473    }
474    let now_ms = std::time::SystemTime::now()
475        .duration_since(std::time::UNIX_EPOCH)
476        .unwrap_or_default()
477        .as_millis() as i64;
478    let label = match cfg.telemetry.query.provider {
479        crate::core::config::QueryAuthority::None => "none",
480        crate::core::config::QueryAuthority::Posthog => "posthog",
481        crate::core::config::QueryAuthority::Datadog => "datadog",
482    };
483    store.set_pull_state(&RemotePullState {
484        query_provider: label.into(),
485        cursor_json: page.next_cursor.unwrap_or_default(),
486        last_success_ms: Some(now_ms),
487    })?;
488    println!("pull: received {} item(s) (page)", page.items.len());
489    Ok(())
490}
491
/// Replay stored events in a trailing window through configured telemetry exporters (no Kaizen POST).
///
/// * `workspace` / `all_workspaces` — passed to `scope::resolve` to pick the workspace roots.
/// * `days` — size of the trailing window, ending now.
/// * `dry_run` — when true, only per-workspace counts are reported and nothing is fanned out.
///
/// Config, redaction salt and the exporter registry are loaded once from the first resolved
/// root and reused for every root. Progress and totals are written to stderr.
///
/// # Errors
///
/// Fails when no roots resolve, config/salt/store loading fails, no exporters are configured,
/// batching fails, or a fan-out call returns an error.
pub fn cmd_telemetry_push(
    workspace: Option<&Path>,
    all_workspaces: bool,
    days: u32,
    dry_run: bool,
) -> Result<()> {
    let roots = scope::resolve(workspace, all_workspaces)?;
    // The first root supplies config, salt and exporters for the whole run.
    let primary = roots
        .first()
        .cloned()
        .ok_or_else(|| anyhow::anyhow!("no workspace roots"))?;
    let cfg = config::load(&primary)?;
    let home = crate::core::home_paths::root(&primary)?;
    let salt = effective_redaction_salt(&cfg.sync, &home).context(
        "resolve redaction salt (configured `[sync].team_salt_hex` or auto-generated `local_salt.hex`)",
    )?;
    let registry = telemetry::load_exporters(&cfg.telemetry, primary.as_path());
    if registry.is_empty() {
        anyhow::bail!(
            "no telemetry exporters to push to: add [[telemetry.exporters]] (e.g. type = \"file\" \
             needs no extra feature; PostHog/Datadog/OTLP need build features); see \
             `kaizen telemetry print-effective-config`."
        );
    }
    let fail_open = cfg.telemetry.fail_open;
    let team_id = cfg.sync.team_id.clone();
    // Window is [now - days, now] in epoch milliseconds; saturating math avoids underflow.
    let end_ms = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap_or_default()
        .as_millis() as u64;
    let start_ms = end_ms.saturating_sub((days as u64).saturating_mul(86_400_000));

    let mut total_events: u64 = 0;
    let mut total_spans: u64 = 0;
    let mut total_batches: u64 = 0;
    // Items timestamped more than 18h before `end_ms` are counted as "stale" for the note below.
    let intake_warning_threshold_ms = end_ms.saturating_sub(18 * 3_600_000);
    let mut total_stale: u64 = 0;

    for root in &roots {
        let store = Store::open(&crate::core::workspace::db_path(root)?)?;
        let ws_key = root.to_string_lossy().to_string();
        let wh = workspace_hash(&salt, root.as_path());
        let project = project_name(root.as_path());

        let event_rows = store.retro_events_in_window(&ws_key, start_ms, end_ms)?;
        let stale_events = event_rows
            .iter()
            .filter(|(_, ev)| ev.ts_ms < intake_warning_threshold_ms)
            .count() as u64;
        // Convert each stored row to its outbound form, then redact its payload.
        let outbound_events: Vec<_> = event_rows
            .into_iter()
            .map(|(session, ev)| {
                let mut o = outbound_event_from_row(&ev, &session, &salt);
                redact_payload(&mut o.payload, root.as_path(), &salt);
                o
            })
            .collect();
        let n_events = outbound_events.len() as u64;
        let event_batches = chunk_events_into_ingest_batches(
            team_id.clone(),
            wh.clone(),
            project.clone(),
            outbound_events,
            &cfg.sync,
        )?;

        let span_rows = store.tool_spans_sync_rows_in_window(&ws_key, start_ms, end_ms)?;
        // A span's age is judged by its start time, falling back to its end time; spans with
        // neither timestamp are not counted as stale.
        let stale_spans = span_rows
            .iter()
            .filter(|r| {
                r.started_at_ms
                    .or(r.ended_at_ms)
                    .map(|t| t < intake_warning_threshold_ms)
                    .unwrap_or(false)
            })
            .count() as u64;
        let outbound_spans: Vec<_> = span_rows
            .iter()
            .map(|r| outbound_tool_span(r, &salt))
            .collect();
        let n_spans = outbound_spans.len() as u64;
        let span_batches = chunk_tool_spans_into_ingest_batches(
            team_id.clone(),
            wh,
            project,
            outbound_spans,
            &cfg.sync,
        )?;

        let bcount = (event_batches.len() + span_batches.len()) as u64;
        total_events += n_events;
        total_spans += n_spans;
        total_batches += bcount;
        total_stale += stale_events + stale_spans;

        // Dry-run still counts everything above but skips the fan-out for this root.
        if dry_run {
            eprintln!(
                "telemetry push (dry-run): {} — {} event(s), {} span(s), {} batch(es)",
                root.display(),
                n_events,
                n_spans,
                bcount
            );
            continue;
        }
        // Event batches are sent first, then span batches; the first error aborts the run.
        for batch in event_batches.into_iter().chain(span_batches) {
            registry
                .fan_out(fail_open, &batch)
                .with_context(|| format!("telemetry fan-out ({})", batch.kind_name()))?;
        }
        eprintln!(
            "telemetry push: {} — sent {} event(s), {} span(s) in {} batch(es)",
            root.display(),
            n_events,
            n_spans,
            bcount
        );
    }

    eprintln!(
        "telemetry push: total {} event(s), {} span(s), {} batch(es) across {} workspace(s){}",
        total_events,
        total_spans,
        total_batches,
        roots.len(),
        if dry_run { " (dry-run)" } else { "" }
    );
    // NOTE(review): the hint suggests `--days 1`, but a 24h window can still include items
    // older than the 18h threshold used above — confirm the intended wording.
    if total_stale > 0 {
        eprintln!(
            "note: {} item(s) have a `timestamp` older than 18h. Datadog Logs intake silently \
             drops these (organization default). PostHog/OTLP/file sinks accept them without \
             change. Use `--days N` with N <= 1 to skip stale items.",
            total_stale
        );
    }
    Ok(())
}
630
631/// Send one synthetic redacted event through every configured exporter, report ok/fail per
632/// sink. Pure observability: no SQLite read, no outbox enqueue, no Kaizen POST.
633pub fn cmd_telemetry_test(workspace: Option<&Path>) -> Result<()> {
634    let ws = workspace_path(workspace)?;
635    let cfg = config::load(&ws)?;
636    let registry = telemetry::load_exporters(&cfg.telemetry, ws.as_path());
637    if registry.is_empty() {
638        anyhow::bail!(
639            "no `[[telemetry.exporters]]` rows resolved; run `kaizen telemetry configure --type=...` first"
640        );
641    }
642    let batch = synthetic_batch(&cfg.sync.team_id);
643    println!("telemetry test: sending one synthetic event to each configured sink ...");
644    let mut all_ok = true;
645    for name in registry.exporter_names() {
646        match registry.export_one(&name, &batch) {
647            Ok(()) => println!("  [{name}] ok"),
648            Err(e) => {
649                all_ok = false;
650                println!("  [{name}] FAIL: {e:#}");
651            }
652        }
653    }
654    if !all_ok {
655        anyhow::bail!("one or more exporters failed (see above)");
656    }
657    println!("telemetry test: all exporters accepted the synthetic event.");
658    Ok(())
659}
660
661fn synthetic_batch(team_id: &str) -> IngestExportBatch {
662    let now_ms = std::time::SystemTime::now()
663        .duration_since(std::time::UNIX_EPOCH)
664        .map(|d| d.as_millis() as u64)
665        .unwrap_or(0);
666    IngestExportBatch::Events(EventsBatchBody {
667        team_id: team_id.to_string(),
668        workspace_hash: "blake3:test-workspace".into(),
669        project_name: Some("telemetry-test".into()),
670        events: vec![OutboundEvent {
671            session_id_hash: "blake3:test-session".into(),
672            event_seq: 0,
673            ts_ms: now_ms,
674            agent: "kaizen".into(),
675            model: "synthetic".into(),
676            kind: "lifecycle".into(),
677            source: "tail".into(),
678            tool: None,
679            tool_call_id: None,
680            tokens_in: Some(0),
681            tokens_out: Some(0),
682            reasoning_tokens: None,
683            cost_usd_e6: None,
684            payload: serde_json::json!({"kaizen.telemetry_test": true}),
685        }],
686    })
687}
688
689/// Example JSON for canonical per-item export names (ingest + third-party mappers).
690pub fn cmd_telemetry_print_schema() -> Result<()> {
691    let v = serde_json::json!({
692        "kaizen_schema_version": KAIZEN_SCHEMA_VERSION,
693        "event_names": [
694            "kaizen.event",
695            "kaizen.tool_span",
696            "kaizen.repo_snapshot_chunk",
697            "kaizen.workspace_fact_snapshot"
698        ],
699        "note": "Full shapes: see sync::canonical::CanonicalItem and expand_ingest_batch (tests include golden JSON).",
700    });
701    println!("{}", serde_json::to_string_pretty(&v)?);
702    Ok(())
703}
704
/// Unit tests for the wizard's file-scanning helpers and the Application Key guard.
#[cfg(test)]
mod tests {
    use super::*;

    /// A single exporter row is found by its own type and not by another type.
    #[test]
    fn exporter_already_present_detects_existing_datadog_row() {
        let toml = r#"
[[telemetry.exporters]]
type = "datadog"
api_key = "abc"
site = "us5.datadoghq.com"
"#;
        assert!(exporter_already_present(toml, "datadog"));
        assert!(!exporter_already_present(toml, "posthog"));
    }

    /// Exporter rows separated by an unrelated table are each still detected.
    #[test]
    fn exporter_already_present_handles_other_tables_between() {
        let toml = r#"
[[telemetry.exporters]]
type = "file"
enabled = true

[telemetry.query]
provider = "datadog"

[[telemetry.exporters]]
type = "datadog"
api_key = "abc"
"#;
        assert!(exporter_already_present(toml, "file"));
        assert!(exporter_already_present(toml, "datadog"));
        assert!(!exporter_already_present(toml, "otlp"));
    }

    /// Only values that start with `ddapp_` are rejected; the prefix elsewhere is fine.
    #[test]
    fn reject_obvious_app_key_catches_ddapp_prefix() {
        assert!(reject_obvious_app_key("ddapp_FjBvwn3GKN8C6jiqltnbK0UHdUEs3gmlP1").is_some());
        assert!(reject_obvious_app_key("not_ddapp_plain_api_key_value").is_none());
    }

    /// With no `[telemetry.query]` table in the file, one is appended for datadog.
    #[test]
    fn ensure_query_authority_appends_when_missing() {
        let dir = tempfile::TempDir::new().unwrap();
        let p = dir.path().join("config.toml");
        std::fs::write(
            &p,
            "[[telemetry.exporters]]\ntype = \"datadog\"\napi_key = \"abc\"\n",
        )
        .unwrap();
        ensure_query_authority(&p, "datadog").unwrap();
        let s = std::fs::read_to_string(&p).unwrap();
        assert!(s.contains("[telemetry.query]"));
        assert!(s.contains("provider = \"datadog\""));
    }

    /// An existing `[telemetry.query]` table leaves the file byte-for-byte unchanged.
    #[test]
    fn ensure_query_authority_idempotent_when_present() {
        let dir = tempfile::TempDir::new().unwrap();
        let p = dir.path().join("config.toml");
        let original = "[[telemetry.exporters]]\ntype = \"datadog\"\n\n[telemetry.query]\nprovider = \"posthog\"\n";
        std::fs::write(&p, original).unwrap();
        ensure_query_authority(&p, "datadog").unwrap();
        let s = std::fs::read_to_string(&p).unwrap();
        // User's existing posthog choice must NOT be overridden by the wizard.
        assert_eq!(s, original);
    }
}