1use crate::config::Config;
7use anyhow::{Context, Result};
8use chrono::{DateTime, Utc};
9use reqwest::StatusCode;
10use serde::{Deserialize, Serialize};
11use std::backtrace::Backtrace;
12use std::io::{self, Write};
13use std::panic;
14use std::path::{Path, PathBuf};
15use std::sync::Once;
16use std::time::Duration;
17use uuid::Uuid;
18
/// Value written to the `report_version` field of every new crash report.
const REPORT_VERSION: u32 = 1;
/// Schema identifier sent in the upload envelope and the `X-CodeTether-Schema` header.
const CRASH_SCHEMA: &str = "codetether.crash.v1";
/// Source identifier sent in the upload envelope and the `X-CodeTether-Source` header.
const CRASH_SOURCE: &str = "codetether-agent";
/// Number of report files kept in the report directory before older ones are pruned.
const MAX_PENDING_REPORTS: usize = 50;
/// Character limit applied to the panic message before it is stored in a report.
const MAX_PANIC_MESSAGE_CHARS: usize = 2048;
/// Character limit applied to the captured backtrace before it is stored in a report.
const MAX_BACKTRACE_CHARS: usize = 32_000;
/// Character limit applied to the recorded process command line.
const MAX_COMMAND_CHARS: usize = 1024;

/// File name of the persisted upload/backoff state.
const CRASH_UPLOAD_STATE_FILE: &str = "crash-upload-state.json";
// Lower and upper bounds (in seconds) for the upload retry backoff, followed by
// the number of rejected uploads after which a single flush gives up.
const MIN_UPLOAD_BACKOFF_SECS: u64 = 5 * 60; const MAX_UPLOAD_BACKOFF_SECS: u64 = 24 * 60 * 60; const MAX_STATUS_FAILURES_PER_FLUSH: usize = 3;

/// Environment variable holding a bearer token for the crash report endpoint.
const ENV_CRASH_REPORT_AUTH_TOKEN: &str = "CODETETHER_CRASH_REPORT_AUTH_TOKEN";
/// Environment variable holding an API key for the crash report endpoint.
const ENV_CRASH_REPORT_API_KEY: &str = "CODETETHER_CRASH_REPORT_API_KEY";
34
/// Snapshot of everything the crash reporter needs at runtime, resolved once
/// from the configuration and the process environment.
#[derive(Debug, Clone)]
struct CrashReporterSettings {
    /// Whether crash reporting is enabled in the configuration.
    enabled: bool,
    /// URL that crash reports are POSTed to.
    endpoint: String,
    /// Directory in which crash report files are written and later read back.
    report_dir: PathBuf,
    /// Crate version taken from `CARGO_PKG_VERSION` at compile time.
    app_version: String,
    /// Space-joined process arguments, truncated to `MAX_COMMAND_CHARS`.
    command_line: String,
    /// Install identifier sent with every upload.
    install_id: String,
    /// Optional bearer token read from `CODETETHER_CRASH_REPORT_AUTH_TOKEN`.
    auth_token: Option<String>,
    /// Optional API key read from `CODETETHER_CRASH_REPORT_API_KEY`.
    api_key: Option<String>,
}
46
47impl CrashReporterSettings {
48 fn from_config(config: &Config) -> Self {
49 let install_id = match load_or_create_install_id() {
50 Ok(id) => id,
51 Err(err) => {
52 tracing::warn!(error = %err, "Failed to load persistent install ID; using ephemeral value");
53 Uuid::new_v4().to_string()
54 }
55 };
56
57 Self {
58 enabled: config.telemetry.crash_reporting_enabled(),
59 endpoint: config.telemetry.crash_report_endpoint(),
60 report_dir: crash_report_dir(),
61 app_version: env!("CARGO_PKG_VERSION").to_string(),
62 command_line: truncate_with_ellipsis(
63 &std::env::args().collect::<Vec<_>>().join(" "),
64 MAX_COMMAND_CHARS,
65 ),
66 install_id,
67 auth_token: env_non_empty(ENV_CRASH_REPORT_AUTH_TOKEN),
68 api_key: env_non_empty(ENV_CRASH_REPORT_API_KEY),
69 }
70 }
71}
72
/// A single crash record as written to disk and later uploaded.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct CrashReport {
    /// Format version of this record (`REPORT_VERSION` when created).
    report_version: u32,
    /// Random UUID identifying this report.
    report_id: String,
    /// UTC time at which the report was built.
    occurred_at: DateTime<Utc>,
    /// Application version copied from the reporter settings.
    app_version: String,
    /// Command line copied from the reporter settings.
    command_line: String,
    /// Operating system name from `std::env::consts::OS`.
    os: String,
    /// CPU architecture from `std::env::consts::ARCH`.
    arch: String,
    /// ID of the process that panicked.
    process_id: u32,
    /// Name of the panicking thread, or `"unnamed"`.
    thread_name: String,
    /// Panic message, truncated to `MAX_PANIC_MESSAGE_CHARS`.
    panic_message: String,
    /// `file:line:column` of the panic, when the panic info provides a location.
    panic_location: Option<String>,
    /// Captured backtrace, truncated to `MAX_BACKTRACE_CHARS`.
    backtrace: String,
    /// Memory snapshot taken while building the report; omitted from the JSON
    /// when absent and defaulted to `None` when missing on read.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    memory: Option<crate::telemetry::memory::MemorySnapshot>,
}
90
91impl CrashReport {
92 fn from_panic_info(
93 settings: &CrashReporterSettings,
94 panic_info: &panic::PanicHookInfo<'_>,
95 ) -> Self {
96 let panic_message = panic_payload_to_string(panic_info);
97 let panic_location = panic_info
98 .location()
99 .map(|loc| format!("{}:{}:{}", loc.file(), loc.line(), loc.column()));
100 let thread_name = std::thread::current()
101 .name()
102 .map(|s| s.to_string())
103 .unwrap_or_else(|| "unnamed".to_string());
104 let backtrace = Backtrace::force_capture().to_string();
105
106 Self {
107 report_version: REPORT_VERSION,
108 report_id: Uuid::new_v4().to_string(),
109 occurred_at: Utc::now(),
110 app_version: settings.app_version.clone(),
111 command_line: settings.command_line.clone(),
112 os: std::env::consts::OS.to_string(),
113 arch: std::env::consts::ARCH.to_string(),
114 process_id: std::process::id(),
115 thread_name,
116 panic_message: truncate_with_ellipsis(&panic_message, MAX_PANIC_MESSAGE_CHARS),
117 panic_location,
118 backtrace: truncate_with_ellipsis(&backtrace, MAX_BACKTRACE_CHARS),
119 memory: Some(crate::telemetry::memory::MemorySnapshot::capture()),
120 }
121 }
122}
123
/// Wrapper serialized as the body of a schema-style upload.
#[derive(Debug, Clone, Serialize)]
struct CrashEnvelope<'a> {
    /// Schema identifier (`CRASH_SCHEMA`).
    schema: &'static str,
    /// Source identifier (`CRASH_SOURCE`).
    source: &'static str,
    /// UTC time at which the envelope was built for sending.
    sent_at: DateTime<Utc>,
    /// Install identifier borrowed from the reporter settings.
    install_id: &'a str,
    /// The crash report being uploaded.
    report: &'a CrashReport,
}
132
/// Persisted bookkeeping used to back off after failed upload attempts.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
struct CrashUploadState {
    /// Number of flushes in a row that ended without a successful upload.
    consecutive_failures: u32,
    /// Earliest time at which the next upload may be attempted, if backing off.
    next_attempt_at: Option<DateTime<Utc>>,
    /// Time at which this state was last updated by a flush.
    last_attempt_at: Option<DateTime<Utc>>,
    /// Truncated transport error from the last failed flush, if there was one.
    last_error: Option<String>,
}
144
145pub async fn maybe_prompt_for_consent(config: &Config, allow_prompt: bool) -> Config {
146 if !allow_prompt {
147 return config.clone();
148 }
149
150 if config.telemetry.crash_reporting.is_some() || config.telemetry.crash_reporting_prompted() {
151 return config.clone();
152 }
153
154 println!();
155 println!("CodeTether Optional Crash Reporting");
156 println!("- Helps us fix catastrophic crashes faster.");
157 println!("- Sends panic message, stack trace, app version, OS/arch, and command.");
158 println!("- Does not intentionally include API keys or source files.");
159 println!(
160 "- You can change this any time with `codetether config --set telemetry.crash_reporting=true|false`."
161 );
162
163 let enabled = match prompt_yes_no("Enable crash reporting now? [y/N]: ") {
164 Ok(v) => v,
165 Err(err) => {
166 tracing::warn!(error = %err, "Failed to collect crash reporting consent input");
167 return config.clone();
168 }
169 };
170
171 if let Err(err) = persist_consent_choice(enabled).await {
172 tracing::warn!(error = %err, "Failed to persist crash reporting consent choice");
173 return config.clone();
174 }
175
176 if enabled {
177 println!("Crash reporting enabled.");
178 if env_non_empty(ENV_CRASH_REPORT_AUTH_TOKEN).is_none()
179 && env_non_empty(ENV_CRASH_REPORT_API_KEY).is_none()
180 {
181 println!(
182 "If your telemetry endpoint requires auth, set {} or {}.",
183 ENV_CRASH_REPORT_AUTH_TOKEN, ENV_CRASH_REPORT_API_KEY
184 );
185 }
186 } else {
187 println!("Crash reporting disabled.");
188 }
189
190 match Config::load().await {
191 Ok(updated) => updated,
192 Err(err) => {
193 tracing::warn!(error = %err, "Failed to reload config after consent update");
194 config.clone()
195 }
196 }
197}
198
199pub async fn initialize(config: &Config) {
200 let settings = CrashReporterSettings::from_config(config);
201 install_panic_hook(settings.clone());
202
203 crate::telemetry::rss_watchdog::spawn(settings.report_dir.clone());
208
209 if !settings.enabled {
210 return;
211 }
212
213 if settings.auth_token.is_none() && settings.api_key.is_none() {
214 tracing::info!(
215 "Crash reporting enabled without auth headers. Set CODETETHER_CRASH_REPORT_AUTH_TOKEN or CODETETHER_CRASH_REPORT_API_KEY if your endpoint requires authentication."
216 );
217 }
218
219 if let Err(err) = flush_pending_reports(&settings).await {
220 tracing::warn!(error = %err, "Failed to flush pending crash reports");
221 }
222}
223
224fn install_panic_hook(settings: CrashReporterSettings) {
225 static PANIC_HOOK_ONCE: Once = Once::new();
226 PANIC_HOOK_ONCE.call_once(|| {
227 let default_hook = panic::take_hook();
228 panic::set_hook(Box::new(move |panic_info| {
229 let persisted = persist_crash_report(&settings, panic_info);
230 default_hook(panic_info);
231
232 match persisted {
233 Ok(path) if settings.enabled => {
234 eprintln!(
235 "codetether: crash report queued at '{}' and will be sent on next startup.",
236 path.display()
237 );
238 }
239 Ok(path) => {
240 eprintln!(
241 "codetether: crash report saved at '{}'. Crash reporting is disabled (opt-in).",
242 path.display()
243 );
244 eprintln!(
245 "codetether: enable with `codetether config --set telemetry.crash_reporting=true`"
246 );
247 }
248 Err(err) => {
249 eprintln!("codetether: failed to persist crash report: {err}");
250 }
251 }
252 }));
253 });
254}
255
256fn persist_crash_report(
257 settings: &CrashReporterSettings,
258 panic_info: &panic::PanicHookInfo<'_>,
259) -> Result<PathBuf> {
260 std::fs::create_dir_all(&settings.report_dir)
261 .with_context(|| format!("create report dir {}", settings.report_dir.display()))?;
262
263 let report = CrashReport::from_panic_info(settings, panic_info);
264 let file_name = format!(
265 "{}-{}.json",
266 report.occurred_at.format("%Y%m%dT%H%M%S%.3fZ"),
267 report.report_id
268 );
269 let path = settings.report_dir.join(file_name);
270 let payload = serde_json::to_string_pretty(&report)?;
271 std::fs::write(&path, payload).with_context(|| format!("write report {}", path.display()))?;
272
273 prune_old_reports(&settings.report_dir, MAX_PENDING_REPORTS)?;
274 Ok(path)
275}
276
277fn prune_old_reports(report_dir: &Path, max_reports: usize) -> Result<()> {
278 let mut reports = pending_report_paths(report_dir)?;
279 if reports.len() <= max_reports {
280 return Ok(());
281 }
282
283 reports.sort_by_key(|path| {
284 std::fs::metadata(path)
285 .ok()
286 .and_then(|meta| meta.modified().ok())
287 });
288
289 let remove_count = reports.len().saturating_sub(max_reports);
290 for path in reports.into_iter().take(remove_count) {
291 if let Err(err) = std::fs::remove_file(&path) {
292 tracing::warn!(path = %path.display(), error = %err, "Failed pruning old crash report");
293 }
294 }
295
296 Ok(())
297}
298
299fn pending_report_paths(report_dir: &Path) -> Result<Vec<PathBuf>> {
300 if !report_dir.exists() {
301 return Ok(Vec::new());
302 }
303
304 let mut paths = Vec::new();
305 for entry in std::fs::read_dir(report_dir)? {
306 let entry = entry?;
307 let path = entry.path();
308 if path.extension().is_some_and(|ext| ext == "json") {
309 paths.push(path);
310 }
311 }
312
313 paths.sort_by_key(|path| {
314 std::fs::metadata(path)
315 .ok()
316 .and_then(|meta| meta.modified().ok())
317 });
318
319 Ok(paths)
320}
321
/// Uploads the crash reports waiting in the report directory and updates the
/// persisted upload/backoff state.
///
/// Returns early with `Ok(())` while a backoff window is active or when there
/// is nothing to send. Each report file is read, parsed and uploaded in turn.
/// Files that upload successfully and files that fail to parse are deleted;
/// rejected or unreadable files are left in place. The loop stops at the
/// first transport error, or once `MAX_STATUS_FAILURES_PER_FLUSH` uploads
/// have been rejected.
///
/// # Errors
///
/// Fails only if the pending reports cannot be listed or the HTTP client
/// cannot be built. Per-report problems are logged and counted instead, and
/// failures to save the upload state are ignored.
async fn flush_pending_reports(settings: &CrashReporterSettings) -> Result<()> {
    if should_skip_upload_due_to_backoff(settings).await {
        return Ok(());
    }

    let paths = pending_report_paths(&settings.report_dir)?;
    if paths.is_empty() {
        return Ok(());
    }

    let client = reqwest::Client::builder()
        .timeout(Duration::from_secs(8))
        .user_agent(format!("codetether/{}", settings.app_version))
        .build()
        .context("build crash reporting HTTP client")?;

    // Tallies for this flush; `status_failures` counts only rejected uploads.
    let mut sent = 0usize;
    let mut failed = 0usize;
    let mut status_failures = 0usize;
    let mut transport_error: Option<anyhow::Error> = None;

    for path in paths {
        let raw = match tokio::fs::read_to_string(&path).await {
            Ok(content) => content,
            Err(err) => {
                // Unreadable file: count it and keep it on disk.
                failed += 1;
                tracing::warn!(path = %path.display(), error = %err, "Failed reading crash report");
                continue;
            }
        };

        let report: CrashReport = match serde_json::from_str(&raw) {
            Ok(report) => report,
            Err(err) => {
                // Unparseable file: delete it so it is not retried.
                failed += 1;
                tracing::warn!(
                    path = %path.display(),
                    error = %err,
                    "Invalid crash report format; dropping file"
                );
                let _ = tokio::fs::remove_file(&path).await;
                continue;
            }
        };

        match upload_report(settings, &client, &report).await {
            Ok(true) => {
                sent += 1;
                if let Err(err) = tokio::fs::remove_file(&path).await {
                    tracing::warn!(
                        path = %path.display(),
                        error = %err,
                        "Failed deleting uploaded crash report"
                    );
                }
            }
            Ok(false) => {
                // The endpoint answered but rejected the report; the file stays queued.
                failed += 1;
                status_failures += 1;
                if status_failures >= MAX_STATUS_FAILURES_PER_FLUSH {
                    tracing::warn!(
                        failures = status_failures,
                        endpoint = %settings.endpoint,
                        "Crash report uploads are being rejected; stopping this flush to avoid log spam"
                    );
                    break;
                }
            }
            Err(err) => {
                // The request itself failed; stop and let the backoff handle the retry.
                failed += 1;
                transport_error = Some(err);
                break;
            }
        }
    }

    if sent > 0 {
        // At least one upload succeeded: clear any backoff.
        // NOTE(review): a transport error that follows a successful upload is
        // neither logged nor recorded here — confirm this is intended.
        let _ = save_upload_state(
            CrashUploadState {
                consecutive_failures: 0,
                next_attempt_at: None,
                last_attempt_at: Some(Utc::now()),
                last_error: None,
            },
            settings,
        )
        .await;
    } else if failed > 0 {
        // NOTE(review): `failed` also counts local read/parse failures, so a
        // single corrupt file with nothing else sent bumps the upload backoff
        // — confirm this is intended.
        let err_str = transport_error
            .as_ref()
            .map(|e| truncate_with_ellipsis(&format!("{e}"), 300));

        let _ = bump_upload_backoff(err_str, settings).await;

        if let Some(err) = transport_error {
            tracing::warn!(
                endpoint = %settings.endpoint,
                error = %err,
                "Crash report upload failed (transport). Backing off and will retry later."
            );
        }
    }

    tracing::info!(
        sent = sent,
        failed = failed,
        endpoint = %settings.endpoint,
        "Crash report sync complete"
    );

    Ok(())
}
441
442async fn should_skip_upload_due_to_backoff(settings: &CrashReporterSettings) -> bool {
443 let Some(state) = load_upload_state(settings).await else {
444 return false;
445 };
446
447 let Some(next) = state.next_attempt_at else {
448 return false;
449 };
450
451 let now = Utc::now();
452 if next > now {
453 let secs = (next - now).num_seconds().max(0);
454 tracing::info!(
455 endpoint = %settings.endpoint,
456 retry_in_secs = secs,
457 consecutive_failures = state.consecutive_failures,
458 "Skipping crash report upload due to backoff"
459 );
460 return true;
461 }
462
463 false
464}
465
466async fn load_upload_state(settings: &CrashReporterSettings) -> Option<CrashUploadState> {
467 let path = crash_upload_state_path(settings);
468 let raw = tokio::fs::read_to_string(&path).await.ok()?;
469 serde_json::from_str(&raw).ok()
470}
471
472async fn save_upload_state(
473 state: CrashUploadState,
474 settings: &CrashReporterSettings,
475) -> Result<()> {
476 let path = crash_upload_state_path(settings);
477 if let Some(parent) = path.parent() {
478 tokio::fs::create_dir_all(parent)
479 .await
480 .with_context(|| format!("create crash telemetry dir {}", parent.display()))?;
481 }
482 let raw = serde_json::to_string_pretty(&state).context("serialize crash upload state")?;
483 tokio::fs::write(&path, raw)
484 .await
485 .with_context(|| format!("write crash upload state {}", path.display()))?;
486 Ok(())
487}
488
489async fn bump_upload_backoff(
490 last_error: Option<String>,
491 settings: &CrashReporterSettings,
492) -> Result<()> {
493 let mut state = load_upload_state(settings).await.unwrap_or_default();
494 state.consecutive_failures = state.consecutive_failures.saturating_add(1);
495 state.last_attempt_at = Some(Utc::now());
496 state.last_error = last_error;
497
498 let backoff = compute_backoff_secs(state.consecutive_failures);
499 state.next_attempt_at = Some(Utc::now() + chrono::Duration::seconds(backoff as i64));
500
501 save_upload_state(state, settings).await
502}
503
504fn compute_backoff_secs(consecutive_failures: u32) -> u64 {
505 let exp = consecutive_failures.saturating_sub(1).min(10);
508 let factor = 1u64.checked_shl(exp).unwrap_or(u64::MAX);
509 let backoff = MIN_UPLOAD_BACKOFF_SECS.saturating_mul(factor);
510 backoff.clamp(MIN_UPLOAD_BACKOFF_SECS, MAX_UPLOAD_BACKOFF_SECS)
511}
512
513fn crash_upload_state_path(settings: &CrashReporterSettings) -> PathBuf {
514 let data_dir = settings
519 .report_dir
520 .parent()
521 .map(Path::to_path_buf)
522 .unwrap_or_else(codetether_data_dir);
523 data_dir.join("telemetry").join(CRASH_UPLOAD_STATE_FILE)
524}
525
526async fn upload_report(
527 settings: &CrashReporterSettings,
528 client: &reqwest::Client,
529 report: &CrashReport,
530) -> Result<bool> {
531 let envelope = CrashEnvelope {
532 schema: CRASH_SCHEMA,
533 source: CRASH_SOURCE,
534 sent_at: Utc::now(),
535 install_id: &settings.install_id,
536 report,
537 };
538
539 let response = build_upload_request(settings, client, report)
540 .json(&envelope)
541 .send()
542 .await
543 .context("send schema crash payload")?;
544
545 let status = response.status();
546 if status.is_success() {
547 return Ok(true);
548 }
549
550 if should_retry_with_legacy_payload(status) {
551 tracing::info!(
552 status = %status,
553 report_id = %report.report_id,
554 "Crash endpoint rejected schema envelope; trying legacy payload"
555 );
556
557 let legacy_response = build_upload_request(settings, client, report)
558 .header("X-CodeTether-Payload", "legacy")
559 .json(report)
560 .send()
561 .await
562 .context("send legacy crash payload")?;
563
564 if legacy_response.status().is_success() {
565 return Ok(true);
566 }
567
568 tracing::warn!(
569 status = %legacy_response.status(),
570 report_id = %report.report_id,
571 "Crash report upload failed for both schema and legacy payloads"
572 );
573 return Ok(false);
574 }
575
576 tracing::warn!(
577 status = %status,
578 report_id = %report.report_id,
579 "Crash report upload rejected"
580 );
581 Ok(false)
582}
583
584fn build_upload_request(
585 settings: &CrashReporterSettings,
586 client: &reqwest::Client,
587 report: &CrashReport,
588) -> reqwest::RequestBuilder {
589 let mut request = client
590 .post(&settings.endpoint)
591 .header("X-CodeTether-Schema", CRASH_SCHEMA)
592 .header("X-CodeTether-Source", CRASH_SOURCE)
593 .header("X-CodeTether-Install-Id", &settings.install_id)
594 .header("X-CodeTether-Report-Id", &report.report_id)
595 .header("X-CodeTether-App-Version", &settings.app_version);
596
597 if let Some(token) = &settings.auth_token {
598 request = request.bearer_auth(token);
599 }
600 if let Some(api_key) = &settings.api_key {
601 request = request.header("X-CodeTether-API-Key", api_key);
602 }
603
604 request
605}
606
607fn should_retry_with_legacy_payload(status: StatusCode) -> bool {
608 matches!(
609 status,
610 StatusCode::BAD_REQUEST
611 | StatusCode::NOT_FOUND
612 | StatusCode::UNSUPPORTED_MEDIA_TYPE
613 | StatusCode::UNPROCESSABLE_ENTITY
614 )
615}
616
617fn crash_report_dir() -> PathBuf {
618 codetether_data_dir().join("crash-reports")
619}
620
621fn install_id_path() -> PathBuf {
622 codetether_data_dir().join("telemetry").join("install_id")
623}
624
625fn codetether_data_dir() -> PathBuf {
626 Config::data_dir().unwrap_or_else(|| PathBuf::from("/tmp/codetether-agent"))
627}
628
629fn load_or_create_install_id() -> Result<String> {
630 let path = install_id_path();
631 if let Ok(existing) = std::fs::read_to_string(&path) {
632 let trimmed = existing.trim();
633 if !trimmed.is_empty() {
634 return Ok(trimmed.to_string());
635 }
636 }
637
638 if let Some(parent) = path.parent() {
639 std::fs::create_dir_all(parent)
640 .with_context(|| format!("create telemetry directory {}", parent.display()))?;
641 }
642
643 let new_id = Uuid::new_v4().to_string();
644 std::fs::write(&path, format!("{new_id}\n"))
645 .with_context(|| format!("write install id {}", path.display()))?;
646 Ok(new_id)
647}
648
/// Reads the environment variable `key` and returns its trimmed value, or
/// `None` when it is unset, not valid Unicode, or blank after trimming.
fn env_non_empty(key: &str) -> Option<String> {
    match std::env::var(key) {
        Ok(raw) => {
            let trimmed = raw.trim();
            if trimmed.is_empty() {
                None
            } else {
                Some(trimmed.to_string())
            }
        }
        Err(_) => None,
    }
}
655
656async fn persist_consent_choice(enabled: bool) -> Result<()> {
657 let enabled_value = if enabled { "true" } else { "false" };
658 Config::set("telemetry.crash_reporting", enabled_value).await?;
659 Config::set("telemetry.crash_reporting_prompted", "true").await?;
660 Ok(())
661}
662
/// Prints `prompt` on stdout and reads a yes/no answer from stdin, asking
/// again until the answer is recognised.
///
/// `y`/`yes` give `true`; `n`/`no` and an empty line give `false`. Matching
/// ignores ASCII case and surrounding whitespace.
///
/// # Errors
///
/// Returns any I/O error raised while writing the prompt or reading input.
fn prompt_yes_no(prompt: &str) -> io::Result<bool> {
    let mut out = io::stdout();
    let mut line = String::new();

    loop {
        write!(out, "{prompt}")?;
        out.flush()?;

        line.clear();
        io::stdin().read_line(&mut line)?;

        match line.trim().to_ascii_lowercase().as_str() {
            // An empty answer takes the default, which is "no".
            "" | "n" | "no" => return Ok(false),
            "y" | "yes" => return Ok(true),
            _ => writeln!(out, "Please answer 'y' or 'n'.")?,
        }
    }
}
687
/// Extracts the panic message from `panic_info`.
///
/// Handles `&str` and `String` payloads; any other payload type yields the
/// fixed text `"non-string panic payload"`.
fn panic_payload_to_string(panic_info: &panic::PanicHookInfo<'_>) -> String {
    let payload = panic_info.payload();
    payload
        .downcast_ref::<&str>()
        .map(|text| (*text).to_string())
        .or_else(|| payload.downcast_ref::<String>().cloned())
        .unwrap_or_else(|| "non-string panic payload".to_string())
}
697
/// Limits `value` to its first `max_chars` characters, appending `...` when
/// anything was cut off.
///
/// Counting is per `char`, so multi-byte characters are never split. A limit
/// of zero yields an empty string. The appended `...` is not counted against
/// the limit.
fn truncate_with_ellipsis(value: &str, max_chars: usize) -> String {
    if max_chars == 0 {
        return String::new();
    }

    // The character at index `max_chars` exists only if the input is too long;
    // its byte offset is exactly where the kept prefix ends.
    match value.char_indices().nth(max_chars) {
        Some((cut, _)) => format!("{}...", &value[..cut]),
        None => value.to_string(),
    }
}
719
#[cfg(test)]
mod tests {
    use super::*;

    /// The backoff starts at the minimum, doubles on the second failure, and
    /// is capped at the maximum for very large failure counts.
    #[test]
    fn backoff_increases_and_clamps() {
        let expectations = [
            (1, MIN_UPLOAD_BACKOFF_SECS),
            (2, MIN_UPLOAD_BACKOFF_SECS * 2),
            (10_000, MAX_UPLOAD_BACKOFF_SECS),
        ];

        for (failures, expected_secs) in expectations {
            assert_eq!(compute_backoff_secs(failures), expected_secs);
        }
    }
}