Skip to main content

chasm_cli/
telemetry.rs

1// Copyright (c) 2024-2026 Nervosys LLC
2// SPDX-License-Identifier: Apache-2.0
3//! Telemetry module for anonymous usage data collection
4//!
//! This module provides anonymous usage telemetry that is enabled by default
//! (users can opt out at any time) to help improve Chasm. No personal data is
//! collected - only aggregate usage statistics.
7
8use crate::error::{CsmError, Result};
9use serde::{Deserialize, Serialize};
10use std::collections::HashMap;
11use std::fs::{self, File, OpenOptions};
12use std::io::{BufRead, BufReader, Write};
13use std::path::PathBuf;
14use uuid::Uuid;
15
/// Telemetry configuration stored on disk.
///
/// The struct derives `Serialize`/`Deserialize`. The three `remote_*` fields
/// carry `#[serde(default)]`, so input that omits them still deserializes
/// (they fall back to `None` / `false`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TelemetryConfig {
    /// Whether telemetry is enabled. The `Default` impl sets this to `true`,
    /// so collection is on until the user opts out.
    pub enabled: bool,

    /// Anonymous identifier for this installation
    pub installation_id: String,

    /// When the config was first created (value of `chrono::Utc::now().timestamp()`)
    pub created_at: i64,

    /// When the user last changed their preference; `None` until a setter
    /// has updated the config
    pub preference_changed_at: Option<i64>,

    /// Version of the config format
    pub version: u32,

    /// Remote telemetry endpoint URL (optional)
    #[serde(default)]
    pub remote_endpoint: Option<String>,

    /// API key for remote endpoint (optional)
    #[serde(default)]
    pub remote_api_key: Option<String>,

    /// Whether to send telemetry to remote endpoint
    #[serde(default)]
    pub remote_enabled: bool,
}
46
47impl Default for TelemetryConfig {
48    fn default() -> Self {
49        Self {
50            enabled: true, // Opt-in by default as requested
51            installation_id: Uuid::new_v4().to_string(),
52            created_at: chrono::Utc::now().timestamp(),
53            preference_changed_at: None,
54            version: 1,
55            remote_endpoint: None,
56            remote_api_key: None,
57            remote_enabled: false,
58        }
59    }
60}
61
62impl TelemetryConfig {
63    /// Get the path to the telemetry config file
64    pub fn config_path() -> Result<PathBuf> {
65        let config_dir = if cfg!(target_os = "windows") {
66            dirs::config_dir().map(|p| p.join("chasm"))
67        } else if cfg!(target_os = "macos") {
68            dirs::home_dir().map(|p| p.join(".config/chasm"))
69        } else {
70            dirs::home_dir().map(|p| p.join(".config/chasm"))
71        };
72
73        config_dir
74            .map(|p| p.join("telemetry.json"))
75            .ok_or(CsmError::StorageNotFound)
76    }
77
78    /// Load telemetry config from disk, creating default if not exists
79    pub fn load() -> Result<Self> {
80        let config_path = Self::config_path()?;
81
82        if config_path.exists() {
83            let content = fs::read_to_string(&config_path)?;
84            serde_json::from_str(&content)
85                .map_err(|e| CsmError::InvalidSessionFormat(e.to_string()))
86        } else {
87            // Create default config (opt-in by default)
88            let config = Self::default();
89            config.save()?;
90            Ok(config)
91        }
92    }
93
94    /// Save telemetry config to disk
95    pub fn save(&self) -> Result<()> {
96        let config_path = Self::config_path()?;
97
98        // Create parent directory if it doesn't exist
99        if let Some(parent) = config_path.parent() {
100            fs::create_dir_all(parent)?;
101        }
102
103        let content = serde_json::to_string_pretty(self)
104            .map_err(|e| CsmError::InvalidSessionFormat(e.to_string()))?;
105        fs::write(&config_path, content)?;
106
107        Ok(())
108    }
109
110    /// Enable telemetry
111    pub fn opt_in(&mut self) -> Result<()> {
112        self.enabled = true;
113        self.preference_changed_at = Some(chrono::Utc::now().timestamp());
114        self.save()
115    }
116
117    /// Disable telemetry
118    pub fn opt_out(&mut self) -> Result<()> {
119        self.enabled = false;
120        self.preference_changed_at = Some(chrono::Utc::now().timestamp());
121        self.save()
122    }
123
124    /// Reset installation ID (generates new anonymous identifier)
125    pub fn reset_id(&mut self) -> Result<()> {
126        self.installation_id = Uuid::new_v4().to_string();
127        self.preference_changed_at = Some(chrono::Utc::now().timestamp());
128        self.save()
129    }
130
131    /// Check if telemetry is enabled
132    pub fn is_enabled(&self) -> bool {
133        self.enabled
134    }
135
136    /// Configure remote endpoint
137    pub fn set_remote_endpoint(&mut self, endpoint: Option<String>) -> Result<()> {
138        self.remote_endpoint = endpoint;
139        self.preference_changed_at = Some(chrono::Utc::now().timestamp());
140        self.save()
141    }
142
143    /// Configure remote API key
144    pub fn set_remote_api_key(&mut self, api_key: Option<String>) -> Result<()> {
145        self.remote_api_key = api_key;
146        self.preference_changed_at = Some(chrono::Utc::now().timestamp());
147        self.save()
148    }
149
150    /// Enable/disable remote sending
151    pub fn set_remote_enabled(&mut self, enabled: bool) -> Result<()> {
152        self.remote_enabled = enabled;
153        self.preference_changed_at = Some(chrono::Utc::now().timestamp());
154        self.save()
155    }
156
157    /// Check if remote telemetry is configured and enabled
158    pub fn is_remote_enabled(&self) -> bool {
159        self.remote_enabled && self.remote_endpoint.is_some() && self.remote_api_key.is_some()
160    }
161}
162
/// Types of telemetry events we track.
///
/// `#[serde(tag = "type")]` selects serde's internally tagged representation:
/// the variant name is stored under a `"type"` key alongside the variant's
/// own fields.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum TelemetryEvent {
    /// CLI command invoked
    CommandInvoked {
        /// Top-level command name
        command: String,
        /// Subcommand name, if any
        subcommand: Option<String>,
        /// Duration in milliseconds, if measured
        duration_ms: Option<u64>,
        /// Whether the command succeeded
        success: bool,
    },

    /// Session harvested from a provider
    SessionHarvested {
        /// Provider name
        provider: String,
        /// Number of sessions harvested
        session_count: u32,
    },

    /// Sessions merged
    SessionsMerged { session_count: u32 },

    /// API server started
    ApiServerStarted { port: u16 },

    /// Provider detected
    ProviderDetected { provider: String },

    /// Error occurred (no PII, just error type)
    ErrorOccurred { error_type: String },
}
193
/// Telemetry collector that buffers events in memory.
///
/// Sending is not implemented yet: `flush` currently only clears the buffer.
#[derive(Debug)]
pub struct TelemetryCollector {
    /// Configuration loaded when the collector was created
    config: TelemetryConfig,
    /// Events tracked since the last flush
    events: Vec<TelemetryEvent>,
}
200
201impl TelemetryCollector {
202    /// Create a new telemetry collector
203    pub fn new() -> Result<Self> {
204        let config = TelemetryConfig::load()?;
205        Ok(Self {
206            config,
207            events: Vec::new(),
208        })
209    }
210
211    /// Check if telemetry is enabled
212    pub fn is_enabled(&self) -> bool {
213        self.config.is_enabled()
214    }
215
216    /// Track a telemetry event
217    pub fn track(&mut self, event: TelemetryEvent) {
218        if self.is_enabled() {
219            self.events.push(event);
220        }
221    }
222
223    /// Track a CLI command invocation
224    pub fn track_command(&mut self, command: &str, subcommand: Option<&str>, success: bool) {
225        self.track(TelemetryEvent::CommandInvoked {
226            command: command.to_string(),
227            subcommand: subcommand.map(|s| s.to_string()),
228            duration_ms: None,
229            success,
230        });
231    }
232
233    /// Get the installation ID
234    pub fn installation_id(&self) -> &str {
235        &self.config.installation_id
236    }
237
238    /// Flush events (in future: send to telemetry endpoint)
239    /// Currently just clears the buffer - actual sending will be implemented later
240    pub fn flush(&mut self) -> Result<()> {
241        if !self.is_enabled() || self.events.is_empty() {
242            return Ok(());
243        }
244
245        // TODO: In future versions, send events to telemetry endpoint
246        // For now, we just clear the buffer
247        // The endpoint and sending logic will be added when the backend is ready
248
249        self.events.clear();
250        Ok(())
251    }
252}
253
254impl Drop for TelemetryCollector {
255    fn drop(&mut self) {
256        // Try to flush remaining events on drop
257        let _ = self.flush();
258    }
259}
260
/// What data is collected (for user information).
///
/// The text contains the literal placeholders `{installation_id}` and
/// `{status}`. This is a plain `&str`, not a format string, so callers
/// presumably substitute them before display - the substitution is not
/// visible in this part of the file; verify against the caller.
pub const TELEMETRY_INFO: &str = r#"
Chasm collects anonymous usage data to help improve the product.

WHAT WE COLLECT:
  • Commands used (e.g., 'harvest', 'merge', 'export')
  • Provider types detected (e.g., 'copilot', 'cursor', 'ollama')
  • Session counts (numbers only, no content)
  • Error types (no personal details or file paths)
  • Anonymous installation ID (randomly generated UUID)

WHAT WE DO NOT COLLECT:
  • Chat messages or content
  • File paths or project names
  • Personal information
  • API keys or credentials
  • IP addresses (beyond what's needed for HTTPS)

Your installation ID: {installation_id}
Status: {status}

Manage your preference:
  chasm telemetry opt-in   - Enable data collection (default)
  chasm telemetry opt-out  - Disable data collection
  chasm telemetry reset    - Generate new anonymous ID
"#;
287
288// =============================================================================
289// STRUCTURED DATA RECORDING FOR AI ANALYSIS
290// =============================================================================
291
/// A structured telemetry record for AI analysis.
///
/// Derives `Serialize`/`Deserialize`; `TelemetryStore` writes and reads one
/// record per line of its JSONL file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TelemetryRecord {
    /// Unique record ID
    pub id: String,

    /// Installation ID (anonymous)
    pub installation_id: String,

    /// Event category (e.g., 'workflow', 'error', 'performance', 'usage', 'custom')
    pub category: String,

    /// Event name or type
    pub event: String,

    /// Structured data payload (arbitrary JSON values keyed by name)
    pub data: HashMap<String, serde_json::Value>,

    /// Tags for filtering
    pub tags: Vec<String>,

    /// Optional context/session ID
    pub context: Option<String>,

    /// Unix timestamp when recorded
    pub timestamp: i64,

    /// Human-readable timestamp (same instant as `timestamp`, formatted with
    /// millisecond precision and a trailing `Z`)
    pub timestamp_iso: String,
}
322
323impl TelemetryRecord {
324    /// Create a new telemetry record
325    pub fn new(
326        installation_id: &str,
327        category: &str,
328        event: &str,
329        data: HashMap<String, serde_json::Value>,
330        tags: Vec<String>,
331        context: Option<String>,
332    ) -> Self {
333        let now = chrono::Utc::now();
334        Self {
335            id: Uuid::new_v4().to_string(),
336            installation_id: installation_id.to_string(),
337            category: category.to_string(),
338            event: event.to_string(),
339            data,
340            tags,
341            context,
342            timestamp: now.timestamp(),
343            timestamp_iso: now.format("%Y-%m-%dT%H:%M:%S%.3fZ").to_string(),
344        }
345    }
346}
347
/// Storage for telemetry records (JSONL file for easy streaming/appending).
///
/// The store itself only holds the loaded configuration; the records live in
/// the file returned by `TelemetryStore::records_path`.
pub struct TelemetryStore {
    /// Configuration loaded when the store was created (supplies the
    /// installation ID and the remote endpoint settings)
    config: TelemetryConfig,
}
352
353impl TelemetryStore {
354    /// Create a new telemetry store
355    pub fn new() -> Result<Self> {
356        let config = TelemetryConfig::load()?;
357        Ok(Self { config })
358    }
359
360    /// Get path to the telemetry records file
361    pub fn records_path() -> Result<PathBuf> {
362        let config_dir = if cfg!(target_os = "windows") {
363            dirs::config_dir().map(|p| p.join("chasm"))
364        } else {
365            dirs::home_dir().map(|p| p.join(".config/chasm"))
366        };
367
368        config_dir
369            .map(|p| p.join("telemetry_records.jsonl"))
370            .ok_or(CsmError::StorageNotFound)
371    }
372
373    /// Record a new telemetry event
374    pub fn record(
375        &self,
376        category: &str,
377        event: &str,
378        data: HashMap<String, serde_json::Value>,
379        tags: Vec<String>,
380        context: Option<String>,
381    ) -> Result<TelemetryRecord> {
382        let record = TelemetryRecord::new(
383            &self.config.installation_id,
384            category,
385            event,
386            data,
387            tags,
388            context,
389        );
390
391        // Append to JSONL file
392        let path = Self::records_path()?;
393        if let Some(parent) = path.parent() {
394            fs::create_dir_all(parent)?;
395        }
396
397        let mut file = OpenOptions::new().create(true).append(true).open(&path)?;
398
399        let line = serde_json::to_string(&record)
400            .map_err(|e| CsmError::InvalidSessionFormat(e.to_string()))?;
401        writeln!(file, "{}", line)?;
402
403        Ok(record)
404    }
405
406    /// Read all records, optionally filtered
407    pub fn read_records(
408        &self,
409        category: Option<&str>,
410        event: Option<&str>,
411        tag: Option<&str>,
412        after: Option<i64>,
413        before: Option<i64>,
414        limit: Option<usize>,
415    ) -> Result<Vec<TelemetryRecord>> {
416        let path = Self::records_path()?;
417        if !path.exists() {
418            return Ok(Vec::new());
419        }
420
421        let file = File::open(&path)?;
422        let reader = BufReader::new(file);
423        let mut records: Vec<TelemetryRecord> = Vec::new();
424
425        for line in reader.lines() {
426            let line = line?;
427            if line.trim().is_empty() {
428                continue;
429            }
430
431            let record: TelemetryRecord = serde_json::from_str(&line)
432                .map_err(|e| CsmError::InvalidSessionFormat(e.to_string()))?;
433
434            // Apply filters
435            if let Some(cat) = category {
436                if record.category != cat {
437                    continue;
438                }
439            }
440            if let Some(evt) = event {
441                if record.event != evt {
442                    continue;
443                }
444            }
445            if let Some(t) = tag {
446                if !record.tags.contains(&t.to_string()) {
447                    continue;
448                }
449            }
450            if let Some(after_ts) = after {
451                if record.timestamp < after_ts {
452                    continue;
453                }
454            }
455            if let Some(before_ts) = before {
456                if record.timestamp > before_ts {
457                    continue;
458                }
459            }
460
461            records.push(record);
462        }
463
464        // Sort by timestamp descending (newest first)
465        records.sort_by(|a, b| b.timestamp.cmp(&a.timestamp));
466
467        // Apply limit
468        if let Some(lim) = limit {
469            records.truncate(lim);
470        }
471
472        Ok(records)
473    }
474
475    /// Get record count
476    pub fn count_records(&self) -> Result<usize> {
477        let path = Self::records_path()?;
478        if !path.exists() {
479            return Ok(0);
480        }
481
482        let file = File::open(&path)?;
483        let reader = BufReader::new(file);
484        Ok(reader.lines().filter(|l| l.is_ok()).count())
485    }
486
487    /// Clear records (optionally older than N days)
488    pub fn clear_records(&self, older_than_days: Option<u32>) -> Result<usize> {
489        let path = Self::records_path()?;
490        if !path.exists() {
491            return Ok(0);
492        }
493
494        if older_than_days.is_none() {
495            // Delete entire file
496            let count = self.count_records()?;
497            fs::remove_file(&path)?;
498            return Ok(count);
499        }
500
501        // Filter out old records
502        let cutoff =
503            chrono::Utc::now().timestamp() - (older_than_days.unwrap() as i64 * 24 * 60 * 60);
504
505        let file = File::open(&path)?;
506        let reader = BufReader::new(file);
507        let mut kept_records: Vec<String> = Vec::new();
508        let mut removed_count = 0;
509
510        for line in reader.lines() {
511            let line = line?;
512            if line.trim().is_empty() {
513                continue;
514            }
515
516            let record: TelemetryRecord = serde_json::from_str(&line)
517                .map_err(|e| CsmError::InvalidSessionFormat(e.to_string()))?;
518
519            if record.timestamp >= cutoff {
520                kept_records.push(line);
521            } else {
522                removed_count += 1;
523            }
524        }
525
526        // Rewrite file with kept records
527        let mut file = File::create(&path)?;
528        for line in kept_records {
529            writeln!(file, "{}", line)?;
530        }
531
532        Ok(removed_count)
533    }
534
535    /// Export records to a file
536    pub fn export_records(
537        &self,
538        output_path: &str,
539        format: &str,
540        category: Option<&str>,
541        with_metadata: bool,
542    ) -> Result<usize> {
543        let records = self.read_records(category, None, None, None, None, None)?;
544
545        if records.is_empty() {
546            return Ok(0);
547        }
548
549        let mut file = File::create(output_path)?;
550
551        match format {
552            "json" => {
553                if with_metadata {
554                    let export = serde_json::json!({
555                        "installation_id": self.config.installation_id,
556                        "exported_at": chrono::Utc::now().format("%Y-%m-%dT%H:%M:%S%.3fZ").to_string(),
557                        "record_count": records.len(),
558                        "records": records,
559                    });
560                    let content = serde_json::to_string_pretty(&export)
561                        .map_err(|e| CsmError::InvalidSessionFormat(e.to_string()))?;
562                    write!(file, "{}", content)?;
563                } else {
564                    let content = serde_json::to_string_pretty(&records)
565                        .map_err(|e| CsmError::InvalidSessionFormat(e.to_string()))?;
566                    write!(file, "{}", content)?;
567                }
568            }
569            "jsonl" => {
570                if with_metadata {
571                    let meta = serde_json::json!({
572                        "_type": "metadata",
573                        "installation_id": self.config.installation_id,
574                        "exported_at": chrono::Utc::now().format("%Y-%m-%dT%H:%M:%S%.3fZ").to_string(),
575                        "record_count": records.len(),
576                    });
577                    writeln!(
578                        file,
579                        "{}",
580                        serde_json::to_string(&meta)
581                            .map_err(|e| CsmError::InvalidSessionFormat(e.to_string()))?
582                    )?;
583                }
584                for record in &records {
585                    let line = serde_json::to_string(record)
586                        .map_err(|e| CsmError::InvalidSessionFormat(e.to_string()))?;
587                    writeln!(file, "{}", line)?;
588                }
589            }
590            "csv" => {
591                // Write CSV header
592                writeln!(
593                    file,
594                    "id,timestamp,timestamp_iso,category,event,tags,context,data"
595                )?;
596                for record in &records {
597                    let tags = record.tags.join(";");
598                    let context = record.context.clone().unwrap_or_default();
599                    let data = serde_json::to_string(&record.data)
600                        .map_err(|e| CsmError::InvalidSessionFormat(e.to_string()))?;
601                    // Escape CSV fields
602                    let data_escaped = data.replace('"', "\"\"");
603                    writeln!(
604                        file,
605                        "{},{},\"{}\",\"{}\",\"{}\",\"{}\",\"{}\",\"{}\"",
606                        record.id,
607                        record.timestamp,
608                        record.timestamp_iso,
609                        record.category,
610                        record.event,
611                        tags,
612                        context,
613                        data_escaped
614                    )?;
615                }
616            }
617            _ => {
618                return Err(CsmError::InvalidSessionFormat(format!(
619                    "Unknown export format: {}",
620                    format
621                ))
622                .into());
623            }
624        }
625
626        Ok(records.len())
627    }
628
629    /// Get installation ID
630    pub fn installation_id(&self) -> &str {
631        &self.config.installation_id
632    }
633
634    /// Sync records to remote endpoint
635    pub fn sync_to_remote(&self, limit: Option<usize>) -> Result<SyncResult> {
636        if !self.config.is_remote_enabled() {
637            return Err(CsmError::InvalidSessionFormat(
638                "Remote telemetry not configured. Use 'chasm telemetry config' to set endpoint and API key".to_string()
639            ).into());
640        }
641
642        let endpoint = self.config.remote_endpoint.as_ref().unwrap();
643        let api_key = self.config.remote_api_key.as_ref().unwrap();
644
645        // Read records to sync
646        let records = self.read_records(None, None, None, None, None, limit)?;
647
648        if records.is_empty() {
649            return Ok(SyncResult {
650                records_sent: 0,
651                success: true,
652                error: None,
653            });
654        }
655
656        // Build the request payload
657        let payload = serde_json::json!({
658            "installation_id": self.config.installation_id,
659            "records": records,
660        });
661
662        // Send to remote endpoint
663        let client = reqwest::blocking::Client::new();
664        let response = client
665            .post(format!("{}/ingest", endpoint.trim_end_matches('/')))
666            .header("Content-Type", "application/json")
667            .header("X-Api-Key", api_key)
668            .json(&payload)
669            .send();
670
671        match response {
672            Ok(resp) => {
673                if resp.status().is_success() {
674                    Ok(SyncResult {
675                        records_sent: records.len(),
676                        success: true,
677                        error: None,
678                    })
679                } else {
680                    let status = resp.status();
681                    let error_text = resp.text().unwrap_or_else(|_| "Unknown error".to_string());
682                    Ok(SyncResult {
683                        records_sent: 0,
684                        success: false,
685                        error: Some(format!("HTTP {}: {}", status, error_text)),
686                    })
687                }
688            }
689            Err(e) => Ok(SyncResult {
690                records_sent: 0,
691                success: false,
692                error: Some(format!("Request failed: {}", e)),
693            }),
694        }
695    }
696
697    /// Get the config
698    pub fn config(&self) -> &TelemetryConfig {
699        &self.config
700    }
701}
702
/// Result of a sync operation, as returned by `TelemetryStore::sync_to_remote`.
#[derive(Debug)]
pub struct SyncResult {
    /// Number of records sent (0 when the sync failed or there was nothing to send)
    pub records_sent: usize,
    /// Whether the sync succeeded
    pub success: bool,
    /// Failure description (HTTP status and body, or the request error); `None` on success
    pub error: Option<String>,
}