Skip to main content

statsai_core/
lib.rs

//! Core schemas and ID helpers for `statsai`.
2
3use chrono::{DateTime, Utc};
4use schemars::JsonSchema;
5use serde::{Deserialize, Serialize};
6use sha2::{Digest, Sha256};
7use std::path::{Path, PathBuf};
8
// Schema version tags, one per record format, written as `<record>.v<N>`.
// For example, the `SourceLocation` constructors in this file stamp
// `SOURCE_LOCATION_SCHEMA_VERSION` into the record's `schema_version` field.
pub const USAGE_EVENT_SCHEMA_VERSION: &str = "usage_event.v1";
pub const USAGE_SUMMARY_SCHEMA_VERSION: &str = "usage_summary.v1";
pub const REPORTED_USAGE_SUMMARY_INPUT_SCHEMA_VERSION: &str = "reported_usage_summary_input.v1";
pub const SOURCE_LOCATION_SCHEMA_VERSION: &str = "source_location.v1";
pub const PROVIDER_ACCOUNT_SCHEMA_VERSION: &str = "provider_account.v1";
pub const SOURCE_ACCOUNT_ASSIGNMENT_SCHEMA_VERSION: &str = "source_account_assignment.v1";
pub const SUBSCRIPTION_SCHEMA_VERSION: &str = "subscription.v1";
pub const DAILY_ROLLUP_SCHEMA_VERSION: &str = "daily_rollup.v1";
pub const SYNC_BATCH_SCHEMA_VERSION: &str = "sync_batch.v1";
pub const SYNC_ACK_SCHEMA_VERSION: &str = "sync_ack.v1";
19
20#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize, JsonSchema)]
21#[serde(transparent)]
22pub struct SourceId(pub String);
23
24#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize, JsonSchema)]
25#[serde(transparent)]
26pub struct ProviderAccountId(pub String);
27
28#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize, JsonSchema)]
29#[serde(transparent)]
30pub struct SubscriptionId(pub String);
31
32#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize, JsonSchema)]
33#[serde(transparent)]
34pub struct SourceAccountAssignmentId(pub String);
35
36#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize, JsonSchema)]
37#[serde(transparent)]
38pub struct EventId(pub String);
39
40#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize, JsonSchema)]
41#[serde(transparent)]
42pub struct SummaryId(pub String);
43
44#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
45#[serde(rename_all = "snake_case")]
46pub enum SourceKind {
47    LocalAdapter,
48    LocalSummary,
49    LocalApi,
50    ProviderApi,
51    CliProbe,
52    SdkInstrumented,
53    ExternalReport,
54    Manual,
55}
56
57#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
58#[serde(rename_all = "snake_case")]
59pub enum LocationOrigin {
60    Default,
61    Configured,
62    Env,
63    Discovered,
64}
65
66#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
67#[serde(rename_all = "snake_case")]
68pub enum Confidence {
69    Low,
70    Medium,
71    High,
72}
73
74#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
75#[serde(rename_all = "snake_case")]
76pub enum IdentitySource {
77    ProviderAuth,
78    ProviderApi,
79    CliProbe,
80    SourceConfig,
81    UserConfigured,
82    ManualHint,
83    LocalAuth,
84    CookieOauth,
85    Unresolved,
86    Unknown,
87}
88
89#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
90#[serde(rename_all = "snake_case")]
91pub enum BillingPeriod {
92    Monthly,
93    Annual,
94    Custom,
95}
96
97#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
98#[serde(rename_all = "snake_case")]
99pub enum SubscriptionStatus {
100    Active,
101    Paused,
102    Cancelled,
103}
104
105#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
106#[serde(rename_all = "snake_case")]
107pub enum PrivacyMode {
108    MetadataOnly,
109    TitlesLabels,
110    EnrichedSummaries,
111}
112
113#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema, Default)]
114#[serde(rename_all = "snake_case")]
115pub enum SourceVerificationMode {
116    #[default]
117    Auto,
118    ManualOnly,
119    Disabled,
120}
121
122#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
123pub struct SourceLocation {
124    pub schema_version: String,
125    pub source_id: SourceId,
126    pub provider: String,
127    pub source_kind: SourceKind,
128    pub location_origin: LocationOrigin,
129    pub adapter_id: Option<String>,
130    pub adapter_version: Option<String>,
131    pub path_hash: Option<String>,
132    pub path_label: Option<String>,
133    pub enabled: bool,
134    #[serde(default)]
135    pub verification_mode: SourceVerificationMode,
136    #[serde(default)]
137    pub verified_state_hash: Option<String>,
138    pub created_at: DateTime<Utc>,
139    pub updated_at: DateTime<Utc>,
140}
141
142#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
143pub struct ProviderAccount {
144    pub schema_version: String,
145    pub provider_account_id: ProviderAccountId,
146    pub provider: String,
147    pub identity_source: IdentitySource,
148    pub provider_user_id: Option<String>,
149    pub email: Option<String>,
150    pub provider_user_id_hash: Option<String>,
151    pub email_hash: Option<String>,
152    pub org_id_hash: Option<String>,
153    pub account_label: Option<String>,
154    pub plan_name: Option<String>,
155    pub confidence: Confidence,
156    pub verified_at: Option<DateTime<Utc>>,
157    pub created_at: DateTime<Utc>,
158    pub updated_at: DateTime<Utc>,
159}
160
161#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
162pub struct SourceAccountAssignment {
163    pub schema_version: String,
164    pub assignment_id: SourceAccountAssignmentId,
165    pub source_id: SourceId,
166    pub provider: String,
167    pub provider_account_id: ProviderAccountId,
168    pub started_at: DateTime<Utc>,
169    pub ended_at: Option<DateTime<Utc>>,
170    #[serde(default = "default_identity_source_unknown")]
171    pub record_source: IdentitySource,
172    pub verified_at: Option<DateTime<Utc>>,
173    pub created_at: DateTime<Utc>,
174    pub updated_at: DateTime<Utc>,
175}
176
177#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, JsonSchema)]
178pub struct Subscription {
179    pub schema_version: String,
180    pub subscription_id: SubscriptionId,
181    pub provider: String,
182    pub provider_account_id: ProviderAccountId,
183    pub plan_name: String,
184    pub price: i64, // minor units (cents) of the currency
185    pub currency: String,
186    pub billing_period: BillingPeriod,
187    pub paid_at: Option<DateTime<Utc>>,
188    pub renewal_day: Option<u8>,
189    pub started_at: DateTime<Utc>,
190    pub ended_at: Option<DateTime<Utc>>,
191    pub current_period_ends_at: Option<DateTime<Utc>>,
192    pub status: SubscriptionStatus,
193    #[serde(default = "default_identity_source_unknown")]
194    pub record_source: IdentitySource,
195    pub verified_at: Option<DateTime<Utc>>,
196    pub notes: Option<String>,
197}
198
199#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, JsonSchema)]
200pub struct VerifiedSourceState {
201    pub provider_user_id: Option<String>,
202    pub email: Option<String>,
203    pub account_label: Option<String>,
204    pub plan_name: Option<String>,
205    pub authenticated_at: Option<DateTime<Utc>>,
206    pub verified_at: Option<DateTime<Utc>>,
207    pub subscription: Option<VerifiedSubscriptionState>,
208}
209
210#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, JsonSchema)]
211pub struct VerifiedSubscriptionState {
212    pub plan_name: String,
213    pub price: i64, // minor units (cents) of the currency
214    pub currency: String,
215    pub billing_period: BillingPeriod,
216    pub paid_at: Option<DateTime<Utc>>,
217    pub started_at: DateTime<Utc>,
218    pub ended_at: Option<DateTime<Utc>>,
219    pub current_period_ends_at: Option<DateTime<Utc>>,
220    pub status: SubscriptionStatus,
221    pub verified_at: Option<DateTime<Utc>>,
222}
223
224#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
225pub struct EventSource {
226    pub adapter_id: String,
227    pub adapter_version: String,
228    pub source_kind: SourceKind,
229    pub location_origin: Option<LocationOrigin>,
230    pub source_type: String,
231    pub source_path_hash: Option<String>,
232    pub source_record_id: Option<String>,
233    pub parse_confidence: Confidence,
234}
235
236#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
237pub struct SessionInfo {
238    pub session_id: String,
239    pub local_session_id_hash: Option<String>,
240    pub title: Option<String>,
241    pub started_at: DateTime<Utc>,
242    pub ended_at: Option<DateTime<Utc>>,
243    pub duration_seconds: Option<u64>,
244}
245
246#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
247#[serde(rename_all = "snake_case")]
248pub enum ReasoningLevel {
249    Low,
250    Medium,
251    High,
252    Xhigh,
253    Max,
254}
255
256impl ReasoningLevel {
257    #[must_use]
258    pub fn parse(value: &str) -> Option<Self> {
259        match value.trim() {
260            "low" => Some(Self::Low),
261            "medium" => Some(Self::Medium),
262            "high" => Some(Self::High),
263            "xhigh" => Some(Self::Xhigh),
264            "max" => Some(Self::Max),
265            _ => None,
266        }
267    }
268
269    #[must_use]
270    pub fn as_str(self) -> &'static str {
271        match self {
272            Self::Low => "low",
273            Self::Medium => "medium",
274            Self::High => "high",
275            Self::Xhigh => "xhigh",
276            Self::Max => "max",
277        }
278    }
279}
280
281#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize, JsonSchema)]
282pub struct ModelInfo {
283    pub name: Option<String>,
284    pub normalized_name: Option<String>,
285    pub provider_model_id: Option<String>,
286    pub reasoning_level: Option<ReasoningLevel>,
287    pub reasoning_level_raw: Option<String>,
288}
289
290#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize, JsonSchema)]
291pub struct UsageCounts {
292    pub input_tokens: Option<u64>,
293    pub output_tokens: Option<u64>,
294    pub cache_creation_tokens: Option<u64>,
295    pub cache_read_tokens: Option<u64>,
296    pub reasoning_tokens: Option<u64>,
297    pub total_tokens: Option<u64>,
298    pub requests: Option<u64>,
299    pub local_prompt_eval_tokens: Option<u64>,
300    pub local_eval_tokens: Option<u64>,
301}
302
303#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
304pub struct RuntimeInfo {
305    pub runtime_name: Option<String>,
306    pub host_id: Option<String>,
307    /// End-to-end request or turn duration, not time to first token.
308    pub latency_ms: Option<u64>,
309    /// Provenance of latency_ms when the adapter can distinguish it.
310    pub latency_source: Option<LatencySource>,
311    /// Time from request start until the first visible token arrives.
312    pub time_to_first_token_ms: Option<u64>,
313    pub prompt_eval_duration_ms: Option<u64>,
314    pub eval_duration_ms: Option<u64>,
315    pub total_messages: Option<u64>,
316    pub user_messages: Option<u64>,
317    pub assistant_messages: Option<u64>,
318    pub developer_messages: Option<u64>,
319}
320
321#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
322#[serde(rename_all = "snake_case")]
323pub enum LatencySource {
324    Explicit,
325    Inferred,
326}
327
328#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, JsonSchema)]
329pub struct MetricStats {
330    pub samples: u64,
331    pub avg: Option<f64>,
332    pub min: Option<f64>,
333    pub max: Option<f64>,
334    pub p50: Option<f64>,
335    pub p95: Option<f64>,
336    pub sum: Option<f64>,
337}
338
339#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, JsonSchema)]
340pub struct SummaryMetrics {
341    pub active_seconds: Option<f64>,
342    pub tracked_requests: Option<u64>,
343    pub tracked_output_tokens: Option<u64>,
344    pub tracked_reasoning_tokens: Option<u64>,
345    /// Aggregated end-to-end request or turn duration, not TTFT.
346    pub latency_ms: Option<MetricStats>,
347    pub time_to_first_token_ms: Option<MetricStats>,
348    /// Per-turn generated throughput distribution across tracked turns.
349    pub generated_tps: Option<MetricStats>,
350    /// Per-turn visible throughput distribution across tracked turns.
351    pub visible_tps: Option<MetricStats>,
352    /// Overall generated throughput across tracked active time.
353    pub overall_generated_tps: Option<f64>,
354    /// Overall visible throughput across tracked active time.
355    pub overall_visible_tps: Option<f64>,
356    pub cache_hit_ratio: Option<MetricStats>,
357    pub reasoning_share: Option<MetricStats>,
358    pub total_messages: Option<u64>,
359    pub user_messages: Option<u64>,
360    pub assistant_messages: Option<u64>,
361    pub developer_messages: Option<u64>,
362}
363
364#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, JsonSchema)]
365pub struct CostInfo {
366    pub currency: String,
367    pub estimated_api_equivalent_usd: Option<i64>, // cents USD
368    pub provider_reported_usd: Option<i64>,        // cents USD
369    pub pricing_source: Option<String>,
370    pub pricing_version: Option<String>,
371    pub confidence: Confidence,
372}
373
374#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, JsonSchema)]
375pub struct SummaryModelUsage {
376    pub model: ModelInfo,
377    pub usage: UsageCounts,
378    pub cost: CostInfo,
379}
380
381#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
382pub struct ParseEvidence {
383    pub event_key_version: String,
384    pub source_file_path_hash: Option<String>,
385    pub source_line_number: Option<u64>,
386    pub source_record_id: Option<String>,
387    pub model_inferred: bool,
388    pub timestamp_inferred: bool,
389    pub account_identity_source: IdentitySource,
390}
391
392#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
393pub struct ProjectInfo {
394    pub project_id: String,
395    pub project_label: Option<String>,
396    pub repo_remote_hash: Option<String>,
397    pub repo_label: Option<String>,
398    pub branch_hash: Option<String>,
399    pub branch_label: Option<String>,
400    pub path_hash: Option<String>,
401    pub path_label: Option<String>,
402}
403
404#[must_use]
405pub fn project_has_stable_identity(project: &ProjectInfo) -> bool {
406    project
407        .repo_remote_hash
408        .as_deref()
409        .is_some_and(|value| !value.trim().is_empty())
410        || project
411            .path_hash
412            .as_deref()
413            .is_some_and(|value| !value.trim().is_empty())
414}
415
416#[must_use]
417pub fn project_has_remote_identity(project: &ProjectInfo) -> bool {
418    project
419        .repo_remote_hash
420        .as_deref()
421        .is_some_and(|value| !value.trim().is_empty())
422}
423
424#[must_use]
425pub fn project_contains_file_paths(project: Option<&ProjectInfo>) -> bool {
426    project
427        .and_then(|project| project.path_label.as_deref())
428        .is_some_and(|value| !value.trim().is_empty())
429}
430
431#[must_use]
432pub fn project_bucket_key(project: Option<&ProjectInfo>) -> String {
433    let Some(project) = project else {
434        return "none".to_string();
435    };
436    if !project_has_stable_identity(project) {
437        return "none".to_string();
438    }
439    if project.path_hash.is_some()
440        || project.repo_remote_hash.is_some()
441        || project.branch_hash.is_some()
442    {
443        return format!(
444            "repo:{}|path:{}|branch:{}",
445            project.repo_remote_hash.as_deref().unwrap_or("none"),
446            project.path_hash.as_deref().unwrap_or("none"),
447            project.branch_hash.as_deref().unwrap_or("none")
448        );
449    }
450    project.project_id.clone()
451}
452
453#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
454pub struct GitInfo {
455    pub nearby_commit_hashes: Vec<String>,
456    pub nearby_commit_messages: Vec<String>,
457    pub correlation_confidence: Option<Confidence>,
458}
459
460#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
461pub struct PrivacyInfo {
462    pub mode: PrivacyMode,
463    pub contains_prompt_text: bool,
464    pub contains_response_text: bool,
465    pub contains_file_paths: bool,
466}
467
468#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, JsonSchema)]
469pub struct UsageEvent {
470    pub schema_version: String,
471    pub event_id: EventId,
472    pub device_id: String,
473    pub provider: String,
474    pub source_id: SourceId,
475    pub provider_account_id: Option<ProviderAccountId>,
476    pub subscription_id: Option<SubscriptionId>,
477    pub source: EventSource,
478    pub session: SessionInfo,
479    pub model: Option<ModelInfo>,
480    pub usage: UsageCounts,
481    pub runtime: Option<RuntimeInfo>,
482    pub cost: CostInfo,
483    pub parse_evidence: Option<ParseEvidence>,
484    pub project: Option<ProjectInfo>,
485    pub git: Option<GitInfo>,
486    pub privacy: PrivacyInfo,
487    pub created_at: DateTime<Utc>,
488    pub imported_at: DateTime<Utc>,
489}
490
491#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
492pub struct SummaryMetadata {
493    pub summary_format: String,
494    pub summary_version: Option<String>,
495    pub total_sessions: Option<u64>,
496    pub total_messages: Option<u64>,
497    pub last_computed_at: Option<DateTime<Utc>>,
498}
499
500#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, JsonSchema)]
501pub struct UsageSummary {
502    pub schema_version: String,
503    pub summary_id: SummaryId,
504    pub device_id: String,
505    pub provider: String,
506    pub source_id: SourceId,
507    pub provider_account_id: Option<ProviderAccountId>,
508    pub source: EventSource,
509    pub model: Option<ModelInfo>,
510    #[serde(default, skip_serializing_if = "Vec::is_empty")]
511    pub models: Vec<SummaryModelUsage>,
512    pub usage: UsageCounts,
513    pub cost: CostInfo,
514    pub parse_evidence: Option<ParseEvidence>,
515    pub project: Option<ProjectInfo>,
516    pub privacy: PrivacyInfo,
517    pub metrics: Option<SummaryMetrics>,
518    pub period_start: Option<DateTime<Utc>>,
519    pub period_end: Option<DateTime<Utc>>,
520    pub observed_at: DateTime<Utc>,
521    pub metadata: SummaryMetadata,
522    pub imported_at: DateTime<Utc>,
523}
524
525#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, JsonSchema)]
526pub struct SyncBatch {
527    pub schema_version: String,
528    pub batch_id: String,
529    pub device_id: String,
530    #[serde(default, skip_serializing_if = "Vec::is_empty")]
531    pub sources: Vec<SourceLocation>,
532    #[serde(default, skip_serializing_if = "Vec::is_empty")]
533    pub accounts: Vec<ProviderAccount>,
534    #[serde(default, skip_serializing_if = "Vec::is_empty")]
535    pub source_account_assignments: Vec<SourceAccountAssignment>,
536    #[serde(default, skip_serializing_if = "Vec::is_empty")]
537    pub subscriptions: Vec<Subscription>,
538    #[serde(default, skip_serializing_if = "Vec::is_empty")]
539    pub events: Vec<UsageEvent>,
540    #[serde(default, skip_serializing_if = "Vec::is_empty")]
541    pub summaries: Vec<UsageSummary>,
542    pub created_at: DateTime<Utc>,
543}
544
545#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
546pub struct SyncEntityCounts {
547    pub sources: u64,
548    pub accounts: u64,
549    #[serde(default)]
550    pub source_account_assignments: u64,
551    pub subscriptions: u64,
552    pub events: u64,
553    pub summaries: u64,
554}
555
556#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
557pub struct SyncRejectedRecord {
558    pub kind: String,
559    pub id: Option<String>,
560    pub reason: String,
561}
562
563#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
564pub struct SyncAck {
565    pub schema_version: String,
566    pub batch_id: String,
567    pub accepted: SyncEntityCounts,
568    pub duplicates: SyncEntityCounts,
569    pub rejected: Vec<SyncRejectedRecord>,
570}
571
572#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, JsonSchema)]
573pub struct DailyRollup {
574    pub schema_version: String,
575    pub date: String,
576    pub device_id: String,
577    pub total_input_tokens: u64,
578    pub total_cache_creation_tokens: u64,
579    pub total_cache_read_tokens: u64,
580    pub total_output_tokens: u64,
581    pub total_reasoning_tokens: u64,
582    pub total_tokens: u64,
583    pub total_events: u64,
584    pub total_sessions: u64,
585    pub estimated_cost_usd: Option<i64>, // cents USD
586    pub by_provider: Option<String>,
587    pub by_account: Option<String>,
588    pub updated_at: DateTime<Utc>,
589}
590
591impl SourceLocation {
592    #[must_use]
593    pub fn local_adapter(
594        provider: impl Into<String>,
595        adapter_id: impl Into<String>,
596        adapter_version: impl Into<String>,
597        path: &Path,
598        location_origin: LocationOrigin,
599    ) -> Self {
600        let provider = provider.into();
601        let adapter_id = adapter_id.into();
602        let adapter_version = adapter_version.into();
603        let path_hash = path_hash(path);
604        let now = Utc::now();
605        let source_id = source_id(&provider, SourceKind::LocalAdapter, &path_hash);
606
607        Self {
608            schema_version: SOURCE_LOCATION_SCHEMA_VERSION.to_string(),
609            source_id,
610            provider,
611            source_kind: SourceKind::LocalAdapter,
612            location_origin,
613            adapter_id: Some(adapter_id),
614            adapter_version: Some(adapter_version),
615            path_hash: Some(path_hash),
616            path_label: Some(display_path(path)),
617            enabled: true,
618            verification_mode: SourceVerificationMode::Auto,
619            verified_state_hash: None,
620            created_at: now,
621            updated_at: now,
622        }
623    }
624
625    #[must_use]
626    pub fn external_report(
627        provider: impl Into<String>,
628        adapter_id: impl Into<String>,
629        adapter_version: impl Into<String>,
630        path: &Path,
631    ) -> Self {
632        let provider = provider.into();
633        let adapter_id = adapter_id.into();
634        let adapter_version = adapter_version.into();
635        let path_hash = path_hash(path);
636        let now = Utc::now();
637        let source_id = source_id(&provider, SourceKind::ExternalReport, &path_hash);
638
639        Self {
640            schema_version: SOURCE_LOCATION_SCHEMA_VERSION.to_string(),
641            source_id,
642            provider,
643            source_kind: SourceKind::ExternalReport,
644            location_origin: LocationOrigin::Configured,
645            adapter_id: Some(adapter_id),
646            adapter_version: Some(adapter_version),
647            path_hash: Some(path_hash),
648            path_label: Some(display_path(path)),
649            enabled: true,
650            verification_mode: SourceVerificationMode::Disabled,
651            verified_state_hash: None,
652            created_at: now,
653            updated_at: now,
654        }
655    }
656
657    #[must_use]
658    pub fn reported_usage(
659        provider: impl Into<String>,
660        source_kind: SourceKind,
661        adapter_id: impl Into<String>,
662        adapter_version: impl Into<String>,
663        evidence_key: impl AsRef<str>,
664        path_label: Option<String>,
665    ) -> Self {
666        let provider = provider.into();
667        let adapter_id = adapter_id.into();
668        let adapter_version = adapter_version.into();
669        let path_hash = hash_text(evidence_key.as_ref());
670        let now = Utc::now();
671        let source_id = source_id(&provider, source_kind.clone(), &path_hash);
672
673        Self {
674            schema_version: SOURCE_LOCATION_SCHEMA_VERSION.to_string(),
675            source_id,
676            provider,
677            source_kind,
678            location_origin: LocationOrigin::Configured,
679            adapter_id: Some(adapter_id),
680            adapter_version: Some(adapter_version),
681            path_hash: Some(path_hash),
682            path_label,
683            enabled: true,
684            verification_mode: SourceVerificationMode::Disabled,
685            verified_state_hash: None,
686            created_at: now,
687            updated_at: now,
688        }
689    }
690}
691
692impl UsageCounts {
693    #[must_use]
694    pub fn computed_total(&self) -> u64 {
695        self.total_tokens.unwrap_or_else(|| {
696            self.input_tokens
697                .unwrap_or(0)
698                .saturating_add(self.output_tokens.unwrap_or(0))
699                .saturating_add(self.cache_creation_tokens.unwrap_or(0))
700                .saturating_add(self.cache_read_tokens.unwrap_or(0))
701                .saturating_add(self.reasoning_tokens.unwrap_or(0))
702                .saturating_add(self.local_prompt_eval_tokens.unwrap_or(0))
703                .saturating_add(self.local_eval_tokens.unwrap_or(0))
704        })
705    }
706}
707
708#[must_use]
709pub fn hash_text(value: &str) -> String {
710    let digest = Sha256::digest(value.as_bytes());
711    hex::encode(digest)
712}
713
714#[must_use]
715pub fn sanitize_project_for_sync(project: ProjectInfo) -> Option<ProjectInfo> {
716    if !project_has_stable_identity(&project) {
717        return None;
718    }
719    Some(project)
720}
721
722#[must_use]
723pub fn sanitize_summary_for_sync(mut summary: UsageSummary) -> UsageSummary {
724    summary.source.source_record_id = None;
725    if let Some(evidence) = summary.parse_evidence.as_mut() {
726        evidence.source_line_number = None;
727        evidence.source_record_id = None;
728    }
729    summary.project = summary.project.and_then(sanitize_project_for_sync);
730    if project_contains_file_paths(summary.project.as_ref()) {
731        summary.privacy.contains_file_paths = true;
732    }
733    summary
734}
735
736#[must_use]
737pub fn path_hash(path: &Path) -> String {
738    let canonical = canonical_display(path);
739    hash_text(&canonical)
740}
741
742#[must_use]
743pub fn source_id(provider: &str, source_kind: SourceKind, stable_key: &str) -> SourceId {
744    SourceId(format!(
745        "src_{}",
746        &hash_text(&format!("{provider}:{source_kind:?}:{stable_key}"))[..24]
747    ))
748}
749
750#[must_use]
751pub fn provider_account_id(provider: &str, stable_key: &str) -> ProviderAccountId {
752    ProviderAccountId(format!(
753        "acct_{}",
754        &hash_text(&format!("{provider}:{stable_key}"))[..24]
755    ))
756}
757
/// Normalizes a provider user ID by trimming surrounding whitespace.
/// Letter case is left unchanged.
#[must_use]
pub fn normalize_provider_user_id(value: &str) -> String {
    let trimmed = value.trim();
    trimmed.to_owned()
}
762
/// Normalizes an email address by trimming surrounding whitespace and
/// lowercasing ASCII letters. Non-ASCII characters are left unchanged.
#[must_use]
pub fn normalize_email(value: &str) -> String {
    let trimmed = value.trim();
    trimmed.to_ascii_lowercase()
}
767
768fn default_identity_source_unknown() -> IdentitySource {
769    IdentitySource::Unknown
770}
771
772#[must_use]
773pub fn provider_account_stable_key(
774    provider_user_id: Option<&str>,
775    email: Option<&str>,
776) -> Option<String> {
777    provider_user_id
778        .map(normalize_provider_user_id)
779        .filter(|value| !value.is_empty())
780        .map(|value| format!("uid:{value}"))
781        .or_else(|| {
782            email
783                .map(normalize_email)
784                .filter(|value| !value.is_empty())
785                .map(|value| format!("email:{value}"))
786        })
787}
788
789#[must_use]
790pub fn provider_account_id_from_identity(
791    provider: &str,
792    provider_user_id: Option<&str>,
793    email: Option<&str>,
794) -> Option<ProviderAccountId> {
795    provider_account_stable_key(provider_user_id, email)
796        .map(|stable_key| provider_account_id(provider, &stable_key))
797}
798
799#[must_use]
800pub fn source_account_assignment_id(
801    source_id: &SourceId,
802    account: &ProviderAccountId,
803    started_at: DateTime<Utc>,
804) -> SourceAccountAssignmentId {
805    SourceAccountAssignmentId(format!(
806        "assign_{}",
807        &hash_text(&format!(
808            "{}:{}:{}",
809            source_id.0,
810            account.0,
811            started_at.to_rfc3339()
812        ))[..24]
813    ))
814}
815
816#[must_use]
817pub fn subscription_id(
818    provider: &str,
819    account: &ProviderAccountId,
820    plan: &str,
821    started_at: DateTime<Utc>,
822) -> SubscriptionId {
823    let account_key = account.0.as_str();
824    let started_at_key = started_at.to_rfc3339();
825    SubscriptionId(format!(
826        "sub_{}",
827        &hash_text(&format!("{provider}:{account_key}:{plan}:{started_at_key}"))[..24]
828    ))
829}
830
831#[must_use]
832pub fn event_id(
833    provider: &str,
834    source_id: &SourceId,
835    source_record_id: &str,
836    session_hash: Option<&str>,
837    timestamp: DateTime<Utc>,
838) -> EventId {
839    EventId(format!(
840        "evt_{}",
841        &hash_text(&format!(
842            "{provider}:{}:{source_record_id}:{}:{}",
843            source_id.0,
844            session_hash.unwrap_or(""),
845            timestamp.to_rfc3339()
846        ))[..32]
847    ))
848}
849
850#[must_use]
851pub fn semantic_event_id(provider: &str, source_id: &SourceId, semantic_key: &str) -> EventId {
852    EventId(format!(
853        "evt_{}",
854        &hash_text(&format!("{provider}:{}:{semantic_key}", source_id.0))[..32]
855    ))
856}
857
858#[must_use]
859pub fn summary_id(provider: &str, source_id: &SourceId, semantic_key: &str) -> SummaryId {
860    SummaryId(format!(
861        "sum_{}",
862        &hash_text(&format!("{provider}:{}:{semantic_key}", source_id.0))[..32]
863    ))
864}
865
866#[must_use]
867pub fn semantic_event_fingerprint(input: &SemanticFingerprintInput<'_>) -> String {
868    hash_text(&format!(
869        "{}:{}:{}:{}:{}:{}:{}:{}:{}:{}:{}:{}",
870        input.provider,
871        input.source_id.0,
872        input.started_at.to_rfc3339(),
873        input.session_hash.unwrap_or(""),
874        input.project_key.unwrap_or(""),
875        input.model_name.unwrap_or("unknown"),
876        input.input_tokens.unwrap_or(0),
877        input.cache_read_tokens.unwrap_or(0),
878        input.cache_creation_tokens.unwrap_or(0),
879        input.output_tokens.unwrap_or(0),
880        input.reasoning_tokens.unwrap_or(0),
881        input.total_tokens
882    ))
883}
884
885pub struct SemanticFingerprintInput<'a> {
886    pub provider: &'a str,
887    pub source_id: &'a SourceId,
888    pub started_at: DateTime<Utc>,
889    pub session_hash: Option<&'a str>,
890    pub project_key: Option<&'a str>,
891    pub model_name: Option<&'a str>,
892    pub input_tokens: Option<u64>,
893    pub cache_read_tokens: Option<u64>,
894    pub cache_creation_tokens: Option<u64>,
895    pub output_tokens: Option<u64>,
896    pub reasoning_tokens: Option<u64>,
897    pub total_tokens: u64,
898}
899
900#[must_use]
901pub fn canonical_display(path: &Path) -> String {
902    path.canonicalize()
903        .unwrap_or_else(|_| expand_home(path))
904        .to_string_lossy()
905        .to_string()
906}
907
908/// Display-friendly path normalization.
909/// Expands `~` for home but does NOT perform filesystem canonicalization
910/// (to avoid symlink/mount identity changes for labels).
911#[must_use]
912pub fn display_path(path: &Path) -> String {
913    expand_home(path).to_string_lossy().to_string()
914}
915
916fn expand_home(path: &Path) -> PathBuf {
917    let text = path.to_string_lossy();
918    if let Some(stripped) = text.strip_prefix("~/") {
919        if let Some(home) = home_dir() {
920            return home.join(stripped);
921        }
922    }
923    path.to_path_buf()
924}
925
/// Looks up the user's home directory from the environment.
///
/// Tries `HOME` first and then `USERPROFILE`. Returns `None` when neither
/// variable is set.
#[must_use]
pub fn home_dir() -> Option<PathBuf> {
    ["HOME", "USERPROFILE"]
        .iter()
        .find_map(|name| std::env::var_os(name))
        .map(PathBuf::from)
}
932
933#[must_use]
934pub fn expand_home_path(value: &str) -> PathBuf {
935    if value == "~" {
936        return home_dir().unwrap_or_else(|| PathBuf::from(value));
937    }
938    if let Some(rest) = value.strip_prefix("~/") {
939        if let Some(home) = home_dir() {
940            return home.join(rest);
941        }
942    }
943    PathBuf::from(value)
944}
945
946// ── Report building ────────────────────────────────────────────
947
948use chrono::Duration;
949use std::collections::{BTreeMap, BTreeSet};
950
/// Time window covered by a usage report.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ReportPeriod {
    /// The trailing window of this many days, counted back from the report's `now`.
    LastDays(i64),
    /// No lower bound on event time. This is also the only period for which
    /// `build_usage_report` includes summary rows.
    AllTime,
}
956
/// Running token and cost totals accumulated from events, summaries, or other totals.
#[derive(Debug, Clone, Default)]
pub struct UsageTotals {
    /// Sum of the records' `input_tokens`.
    pub input_tokens: u64,
    /// Sum of the records' `cache_creation_tokens`.
    pub cache_creation_tokens: u64,
    /// Sum of the records' `cache_read_tokens` (note the different name on the record side).
    pub cached_input_tokens: u64,
    /// Sum of the records' `output_tokens`.
    pub output_tokens: u64,
    /// Sum of the records' `reasoning_tokens`.
    pub reasoning_tokens: u64,
    /// Sum of each record's `computed_total()`.
    pub total_tokens: u64,
    /// Accumulated cost; stays `None` until a record that carries a cost has been added.
    pub estimated_cost_usd: Option<i64>, // cents USD
}
967
968impl UsageTotals {
969    pub fn add_event(&mut self, event: &UsageEvent) {
970        self.input_tokens += event.usage.input_tokens.unwrap_or(0);
971        self.cache_creation_tokens += event.usage.cache_creation_tokens.unwrap_or(0);
972        self.cached_input_tokens += event.usage.cache_read_tokens.unwrap_or(0);
973        self.output_tokens += event.usage.output_tokens.unwrap_or(0);
974        self.reasoning_tokens += event.usage.reasoning_tokens.unwrap_or(0);
975        self.total_tokens += event.usage.computed_total();
976        if let Some(cost) = event.cost.estimated_api_equivalent_usd {
977            self.estimated_cost_usd = Some(self.estimated_cost_usd.unwrap_or(0) + cost);
978        }
979    }
980
981    pub fn add_summary(&mut self, summary: &UsageSummary) {
982        self.input_tokens += summary.usage.input_tokens.unwrap_or(0);
983        self.cache_creation_tokens += summary.usage.cache_creation_tokens.unwrap_or(0);
984        self.cached_input_tokens += summary.usage.cache_read_tokens.unwrap_or(0);
985        self.output_tokens += summary.usage.output_tokens.unwrap_or(0);
986        self.reasoning_tokens += summary.usage.reasoning_tokens.unwrap_or(0);
987        self.total_tokens += summary.usage.computed_total();
988        if let Some(cost) = summary
989            .cost
990            .provider_reported_usd
991            .or(summary.cost.estimated_api_equivalent_usd)
992        {
993            self.estimated_cost_usd = Some(self.estimated_cost_usd.unwrap_or(0) + cost);
994        }
995    }
996
997    pub fn add_totals(&mut self, other: &UsageTotals) {
998        self.input_tokens += other.input_tokens;
999        self.cache_creation_tokens += other.cache_creation_tokens;
1000        self.cached_input_tokens += other.cached_input_tokens;
1001        self.output_tokens += other.output_tokens;
1002        self.reasoning_tokens += other.reasoning_tokens;
1003        self.total_tokens += other.total_tokens;
1004        if let Some(cost) = other.estimated_cost_usd {
1005            self.estimated_cost_usd = Some(self.estimated_cost_usd.unwrap_or(0) + cost);
1006        }
1007    }
1008}
1009
/// One row of the event section of a usage report: all in-window events that share a
/// provider and an account label.
#[derive(Debug, Clone)]
pub struct UsageReportRow {
    /// Provider name shared by every event in the row.
    pub provider: String,
    /// Account label the events were grouped under.
    pub account: String,
    /// Number of events aggregated into this row.
    pub events: u64,
    /// Token and cost totals over those events.
    pub usage: UsageTotals,
    /// Distinct source ids the events came from.
    pub sources: BTreeSet<String>,
    /// Distinct path labels of those sources (only for sources known to the report).
    pub paths: BTreeSet<String>,
}
1019
/// One row of the summary section of a usage report: all summaries that share a provider,
/// an account label and a summary format.
#[derive(Debug, Clone)]
pub struct SummaryReportRow {
    /// Provider name shared by every summary in the row.
    pub provider: String,
    /// Account label the summaries were grouped under.
    pub account: String,
    /// The summaries' `metadata.summary_format`.
    pub kind: String,
    /// Number of summaries aggregated into this row.
    pub summaries: u64,
    /// Totals reported by the summaries themselves.
    pub usage: UsageTotals,
    /// Totals of the direct events that overlap each summary's period, for comparison.
    pub direct_event_usage: UsageTotals,
    /// How many of the summaries matched their overlapping direct events exactly.
    pub exact_overlap_summaries: u64,
    /// Latest `observed_at` among the summaries in the row.
    pub observed_at: Option<DateTime<Utc>>,
    /// Distinct source ids the summaries came from.
    pub sources: BTreeSet<String>,
    /// Distinct path labels of those sources (only for sources known to the report).
    pub paths: BTreeSet<String>,
}
1033
/// One subscription in a usage report, together with the usage attributed to it.
#[derive(Debug, Clone)]
pub struct SubscriptionReportRow {
    pub subscription_id: SubscriptionId,
    pub provider: String,
    pub provider_account_id: ProviderAccountId,
    /// Display label of the account, or the raw account id when the account is unknown.
    pub account: String,
    pub plan_name: String,
    pub price: i64, // minor units (cents) of the currency
    pub currency: String,
    pub billing_period: BillingPeriod,
    pub started_at: DateTime<Utc>,
    /// Effective end of the subscription as used for attribution; `None` means open-ended.
    pub ended_at: Option<DateTime<Utc>>,
    pub status: SubscriptionStatus,
    /// Number of events attributed to this subscription.
    pub events: u64,
    /// Totals over the attributed events.
    pub usage: UsageTotals,
    /// Estimated usage cost minus the subscription price. Only set for USD subscriptions
    /// with a positive price and a known estimated cost.
    pub value_minus_price_usd: Option<i64>, // cents USD
    /// Estimated usage cost divided by the subscription price, under the same conditions.
    pub value_to_price_ratio: Option<f64>,
}
1052
/// Result of `build_usage_report`: per-account event rows, summary rows, subscription rows
/// and grand totals for one reporting window.
#[derive(Debug, Clone)]
pub struct UsageReport {
    /// Human-readable name of the period, e.g. `"last 7 days"` or `"all time"`.
    pub label: String,
    /// Inclusive lower bound of the window; `None` for an all-time report.
    pub since: Option<DateTime<Utc>>,
    /// Upper bound of the window (the `now` the report was built with).
    pub until: DateTime<Utc>,
    /// Event rows, ordered by descending total tokens.
    pub rows: Vec<UsageReportRow>,
    /// Summary rows, ordered by descending total tokens; empty unless the period is all-time.
    pub summary_rows: Vec<SummaryReportRow>,
    pub subscription_rows: Vec<SubscriptionReportRow>,
    /// Number of events across all event rows.
    pub total_events: u64,
    /// Totals across all event rows.
    pub total_usage: UsageTotals,
    /// Totals across all summary rows; kept separate from `total_usage`.
    pub total_summary_usage: UsageTotals,
}
1065
1066#[must_use]
1067pub fn build_usage_report(
1068    events: &[UsageEvent],
1069    summaries: &[UsageSummary],
1070    sources: &[SourceLocation],
1071    accounts: &[ProviderAccount],
1072    subscriptions: &[Subscription],
1073    period: ReportPeriod,
1074    now: DateTime<Utc>,
1075) -> UsageReport {
1076    let since = match period {
1077        ReportPeriod::LastDays(days) => Some(now - Duration::days(days)),
1078        ReportPeriod::AllTime => None,
1079    };
1080    let label = match period {
1081        ReportPeriod::LastDays(7) => "last 7 days".to_string(),
1082        ReportPeriod::LastDays(30) => "last 30 days".to_string(),
1083        ReportPeriod::LastDays(days) => format!("last {days} days"),
1084        ReportPeriod::AllTime => "all time".to_string(),
1085    };
1086
1087    let source_by_id: BTreeMap<_, _> = sources
1088        .iter()
1089        .map(|source| (source.source_id.0.as_str(), source))
1090        .collect();
1091    let account_by_id: BTreeMap<_, _> = accounts
1092        .iter()
1093        .map(|account| (account.provider_account_id.0.as_str(), account))
1094        .collect();
1095    let mut rows: BTreeMap<(String, String), UsageReportRow> = BTreeMap::new();
1096
1097    for event in events {
1098        if since.is_some_and(|since| event.session.started_at < since)
1099            || event.session.started_at > now
1100        {
1101            continue;
1102        }
1103
1104        let source = source_by_id.get(event.source_id.0.as_str()).copied();
1105        let account = report_account_label(event, &account_by_id);
1106        let key = (event.provider.clone(), account.clone());
1107        let row = rows.entry(key).or_insert_with(|| UsageReportRow {
1108            provider: event.provider.clone(),
1109            account,
1110            events: 0,
1111            usage: UsageTotals::default(),
1112            sources: BTreeSet::new(),
1113            paths: BTreeSet::new(),
1114        });
1115        row.events += 1;
1116        row.usage.add_event(event);
1117        row.sources.insert(event.source_id.0.clone());
1118        if let Some(source) = source {
1119            row.paths.insert(preview_path_label(source));
1120        }
1121    }
1122
1123    let mut summary_rows: BTreeMap<(String, String, String), SummaryReportRow> = BTreeMap::new();
1124    if matches!(period, ReportPeriod::AllTime) {
1125        for summary in summaries {
1126            if summary.observed_at > now {
1127                continue;
1128            }
1129
1130            let source = source_by_id.get(summary.source_id.0.as_str()).copied();
1131            let account =
1132                report_identity_label(summary.provider_account_id.as_ref(), &account_by_id);
1133            let kind = summary.metadata.summary_format.clone();
1134            let key = (summary.provider.clone(), account.clone(), kind.clone());
1135            let direct_overlap_usage =
1136                direct_usage_for_summary(summary, &account, events, &account_by_id, now);
1137            let exact_overlap =
1138                summary_usage_matches_direct_overlap(summary, &direct_overlap_usage);
1139            let row = summary_rows
1140                .entry(key.clone())
1141                .or_insert_with(|| SummaryReportRow {
1142                    provider: summary.provider.clone(),
1143                    account,
1144                    kind,
1145                    summaries: 0,
1146                    usage: UsageTotals::default(),
1147                    direct_event_usage: UsageTotals::default(),
1148                    exact_overlap_summaries: 0,
1149                    observed_at: None,
1150                    sources: BTreeSet::new(),
1151                    paths: BTreeSet::new(),
1152                });
1153            row.summaries += 1;
1154            row.usage.add_summary(summary);
1155            row.direct_event_usage.add_totals(&direct_overlap_usage);
1156            if exact_overlap {
1157                row.exact_overlap_summaries += 1;
1158            }
1159            row.observed_at = Some(
1160                row.observed_at
1161                    .map(|observed_at| observed_at.max(summary.observed_at))
1162                    .unwrap_or(summary.observed_at),
1163            );
1164            row.sources.insert(summary.source_id.0.clone());
1165            if let Some(source) = source {
1166                row.paths.insert(preview_path_label(source));
1167            }
1168        }
1169    }
1170
1171    let mut rows: Vec<_> = rows.into_values().collect();
1172    rows.sort_by(|left, right| {
1173        right
1174            .usage
1175            .total_tokens
1176            .cmp(&left.usage.total_tokens)
1177            .then_with(|| left.account.cmp(&right.account))
1178    });
1179    let total_events = rows.iter().map(|row| row.events).sum();
1180    let mut total_usage = UsageTotals::default();
1181    for row in &rows {
1182        total_usage.add_totals(&row.usage);
1183    }
1184    let mut summary_rows: Vec<_> = summary_rows.into_values().collect();
1185    summary_rows.sort_by(|left, right| {
1186        right
1187            .usage
1188            .total_tokens
1189            .cmp(&left.usage.total_tokens)
1190            .then_with(|| left.account.cmp(&right.account))
1191            .then_with(|| left.kind.cmp(&right.kind))
1192    });
1193    let mut total_summary_usage = UsageTotals::default();
1194    for row in &summary_rows {
1195        total_summary_usage.add_totals(&row.usage);
1196    }
1197    let subscription_rows =
1198        build_subscription_report_rows(events, subscriptions, &account_by_id, since, now);
1199
1200    UsageReport {
1201        label,
1202        since,
1203        until: now,
1204        rows,
1205        summary_rows,
1206        subscription_rows,
1207        total_events,
1208        total_usage,
1209        total_summary_usage,
1210    }
1211}
1212
1213fn report_account_label(event: &UsageEvent, accounts: &BTreeMap<&str, &ProviderAccount>) -> String {
1214    report_identity_label(event.provider_account_id.as_ref(), accounts)
1215}
1216
1217fn direct_usage_for_summary(
1218    summary: &UsageSummary,
1219    summary_account: &str,
1220    events: &[UsageEvent],
1221    accounts: &BTreeMap<&str, &ProviderAccount>,
1222    now: DateTime<Utc>,
1223) -> UsageTotals {
1224    let start = summary.period_start.unwrap_or(summary.observed_at);
1225    let end = summary.period_end.unwrap_or(summary.observed_at).min(now);
1226    let mut usage = UsageTotals::default();
1227    for event in events {
1228        if event.provider != summary.provider
1229            || event.session.started_at < start
1230            || event.session.started_at > end
1231        {
1232            continue;
1233        }
1234        if report_account_label(event, accounts) != summary_account {
1235            continue;
1236        }
1237        usage.add_event(event);
1238    }
1239    usage
1240}
1241
1242fn summary_usage_matches_direct_overlap(summary: &UsageSummary, direct: &UsageTotals) -> bool {
1243    if direct.total_tokens == 0 || summary.usage.computed_total() != direct.total_tokens {
1244        return false;
1245    }
1246    let summary_input = summary.usage.input_tokens.unwrap_or(0);
1247    let direct_input_matches = direct.input_tokens == summary_input
1248        || direct
1249            .input_tokens
1250            .saturating_sub(direct.cached_input_tokens)
1251            == summary_input;
1252    direct_input_matches
1253        && summary.usage.cache_creation_tokens.unwrap_or(0) == direct.cache_creation_tokens
1254        && summary.usage.cache_read_tokens.unwrap_or(0) == direct.cached_input_tokens
1255        && summary.usage.output_tokens.unwrap_or(0) == direct.output_tokens
1256        && summary.usage.reasoning_tokens.unwrap_or(0) == direct.reasoning_tokens
1257}
1258
1259fn report_identity_label(
1260    provider_account_id: Option<&ProviderAccountId>,
1261    accounts: &BTreeMap<&str, &ProviderAccount>,
1262) -> String {
1263    if let Some(account_id) = provider_account_id {
1264        if let Some(account) = accounts.get(account_id.0.as_str()) {
1265            return display_account_identity(account);
1266        }
1267    }
1268    provider_account_id
1269        .map(|id| id.0.clone())
1270        .unwrap_or_else(|| "unassigned".to_string())
1271}
1272
1273fn preview_path_label(source: &SourceLocation) -> String {
1274    let path = source.path_label.as_deref().unwrap_or("unknown");
1275    if let Some(home) = home_dir() {
1276        let home = home.to_string_lossy();
1277        if let Some(rest) = path.strip_prefix(home.as_ref()) {
1278            return format!("~{rest}");
1279        }
1280    }
1281    path.to_string()
1282}
1283
1284#[must_use]
1285pub fn timestamp_in_period(
1286    timestamp: DateTime<Utc>,
1287    started_at: DateTime<Utc>,
1288    ended_at: Option<DateTime<Utc>>,
1289) -> bool {
1290    timestamp >= started_at
1291        && ended_at
1292            .map(|ended_at| timestamp < ended_at)
1293            .unwrap_or(true)
1294}
1295
1296#[must_use]
1297pub fn periods_overlap(
1298    left_started_at: DateTime<Utc>,
1299    left_ended_at: Option<DateTime<Utc>>,
1300    right_started_at: DateTime<Utc>,
1301    right_ended_at: Option<DateTime<Utc>>,
1302) -> bool {
1303    let left_end = left_ended_at.unwrap_or(DateTime::<Utc>::MAX_UTC);
1304    let right_end = right_ended_at.unwrap_or(DateTime::<Utc>::MAX_UTC);
1305    left_started_at < right_end && right_started_at < left_end
1306}
1307
1308fn build_subscription_report_rows(
1309    events: &[UsageEvent],
1310    subscriptions: &[Subscription],
1311    accounts: &BTreeMap<&str, &ProviderAccount>,
1312    since: Option<DateTime<Utc>>,
1313    now: DateTime<Utc>,
1314) -> Vec<SubscriptionReportRow> {
1315    let mut rows = Vec::new();
1316    for subscription in subscriptions {
1317        let provider_account_id = &subscription.provider_account_id;
1318        let started_at = subscription.started_at;
1319        let ended_at = effective_subscription_ended_at(subscription);
1320        if !subscription_intersects_report_window(started_at, ended_at, since, now) {
1321            continue;
1322        }
1323        let mut usage = UsageTotals::default();
1324        let mut events_count = 0u64;
1325        for event in events {
1326            if event.provider != subscription.provider {
1327                continue;
1328            }
1329            if event.provider_account_id.as_ref() != Some(provider_account_id) {
1330                continue;
1331            }
1332            if since.is_some_and(|since| event.session.started_at < since)
1333                || event.session.started_at > now
1334            {
1335                continue;
1336            }
1337            if !timestamp_in_period(event.session.started_at, started_at, ended_at) {
1338                continue;
1339            }
1340            events_count += 1;
1341            usage.add_event(event);
1342        }
1343        let account = accounts
1344            .get(provider_account_id.0.as_str())
1345            .map(|account| display_account_identity(account))
1346            .unwrap_or_else(|| provider_account_id.0.clone());
1347        let (value_minus_price_usd, value_to_price_ratio) = subscription_value_metrics(
1348            subscription.price,
1349            &subscription.currency,
1350            usage.estimated_cost_usd,
1351        );
1352        rows.push(SubscriptionReportRow {
1353            subscription_id: subscription.subscription_id.clone(),
1354            provider: subscription.provider.clone(),
1355            provider_account_id: provider_account_id.clone(),
1356            account,
1357            plan_name: subscription.plan_name.clone(),
1358            price: subscription.price,
1359            currency: subscription.currency.clone(),
1360            billing_period: subscription.billing_period.clone(),
1361            started_at,
1362            ended_at,
1363            status: subscription.status.clone(),
1364            events: events_count,
1365            usage,
1366            value_minus_price_usd,
1367            value_to_price_ratio,
1368        });
1369    }
1370    rows.sort_by(|left, right| {
1371        right
1372            .usage
1373            .total_tokens
1374            .cmp(&left.usage.total_tokens)
1375            .then_with(|| left.started_at.cmp(&right.started_at))
1376            .then_with(|| left.plan_name.cmp(&right.plan_name))
1377    });
1378    rows
1379}
1380
1381fn effective_subscription_ended_at(subscription: &Subscription) -> Option<DateTime<Utc>> {
1382    if is_legacy_open_verified_subscription(subscription) {
1383        None
1384    } else {
1385        subscription.ended_at
1386    }
1387}
1388
1389fn is_legacy_open_verified_subscription(subscription: &Subscription) -> bool {
1390    subscription.status == SubscriptionStatus::Active
1391        && is_verified_subscription_source(&subscription.record_source)
1392        && subscription.ended_at.is_some()
1393        && subscription.ended_at == subscription.current_period_ends_at
1394}
1395
1396fn is_verified_subscription_source(source: &IdentitySource) -> bool {
1397    matches!(
1398        source,
1399        IdentitySource::LocalAuth
1400            | IdentitySource::ProviderAuth
1401            | IdentitySource::ProviderApi
1402            | IdentitySource::CookieOauth
1403            | IdentitySource::CliProbe
1404    )
1405}
1406
1407fn subscription_intersects_report_window(
1408    started_at: DateTime<Utc>,
1409    ended_at: Option<DateTime<Utc>>,
1410    since: Option<DateTime<Utc>>,
1411    now: DateTime<Utc>,
1412) -> bool {
1413    if started_at > now {
1414        return false;
1415    }
1416    let window_start = since.unwrap_or(DateTime::<Utc>::MIN_UTC);
1417    periods_overlap(
1418        started_at,
1419        ended_at,
1420        window_start,
1421        Some(now + Duration::seconds(1)),
1422    )
1423}
1424
/// Compares the estimated usage cost with the subscription price.
///
/// Returns `(estimated - price, estimated / price)`, both based on cents. Both values are
/// `None` unless the currency is USD (case-insensitive), the price is positive and an
/// estimated cost is available.
fn subscription_value_metrics(
    price_cents: i64,
    currency: &str,
    estimated_cost_usd_cents: Option<i64>,
) -> (Option<i64>, Option<f64>) {
    let comparable = currency.eq_ignore_ascii_case("USD") && price_cents > 0;
    match estimated_cost_usd_cents {
        Some(estimated) if comparable => {
            let surplus = estimated - price_cents;
            let ratio = estimated as f64 / price_cents as f64;
            (Some(surplus), Some(ratio))
        }
        _ => (None, None),
    }
}
1442
1443pub fn display_account_identity(account: &ProviderAccount) -> String {
1444    account
1445        .account_label
1446        .as_deref()
1447        .filter(|label| !label.trim().is_empty())
1448        .map(ToOwned::to_owned)
1449        .unwrap_or_else(|| account.provider_account_id.0.clone())
1450}
1451
1452#[cfg(test)]
1453mod tests {
1454    use super::*;
1455
1456    #[test]
1457    fn source_ids_are_stable_for_same_input() {
1458        let a = source_id("codex", SourceKind::LocalAdapter, "abc");
1459        let b = source_id("codex", SourceKind::LocalAdapter, "abc");
1460        assert_eq!(a, b);
1461    }
1462
1463    #[test]
1464    fn source_ids_change_by_provider() {
1465        let codex = source_id("codex", SourceKind::LocalAdapter, "abc");
1466        let claude = source_id("claude_code", SourceKind::LocalAdapter, "abc");
1467        assert_ne!(codex, claude);
1468    }
1469
1470    #[test]
1471    fn total_falls_back_to_parts() {
1472        let usage = UsageCounts {
1473            input_tokens: Some(10),
1474            output_tokens: Some(5),
1475            cache_read_tokens: Some(2),
1476            ..UsageCounts::default()
1477        };
1478        assert_eq!(usage.computed_total(), 17);
1479    }
1480
1481    #[test]
1482    fn schema_types_serialize() {
1483        let schema = schemars::schema_for!(UsageEvent);
1484        let json = serde_json::to_value(schema).expect("schema should serialize");
1485        assert!(json.get("title").is_some());
1486
1487        let schema = schemars::schema_for!(UsageSummary);
1488        let json = serde_json::to_value(schema).expect("summary schema should serialize");
1489        assert!(json.get("title").is_some());
1490    }
1491
1492    fn test_source(provider: &str, path: &str) -> SourceLocation {
1493        SourceLocation::local_adapter(
1494            provider,
1495            "test",
1496            "0",
1497            Path::new(path),
1498            LocationOrigin::Configured,
1499        )
1500    }
1501
    /// Fixture: a minimal `UsageEvent` for `provider` coming from `source`.
    ///
    /// The event has no account or subscription attached. `tokens` is reported as the total,
    /// with half of it (integer division) as input and half as output. `cost_cents` becomes
    /// the estimated API-equivalent cost. `started_at` is reused for the creation and import
    /// timestamps.
    fn test_event(
        provider: &str,
        source: &SourceLocation,
        started_at: DateTime<Utc>,
        tokens: u64,
        cost_cents: Option<i64>,
    ) -> UsageEvent {
        UsageEvent {
            schema_version: USAGE_EVENT_SCHEMA_VERSION.to_string(),
            event_id: event_id(provider, &source.source_id, "rec", None, started_at),
            device_id: "d".to_string(),
            provider: provider.to_string(),
            source_id: source.source_id.clone(),
            // Tests that need attribution set the account id on the returned value.
            provider_account_id: None,
            subscription_id: None,
            source: EventSource {
                adapter_id: "test".to_string(),
                adapter_version: "0".to_string(),
                source_kind: SourceKind::LocalAdapter,
                location_origin: Some(LocationOrigin::Configured),
                source_type: "jsonl".to_string(),
                source_path_hash: None,
                source_record_id: Some("rec".to_string()),
                parse_confidence: Confidence::High,
            },
            session: SessionInfo {
                session_id: "s".to_string(),
                local_session_id_hash: None,
                title: None,
                started_at,
                ended_at: None,
                duration_seconds: None,
            },
            model: None,
            usage: UsageCounts {
                input_tokens: Some(tokens / 2),
                output_tokens: Some(tokens / 2),
                total_tokens: Some(tokens),
                ..UsageCounts::default()
            },
            runtime: None,
            cost: CostInfo {
                currency: "USD".to_string(),
                estimated_api_equivalent_usd: cost_cents,
                provider_reported_usd: None,
                pricing_source: None,
                pricing_version: None,
                confidence: Confidence::Low,
            },
            parse_evidence: None,
            project: None,
            git: None,
            privacy: PrivacyInfo {
                mode: PrivacyMode::MetadataOnly,
                contains_prompt_text: false,
                contains_response_text: false,
                contains_file_paths: false,
            },
            created_at: started_at,
            imported_at: started_at,
        }
    }
1564
    /// Fixture: a minimal `UsageSummary` for `provider` coming from `source`.
    ///
    /// The summary has no account attached and carries no cost. `tokens` is reported both as
    /// input tokens and as the total. The summary format is always `"stats_cache"`, and
    /// `observed_at` is reused for the import and last-computed timestamps.
    fn test_summary(
        provider: &str,
        source: &SourceLocation,
        observed_at: DateTime<Utc>,
        period_start: DateTime<Utc>,
        period_end: DateTime<Utc>,
        tokens: u64,
    ) -> UsageSummary {
        UsageSummary {
            schema_version: USAGE_SUMMARY_SCHEMA_VERSION.to_string(),
            summary_id: summary_id(provider, &source.source_id, "sum"),
            device_id: "d".to_string(),
            provider: provider.to_string(),
            source_id: source.source_id.clone(),
            provider_account_id: None,
            source: EventSource {
                adapter_id: "test".to_string(),
                adapter_version: "0".to_string(),
                source_kind: SourceKind::LocalSummary,
                location_origin: Some(LocationOrigin::Configured),
                source_type: "cache".to_string(),
                source_path_hash: None,
                source_record_id: Some("rec".to_string()),
                parse_confidence: Confidence::Medium,
            },
            model: None,
            models: Vec::new(),
            usage: UsageCounts {
                input_tokens: Some(tokens),
                total_tokens: Some(tokens),
                ..UsageCounts::default()
            },
            cost: CostInfo {
                currency: "USD".to_string(),
                estimated_api_equivalent_usd: None,
                provider_reported_usd: None,
                pricing_source: None,
                pricing_version: None,
                confidence: Confidence::Low,
            },
            parse_evidence: None,
            project: None,
            privacy: PrivacyInfo {
                mode: PrivacyMode::MetadataOnly,
                contains_prompt_text: false,
                contains_response_text: false,
                contains_file_paths: false,
            },
            metrics: None,
            period_start: Some(period_start),
            period_end: Some(period_end),
            observed_at,
            metadata: SummaryMetadata {
                summary_format: "stats_cache".to_string(),
                summary_version: None,
                total_sessions: Some(1),
                total_messages: Some(10),
                last_computed_at: Some(observed_at),
            },
            imported_at: observed_at,
        }
    }
1627
1628    fn mk_dt(year: i32, month: u32, day: u32) -> DateTime<Utc> {
1629        chrono::NaiveDate::from_ymd_opt(year, month, day)
1630            .and_then(|d| d.and_hms_opt(0, 0, 0))
1631            .map(|dt| dt.and_utc())
1632            .expect("valid date")
1633    }
1634
1635    #[test]
1636    fn report_empty_inputs_returns_zero_totals() {
1637        let now = mk_dt(2026, 5, 25);
1638        let report = build_usage_report(&[], &[], &[], &[], &[], ReportPeriod::AllTime, now);
1639        assert_eq!(report.total_events, 0);
1640        assert_eq!(report.total_usage.total_tokens, 0);
1641        assert!(report.rows.is_empty());
1642        assert!(report.summary_rows.is_empty());
1643    }
1644
1645    #[test]
1646    fn report_filters_events_by_period() {
1647        let now = mk_dt(2026, 5, 25);
1648        let source = test_source("codex", "/tmp/codex");
1649        let recent = test_event("codex", &source, mk_dt(2026, 5, 24), 100, None);
1650        let old = test_event("codex", &source, mk_dt(2026, 5, 10), 200, None);
1651
1652        let report = build_usage_report(
1653            &[recent, old],
1654            &[],
1655            &[source],
1656            &[],
1657            &[],
1658            ReportPeriod::LastDays(7),
1659            now,
1660        );
1661
1662        assert_eq!(report.total_events, 1);
1663        assert_eq!(report.total_usage.total_tokens, 100);
1664    }
1665
1666    #[test]
1667    fn report_filters_out_future_events() {
1668        let now = mk_dt(2026, 5, 25);
1669        let source = test_source("codex", "/tmp/codex");
1670        let future = test_event("codex", &source, mk_dt(2026, 6, 1), 100, None);
1671        let present = test_event("codex", &source, now, 50, None);
1672
1673        let report = build_usage_report(
1674            &[future, present],
1675            &[],
1676            &[source],
1677            &[],
1678            &[],
1679            ReportPeriod::AllTime,
1680            now,
1681        );
1682
1683        assert_eq!(report.total_events, 1);
1684        assert_eq!(report.total_usage.total_tokens, 50);
1685    }
1686
1687    #[test]
1688    fn report_groups_events_by_provider_and_account() {
1689        let now = mk_dt(2026, 5, 25);
1690        let src = test_source("codex", "/tmp/codex");
1691        let e1 = test_event("codex", &src, now, 100, None);
1692        let e2 = test_event("codex", &src, now, 200, None);
1693
1694        let report =
1695            build_usage_report(&[e1, e2], &[], &[src], &[], &[], ReportPeriod::AllTime, now);
1696
1697        assert_eq!(report.rows.len(), 1);
1698        assert_eq!(report.rows[0].provider, "codex");
1699        assert_eq!(report.rows[0].events, 2);
1700        assert_eq!(report.rows[0].usage.total_tokens, 300);
1701    }
1702
1703    #[test]
1704    fn report_keeps_summaries_separate_from_events() {
1705        let now = mk_dt(2026, 5, 25);
1706        let src = test_source("claude_code", "/tmp/claude");
1707        let event = test_event("claude_code", &src, now, 100, None);
1708        let summary = test_summary(
1709            "claude_code",
1710            &src,
1711            now,
1712            mk_dt(2026, 5, 1),
1713            mk_dt(2026, 5, 25),
1714            500,
1715        );
1716
1717        let report = build_usage_report(
1718            &[event],
1719            &[summary],
1720            &[src],
1721            &[],
1722            &[],
1723            ReportPeriod::AllTime,
1724            now,
1725        );
1726
1727        assert_eq!(report.total_usage.total_tokens, 100);
1728        assert_eq!(report.total_summary_usage.total_tokens, 500);
1729        assert_eq!(report.summary_rows.len(), 1);
1730        // Direct event usage within summary period
1731        assert_eq!(report.summary_rows[0].direct_event_usage.total_tokens, 100);
1732    }
1733
1734    #[test]
1735    fn report_hides_summaries_in_non_alltime_periods() {
1736        let now = mk_dt(2026, 5, 25);
1737        let src = test_source("claude_code", "/tmp/claude");
1738        let summary = test_summary(
1739            "claude_code",
1740            &src,
1741            now,
1742            mk_dt(2026, 5, 1),
1743            mk_dt(2026, 5, 25),
1744            500,
1745        );
1746
1747        let report = build_usage_report(
1748            &[],
1749            &[summary],
1750            &[src],
1751            &[],
1752            &[],
1753            ReportPeriod::LastDays(7),
1754            now,
1755        );
1756
1757        assert!(report.summary_rows.is_empty());
1758    }
1759
1760    #[test]
1761    fn subscription_rows_respect_past_end_time() {
1762        let now = mk_dt(2026, 6, 1);
1763        let src = test_source("codex", "/tmp/codex");
1764        let account_id = provider_account_id("codex", "email:verified@example.com");
1765        let account = ProviderAccount {
1766            schema_version: PROVIDER_ACCOUNT_SCHEMA_VERSION.to_string(),
1767            provider_account_id: account_id.clone(),
1768            provider: "codex".to_string(),
1769            identity_source: IdentitySource::LocalAuth,
1770            provider_user_id: Some("11111111-2222-4333-8444-555555555555".to_string()),
1771            provider_user_id_hash: None,
1772            email: Some("verified@example.com".to_string()),
1773            email_hash: None,
1774            org_id_hash: None,
1775            account_label: None,
1776            plan_name: Some("Plus".to_string()),
1777            confidence: Confidence::High,
1778            verified_at: Some(mk_dt(2026, 5, 3)),
1779            created_at: mk_dt(2026, 5, 3),
1780            updated_at: mk_dt(2026, 5, 3),
1781        };
1782        let mut before_end = test_event("codex", &src, mk_dt(2026, 5, 29), 100, Some(100));
1783        before_end.provider_account_id = Some(account_id.clone());
1784        let mut after_end = test_event("codex", &src, mk_dt(2026, 5, 31), 200, Some(200));
1785        after_end.provider_account_id = Some(account_id.clone());
1786        let subscription = Subscription {
1787            schema_version: SUBSCRIPTION_SCHEMA_VERSION.to_string(),
1788            subscription_id: subscription_id("codex", &account_id, "Plus", mk_dt(2026, 4, 30)),
1789            provider: "codex".to_string(),
1790            provider_account_id: account_id.clone(),
1791            plan_name: "Plus".to_string(),
1792            price: 2000,
1793            currency: "USD".to_string(),
1794            billing_period: BillingPeriod::Monthly,
1795            paid_at: Some(mk_dt(2026, 4, 30)),
1796            renewal_day: Some(30),
1797            started_at: mk_dt(2026, 4, 30),
1798            ended_at: Some(mk_dt(2026, 5, 30)),
1799            current_period_ends_at: Some(mk_dt(2026, 5, 30)),
1800            status: SubscriptionStatus::Cancelled,
1801            record_source: IdentitySource::LocalAuth,
1802            verified_at: Some(mk_dt(2026, 5, 3)),
1803            notes: None,
1804        };
1805
1806        let report = build_usage_report(
1807            &[before_end, after_end],
1808            &[],
1809            &[src],
1810            &[account],
1811            &[subscription],
1812            ReportPeriod::LastDays(30),
1813            now,
1814        );
1815
1816        assert_eq!(report.subscription_rows.len(), 1);
1817        assert_eq!(report.subscription_rows[0].account, account_id.0);
1818        assert_eq!(
1819            report.subscription_rows[0].ended_at,
1820            Some(mk_dt(2026, 5, 30))
1821        );
1822        assert_eq!(report.subscription_rows[0].events, 1);
1823        assert_eq!(report.subscription_rows[0].usage.total_tokens, 100);
1824        assert_eq!(
1825            report.subscription_rows[0].usage.estimated_cost_usd,
1826            Some(100)
1827        );
1828    }
1829
1830    #[test]
1831    fn subscription_rows_keep_legacy_verified_cycle_rows_open() {
1832        let now = mk_dt(2026, 6, 1);
1833        let src = test_source("codex", "/tmp/codex");
1834        let account_id = provider_account_id("codex", "email:verified@example.com");
1835        let account = ProviderAccount {
1836            schema_version: PROVIDER_ACCOUNT_SCHEMA_VERSION.to_string(),
1837            provider_account_id: account_id.clone(),
1838            provider: "codex".to_string(),
1839            identity_source: IdentitySource::LocalAuth,
1840            provider_user_id: None,
1841            provider_user_id_hash: None,
1842            email: Some("verified@example.com".to_string()),
1843            email_hash: None,
1844            org_id_hash: None,
1845            account_label: None,
1846            plan_name: Some("Plus".to_string()),
1847            confidence: Confidence::High,
1848            verified_at: Some(mk_dt(2026, 5, 3)),
1849            created_at: mk_dt(2026, 5, 3),
1850            updated_at: mk_dt(2026, 5, 3),
1851        };
1852        let mut before_cycle_end = test_event("codex", &src, mk_dt(2026, 5, 29), 100, Some(100));
1853        before_cycle_end.provider_account_id = Some(account_id.clone());
1854        let mut after_cycle_end = test_event("codex", &src, mk_dt(2026, 5, 31), 200, Some(200));
1855        after_cycle_end.provider_account_id = Some(account_id.clone());
1856        let subscription = Subscription {
1857            schema_version: SUBSCRIPTION_SCHEMA_VERSION.to_string(),
1858            subscription_id: subscription_id("codex", &account_id, "Plus", mk_dt(2026, 4, 30)),
1859            provider: "codex".to_string(),
1860            provider_account_id: account_id,
1861            plan_name: "Plus".to_string(),
1862            price: 2000,
1863            currency: "USD".to_string(),
1864            billing_period: BillingPeriod::Monthly,
1865            paid_at: Some(mk_dt(2026, 4, 30)),
1866            renewal_day: Some(30),
1867            started_at: mk_dt(2026, 4, 30),
1868            ended_at: Some(mk_dt(2026, 5, 30)),
1869            current_period_ends_at: Some(mk_dt(2026, 5, 30)),
1870            status: SubscriptionStatus::Active,
1871            record_source: IdentitySource::LocalAuth,
1872            verified_at: Some(mk_dt(2026, 5, 3)),
1873            notes: None,
1874        };
1875
1876        let report = build_usage_report(
1877            &[before_cycle_end, after_cycle_end],
1878            &[],
1879            &[src],
1880            &[account],
1881            &[subscription],
1882            ReportPeriod::LastDays(30),
1883            now,
1884        );
1885
1886        assert_eq!(report.subscription_rows.len(), 1);
1887        assert_eq!(report.subscription_rows[0].ended_at, None);
1888        assert_eq!(report.subscription_rows[0].events, 2);
1889        assert_eq!(report.subscription_rows[0].usage.total_tokens, 300);
1890        assert_eq!(
1891            report.subscription_rows[0].usage.estimated_cost_usd,
1892            Some(300)
1893        );
1894    }
1895
1896    #[test]
1897    fn report_uses_account_label_from_registry() {
1898        let now = mk_dt(2026, 5, 25);
1899        let src = test_source("codex", "/tmp/codex");
1900        let acct_id = provider_account_id("codex", "stable");
1901        let account = ProviderAccount {
1902            schema_version: PROVIDER_ACCOUNT_SCHEMA_VERSION.to_string(),
1903            provider_account_id: acct_id.clone(),
1904            provider: "codex".to_string(),
1905            identity_source: IdentitySource::UserConfigured,
1906            provider_user_id: None,
1907            provider_user_id_hash: None,
1908            email: None,
1909            email_hash: None,
1910            org_id_hash: None,
1911            account_label: Some("work".to_string()),
1912            plan_name: None,
1913            confidence: Confidence::Medium,
1914            verified_at: None,
1915            created_at: now,
1916            updated_at: now,
1917        };
1918        let mut event = test_event("codex", &src, now, 50, None);
1919        event.provider_account_id = Some(acct_id);
1920
1921        let report = build_usage_report(
1922            &[event],
1923            &[],
1924            &[src],
1925            &[account],
1926            &[],
1927            ReportPeriod::AllTime,
1928            now,
1929        );
1930
1931        assert_eq!(report.rows[0].account, "work");
1932    }
1933
1934    #[test]
1935    fn usage_totals_accumulate_cost() {
1936        let now = mk_dt(2026, 5, 25);
1937        let src = test_source("codex", "/tmp/codex");
1938        let e1 = test_event("codex", &src, now, 100, Some(1));
1939        let e2 = test_event("codex", &src, now, 200, Some(2));
1940
1941        let report =
1942            build_usage_report(&[e1, e2], &[], &[src], &[], &[], ReportPeriod::AllTime, now);
1943
1944        assert_eq!(report.total_usage.estimated_cost_usd, Some(3));
1945    }
1946
1947    #[test]
1948    fn computed_total_does_not_overflow() {
1949        let usage = UsageCounts {
1950            input_tokens: Some(u64::MAX),
1951            output_tokens: Some(u64::MAX),
1952            ..UsageCounts::default()
1953        };
1954        let total = usage.computed_total();
1955        assert_eq!(total, u64::MAX);
1956    }
1957
1958    #[test]
1959    fn display_path_expands_home_but_avoids_canonicalize() {
1960        let p = Path::new("~/relative/test");
1961        let displayed = display_path(p);
1962        assert!(displayed.contains("relative/test"));
1963        // should not resolve to absolute via fs if ~ expanded
1964        if let Some(home) = home_dir() {
1965            let home_str = home.to_string_lossy();
1966            if displayed.starts_with(home_str.as_ref()) {
1967                // expanded, good
1968            }
1969        }
1970    }
1971
1972    #[test]
1973    fn path_hash_remains_stable_via_canonical_display() {
1974        let p = Path::new("/tmp/nonexistent-for-test");
1975        let h1 = path_hash(p);
1976        let h2 = path_hash(p);
1977        assert_eq!(h1, h2);
1978    }
1979
1980    #[test]
1981    fn bare_project_id_is_not_a_stable_project_identity() {
1982        let project = ProjectInfo {
1983            project_id: "project_bare".to_string(),
1984            project_label: Some("Bare".to_string()),
1985            repo_remote_hash: None,
1986            repo_label: None,
1987            branch_hash: None,
1988            branch_label: None,
1989            path_hash: None,
1990            path_label: None,
1991        };
1992
1993        assert!(!project_has_stable_identity(&project));
1994        assert_eq!(project_bucket_key(Some(&project)), "none");
1995    }
1996
1997    #[test]
1998    fn sanitize_project_for_sync_preserves_path_only_project_labels() {
1999        let project = ProjectInfo {
2000            project_id: "project_path_only".to_string(),
2001            project_label: Some("Scratch".to_string()),
2002            repo_remote_hash: None,
2003            repo_label: None,
2004            branch_hash: None,
2005            branch_label: None,
2006            path_hash: Some("path-hash".to_string()),
2007            path_label: Some("/Users/example/Scratch".to_string()),
2008        };
2009
2010        let sanitized = sanitize_project_for_sync(project).expect("stable path identity");
2011
2012        assert_eq!(sanitized.repo_remote_hash, None);
2013        assert_eq!(sanitized.path_hash.as_deref(), Some("path-hash"));
2014        assert_eq!(
2015            sanitized.path_label.as_deref(),
2016            Some("/Users/example/Scratch")
2017        );
2018        assert!(project_contains_file_paths(Some(&sanitized)));
2019    }
2020
2021    #[test]
2022    fn sanitize_project_for_sync_drops_bare_project_ids() {
2023        let project = ProjectInfo {
2024            project_id: "project_bare".to_string(),
2025            project_label: Some("Bare".to_string()),
2026            repo_remote_hash: None,
2027            repo_label: None,
2028            branch_hash: None,
2029            branch_label: None,
2030            path_hash: None,
2031            path_label: Some("/Users/example/Bare".to_string()),
2032        };
2033
2034        assert!(sanitize_project_for_sync(project).is_none());
2035    }
2036
2037    #[test]
2038    fn sanitize_summary_for_sync_marks_project_path_labels_as_file_paths() {
2039        let now = mk_dt(2026, 5, 25);
2040        let source = test_source("codex", "/tmp/codex");
2041        let mut summary = test_summary("codex", &source, now, now, now, 100);
2042        summary.project = Some(ProjectInfo {
2043            project_id: "project_path_only".to_string(),
2044            project_label: Some("Scratch".to_string()),
2045            repo_remote_hash: None,
2046            repo_label: None,
2047            branch_hash: None,
2048            branch_label: None,
2049            path_hash: Some("path-hash".to_string()),
2050            path_label: Some("/Users/example/Scratch".to_string()),
2051        });
2052
2053        let sanitized = sanitize_summary_for_sync(summary);
2054
2055        assert_eq!(
2056            sanitized
2057                .project
2058                .as_ref()
2059                .and_then(|project| project.path_label.as_deref()),
2060            Some("/Users/example/Scratch")
2061        );
2062        assert!(sanitized.privacy.contains_file_paths);
2063    }
2064
2065    #[test]
2066    fn preview_path_label_uses_display_label() {
2067        let mut source = test_source("codex", "/tmp/codex");
2068        source.path_label = Some("/home/testuser/work/codex".to_string());
2069        let preview = preview_path_label(&source);
2070        // if home matches, abbreviates; else full
2071        assert!(preview.contains("codex") || preview.contains("work"));
2072    }
2073}