cognee_telemetry/payload.rs
1//! Strongly-typed serde model of the `send_telemetry` proxy payload.
2//!
3//! Field-for-field parity with Python's
4//! `cognee.shared.utils.send_telemetry` (utils.py:176-228). Includes
5//! the backward-compat `api_key_hash` alias (utils.py:226) which
6//! carries the same value as `api_key_tracking_id`.
7
8#[cfg(feature = "telemetry")]
9use serde::Serialize;
10#[cfg(feature = "telemetry")]
11use serde_json::Value;
12
/// Request body for the telemetry proxy, sent as
/// `POST https://test.prometh.ai`.
///
/// All string fields borrow from the caller for the lifetime `'a`; the
/// struct is only ever serialized, never deserialized.
#[cfg(feature = "telemetry")]
#[derive(Debug, Serialize)]
pub struct TelemetryPayload<'a> {
    /// Project-local uuid4 read from `<project_root>/.anon_id`.
    pub anonymous_id: &'a str,
    /// Event name chosen by the caller, for example `"cognee.forget"`.
    pub event_name: &'a str,
    /// The identity tuple, nested under the `user_properties` key.
    pub user_properties: UserProperties<'a>,
    /// The identity tuple again, together with `time` and the
    /// URL-sanitized, caller-supplied `additional_properties` spread
    /// into the same object.
    pub properties: Properties<'a>,
}
28
/// The identity tuple as it appears under the `user_properties` key.
///
/// Kept as a nested object, matching the Python payload, so that
/// dashboards which flatten only `user_properties` still receive the
/// complete identity.
#[cfg(feature = "telemetry")]
#[derive(Debug, Serialize)]
pub struct UserProperties<'a> {
    /// Cognee `User.id`, or a symbolic identifier such as `"sdk"`.
    pub user_id: &'a str,
    /// Persistent device identifier: a uuid5 built from a
    /// machine-id-derived seed.
    pub persistent_id: &'a str,
    /// HMAC-derived tracking id for the API key; the empty string when
    /// no key is configured.
    pub api_key_tracking_id: &'a str,
    /// Legacy name for `api_key_tracking_id`, kept for backward
    /// compatibility. Always carries the same value.
    pub api_key_hash: &'a str,
}
44
/// The `properties` view of the payload: identity, version information
/// and the sanitized caller-supplied properties in one flat object.
///
/// The caller's `additional_properties` are not nested; they are
/// flattened into this object on the wire, mirroring the dict spread
/// done on the Python side.
///
/// # Reserved keys
///
/// Callers MUST NOT use any of the following names inside
/// `additional_properties`. A collision yields duplicate JSON keys, and
/// how a consumer deduplicates them is implementation-defined:
///
/// `time`, `user_id`, `anonymous_id`, `persistent_id`,
/// `api_key_tracking_id`, `api_key_hash`, `sdk_runtime`,
/// `cognee_version`.
#[cfg(feature = "telemetry")]
#[derive(Debug, Serialize)]
pub struct Properties<'a> {
    /// Current date rendered as `MM/DD/YYYY`, the same shape as
    /// Python's `current_time.strftime("%m/%d/%Y")`.
    pub time: String,
    /// Copy of the identity `user_id`, duplicated here for dashboards
    /// that flatten only the `properties` view.
    pub user_id: &'a str,
    /// Copy of the identity `anonymous_id`, duplicated here for
    /// dashboards that flatten only the `properties` view.
    pub anonymous_id: &'a str,
    /// Copy of the identity `persistent_id`, duplicated here for
    /// dashboards that flatten only the `properties` view.
    pub persistent_id: &'a str,
    /// Copy of the identity `api_key_tracking_id`, duplicated here for
    /// dashboards that flatten only the `properties` view.
    pub api_key_tracking_id: &'a str,
    /// Legacy name for `api_key_tracking_id`, kept for backward
    /// compatibility. Always carries the same value.
    pub api_key_hash: &'a str,
    /// Runtime marker, `"rust"` for this SDK. Added per locked
    /// decision 2 so the proxy can tell Rust events from Python events
    /// while still grouping identities across SDKs.
    pub sdk_runtime: &'static str,
    /// Version of the Cognee crate, taken from
    /// `env!("CARGO_PKG_VERSION")`.
    pub cognee_version: &'static str,
    /// Properties supplied by the caller, expected to have already been
    /// passed through `sanitize_nested_properties` (URL keys hashed).
    /// `#[serde(flatten)]` merges them into this object instead of
    /// nesting them under an `additional` key.
    #[serde(flatten)]
    pub additional: AdditionalProperties,
}
87
/// Caller-supplied JSON object that gets flattened into [`Properties`].
///
/// This is a thin wrapper around a `serde_json::Map`: the wrapper lets
/// `#[serde(flatten)]` operate on what is conceptually a
/// `Value::Object`, and gives sanitization code a way to take the map
/// out and put it back. `#[serde(transparent)]` makes the wrapper
/// serialize exactly like the map it holds.
#[cfg(feature = "telemetry")]
#[derive(Debug, Default, Serialize)]
#[serde(transparent)]
pub struct AdditionalProperties {
    /// The wrapped key/value pairs; empty by default.
    inner: serde_json::Map<String, Value>,
}
97
#[cfg(feature = "telemetry")]
impl AdditionalProperties {
    /// Build the wrapper from an optional caller-provided JSON value.
    ///
    /// * `Some(Value::Object(_))` — the map is adopted as-is.
    /// * `Some(_)` of any other JSON kind (array, string, number, …) —
    ///   the value is discarded, a `tracing::debug` event naming the
    ///   offending JSON kind is emitted, and an empty set is returned.
    ///   Python coerces silently; we diverge for safety because the
    ///   payload contract requires a flat object.
    /// * `None` — an empty set is returned.
    pub fn from_value(v: Option<Value>) -> Self {
        match v {
            Some(Value::Object(map)) => Self { inner: map },
            Some(other) => {
                // Report the JSON kind of the rejected value. The static
                // Rust type name would always be `serde_json::Value`
                // here and so would carry no diagnostic information.
                tracing::debug!(
                    target: "cognee.telemetry",
                    actual_type = Self::json_type_name(&other),
                    "additional_properties was not an object; dropping"
                );
                Self::default()
            }
            None => Self::default(),
        }
    }

    /// Move the inner map out as a [`Value::Object`], leaving `self`
    /// empty. Pair with [`Self::replace_with`] after sanitizing.
    ///
    /// Note that, despite the `as_…_mut` name, this does not hand out
    /// a borrow: the contents are taken via `std::mem::take`, so `self`
    /// stays empty until [`Self::replace_with`] restores a map.
    pub fn as_value_mut(&mut self) -> Value {
        Value::Object(std::mem::take(&mut self.inner))
    }

    /// Restore the contents from a sanitized [`Value`].
    ///
    /// An object replaces the current map. Any other JSON kind is
    /// rejected: the current contents are left untouched and a
    /// `tracing::debug` event is emitted so the loss is observable
    /// (defensive — sanitization should never change the outer type).
    pub fn replace_with(&mut self, v: Value) {
        match v {
            Value::Object(map) => self.inner = map,
            other => {
                tracing::debug!(
                    target: "cognee.telemetry",
                    actual_type = Self::json_type_name(&other),
                    "sanitized additional_properties was not an object; ignoring"
                );
            }
        }
    }

    /// Name the JSON kind of `v` for diagnostics.
    fn json_type_name(v: &Value) -> &'static str {
        match v {
            Value::Null => "null",
            Value::Bool(_) => "boolean",
            Value::Number(_) => "number",
            Value::String(_) => "string",
            Value::Array(_) => "array",
            Value::Object(_) => "object",
        }
    }
}
135
/// Render `now` as a `MM/DD/YYYY` date string.
///
/// The pattern is the same one the Python implementation passes to
/// `current_time.strftime` (`"%m/%d/%Y"`, utils.py:206), so both SDKs
/// emit the `time` field in the same shape.
#[cfg(feature = "telemetry")]
pub fn format_time_field(now: chrono::DateTime<chrono::Utc>) -> String {
    // strftime-style pattern: zero-padded month / day / four-digit year.
    const DATE_PATTERN: &str = "%m/%d/%Y";

    let rendered = now.format(DATE_PATTERN);
    rendered.to_string()
}
142
#[cfg(all(test, feature = "telemetry"))]
#[allow(
    clippy::unwrap_used,
    clippy::expect_used,
    reason = "test code — panics are acceptable failures"
)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Tracking id used for every identity slot in the payload fixture.
    const TRACKING_ID: &str = "ak_deadbeefcafebabe0123456789abcdef";

    /// Serialize `props` and require the result to be an empty JSON object.
    fn assert_serializes_empty(props: &AdditionalProperties) {
        let rendered = serde_json::to_value(props).expect("serialize");
        assert_eq!(rendered, json!({}));
    }

    /// A fixed UTC instant must render as zero-padded `MM/DD/YYYY`.
    #[test]
    fn time_field_format() {
        let parsed = chrono::DateTime::parse_from_rfc3339("2026-05-06T12:00:00Z")
            .expect("rfc3339 fixture");
        let instant = parsed.with_timezone(&chrono::Utc);
        assert_eq!(format_time_field(instant), "05/06/2026");
    }

    /// The serialized payload exposes the expected keys, including the
    /// flattened caller-supplied property.
    #[test]
    fn payload_roundtrips_to_python_compatible_json() {
        let user_properties = UserProperties {
            user_id: "u-id",
            persistent_id: "p-id",
            api_key_tracking_id: TRACKING_ID,
            api_key_hash: TRACKING_ID,
        };
        let properties = Properties {
            time: "05/06/2026".into(),
            user_id: "u-id",
            anonymous_id: "a-id",
            persistent_id: "p-id",
            api_key_tracking_id: TRACKING_ID,
            api_key_hash: TRACKING_ID,
            sdk_runtime: "rust",
            cognee_version: "0.1.0",
            additional: AdditionalProperties::from_value(Some(json!({
                "endpoint": "POST /api/v1/forget",
            }))),
        };
        let payload = TelemetryPayload {
            anonymous_id: "a-id",
            event_name: "cognee.forget",
            user_properties,
            properties,
        };

        let wire = serde_json::to_value(&payload).expect("serialize");
        let user_view = &wire["user_properties"];
        let props_view = &wire["properties"];

        // Spot-check the wire schema.
        assert_eq!(wire["anonymous_id"], "a-id");
        assert_eq!(wire["event_name"], "cognee.forget");
        assert_eq!(user_view["api_key_hash"], user_view["api_key_tracking_id"]);
        assert_eq!(props_view["sdk_runtime"], "rust");
        assert_eq!(props_view["time"], "05/06/2026");
        // The caller-supplied property sits directly inside `properties`.
        assert_eq!(props_view["endpoint"], "POST /api/v1/forget");
    }

    /// Non-object input and missing input both produce an empty object.
    #[test]
    fn from_value_drops_non_object() {
        let from_array = AdditionalProperties::from_value(Some(json!([1, 2, 3])));
        assert_serializes_empty(&from_array);

        let from_none = AdditionalProperties::from_value(None);
        assert_serializes_empty(&from_none);
    }
}
211}