hotdata 0.1.2

Powerful data platform API for datasets, queries, and analytics.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
//! Request/response debug logging for the Hotdata Rust SDK.
//!
//! Every outgoing HTTP call in this crate funnels its request and response
//! through this module: the generated free functions in `apis::*` (via the
//! `api.mustache` template), and the hand-written ergonomic layer —
//! [`Client::submit_query`](crate::client::Client::submit_query),
//! [`Client::upload_stream`](crate::client::Client::upload_stream), the Arrow
//! result fetch in [`crate::arrow`], and the API-token -> JWT mint in
//! [`crate::auth`]. Each emits `log::debug!` records on the [`TARGET`]
//! (`hotdata::http`) target so a host can switch them on with any `log` backend
//! and render them however it likes (e.g. the CLI's `--debug` flag, which maps
//! this target to its `>>> METHOD url` / `<<< status` output).
//!
//! The SDK itself installs no logger and prints nothing on its own — these are
//! plain `log` facade records. With no backend, or with the target filtered
//! out, the cost is a single atomic load per call (see [`log_request`]); the
//! bodies are only stringified/redacted when the target is actually enabled.
//!
//! **Redaction.** Sensitive material is masked before it ever reaches the log
//! facade, mirroring the CLI's `mask_credential`: `Authorization` bearer tokens
//! are masked (scheme preserved), and known-sensitive JSON object keys / form
//! fields (`api_token`, `refresh_token`, `secret`, `password`, …) have their
//! values masked in place — recursively, so nested bodies are covered.
//!
//! This module is hand-written and listed in `.openapi-generator-ignore`, so it
//! survives client regeneration; the `api.mustache` template emits the
//! `crate::http_log::*` calls at every generated op.

use log::{debug, log_enabled, Level};

/// `log` target for every HTTP debug record this crate emits. Hosts filter on
/// this to route the SDK's wire logs (e.g. the CLI maps it to its `--debug`
/// output) without picking up unrelated `log` traffic.
///
/// Every `debug!` call and the `log_enabled!` guard in this module pass this
/// constant as their `target:`.
pub const TARGET: &str = "hotdata::http";

/// JSON object keys and `x-www-form-urlencoded` field names whose values are
/// masked before logging. Compared case-insensitively. Mirrors the credentials
/// the CLI redacts so SDK and CLI debug output stay consistent.
///
/// Matching is against the *whole* key name (ASCII case-insensitive equality),
/// not a substring: a key such as `db_password` is not covered unless it is
/// listed here explicitly.
const SENSITIVE_KEYS: &[&str] = &[
    "authorization",
    "api_token",
    "api_key",
    "access_token",
    "refresh_token",
    "token",
    "client_secret",
    "secret",
    "secret_value",
    // The Secrets API write body field (`CreateSecretRequest`/`UpdateSecretRequest`).
    // Collides with the benign `CategoryValueInfo.value`, but masking an analytics
    // value in a debug log is far cheaper than leaking a stored secret.
    "value",
    "password",
    "passwd",
    "private_key",
    "credentials",
    "connection_string",
];

/// Placeholder substituted for a sensitive non-string value (object, array,
/// number, bool) so nested secrets can never leak through a sensitive key.
/// String values are masked with `mask_credential` instead, and `null` is left
/// as `null`.
const REDACTED: &str = "<redacted>";

/// Cap, in bytes, on the rendered length of any logged body — redacted JSON,
/// redacted form, or verbatim text alike — so a large or binary-ish payload
/// can't flood the log. When a body exceeds the cap it is cut at a char
/// boundary at or below this length and an elision marker carrying the total
/// byte count is appended, so the emitted line is slightly longer than the cap.
const MAX_BODY_LEN: usize = 4096;

/// Whether the HTTP debug target is currently enabled. Call sites guard on this
/// (cheaply) before doing any redaction work.
///
/// Returns the result of `log_enabled!` for [`TARGET`] at `Level::Debug`, i.e.
/// whether a debug record on that target would currently be accepted.
fn enabled() -> bool {
    log_enabled!(target: TARGET, Level::Debug)
}

/// Mask a credential so only a short, identifying hint of it is logged.
///
/// Lengths are measured in `char`s (not bytes), so multibyte input is never
/// sliced off a char boundary:
///
/// * 12 or more chars: first 4 + `...` + last 4 (`XXXX...YYYY`);
/// * 5 to 11 chars: first 4 + `...`, with no tail;
/// * 4 or fewer chars (including empty): the fixed placeholder `***`.
///
/// Mirrors the CLI's `mask_credential` so SDK and CLI debug logs read
/// identically.
///
/// NOTE(review): for 5–7 character inputs the four visible characters are more
/// than half of the secret; the thresholds are kept as-is to match the CLI.
pub fn mask_credential(s: &str) -> String {
    let len = s.chars().count();
    if len <= 4 {
        return "***".into();
    }

    let prefix: String = s.chars().take(4).collect();
    if len < 12 {
        // Too short to also show a tail without exposing most of the value.
        return format!("{prefix}...");
    }

    let suffix: String = s.chars().skip(len - 4).collect();
    format!("{prefix}...{suffix}")
}

/// Report whether `key` (a JSON object key or form field name) is one of the
/// [`SENSITIVE_KEYS`], compared whole and ASCII case-insensitively.
fn is_sensitive(key: &str) -> bool {
    for candidate in SENSITIVE_KEYS {
        if candidate.eq_ignore_ascii_case(key) {
            return true;
        }
    }
    false
}

/// Mask an `Authorization` header value for logging.
///
/// A value starting with the exact prefix `Bearer ` keeps that scheme and has
/// only the token after it masked. Any other value (including other schemes
/// such as `Basic …`) is passed to `mask_credential` whole, scheme included.
fn mask_auth_value(value: &str) -> String {
    match value.strip_prefix("Bearer ") {
        Some(token) => format!("Bearer {}", mask_credential(token)),
        None => mask_credential(value),
    }
}

/// Emit debug records for an outgoing request on [`TARGET`].
///
/// Produces, in order: a `>>> METHOD url` line, one line per header, and —
/// when there is one — the body. Does nothing when the target is disabled.
///
/// * Headers: the `Authorization` value is masked; a value that is not valid
///   visible ASCII/UTF-8 text is shown as `<non-utf8>`; all other headers are
///   logged unchanged.
/// * Body: a non-empty in-memory body goes through `redact_body`; an empty
///   in-memory body logs nothing; a body with no in-memory bytes (a streamed
///   upload) logs `[streaming body]` instead of being buffered.
///
/// Intended to be called on the built request before it is handed to
/// `client.execute(..)`, which consumes it.
///
/// NOTE(review): the URL is logged exactly as `req.url()` renders it, query
/// string included — confirm no credentials are ever passed as query params.
pub fn log_request(req: &reqwest::Request) {
    if !enabled() {
        return;
    }

    debug!(target: TARGET, ">>> {} {}", req.method(), req.url());

    for (name, value) in req.headers() {
        let key = name.as_str();
        let shown = value.to_str().map_or_else(
            |_| "<non-utf8>".to_string(),
            |text| {
                if key.eq_ignore_ascii_case("authorization") {
                    mask_auth_value(text)
                } else {
                    text.to_string()
                }
            },
        );
        debug!(target: TARGET, "  {key}: {shown}");
    }

    if let Some(body) = req.body() {
        match body.as_bytes() {
            // In-memory but empty: nothing worth logging.
            Some([]) => {}
            Some(bytes) => debug!(target: TARGET, "{}", redact_body(bytes)),
            // A body exists but exposes no bytes: it is streamed.
            None => debug!(target: TARGET, "[streaming body]"),
        }
    }
}

/// Emit the response status line, e.g. `<<< 200 OK`, on [`TARGET`].
///
/// The numeric code is followed by a space and the status' canonical reason
/// phrase; when the code has no canonical reason the phrase is empty. Does
/// nothing when the target is disabled.
pub fn log_response_status(status: reqwest::StatusCode) {
    if enabled() {
        let code = status.as_u16();
        let reason = status.canonical_reason().unwrap_or("");
        debug!(target: TARGET, "<<< {} {}", code, reason);
    }
}

/// Emit a response body on [`TARGET`] after passing it through `redact_body`.
///
/// Nothing is logged when the target is disabled or when `body` is empty.
pub fn log_response_body(body: &str) {
    if enabled() && !body.is_empty() {
        debug!(target: TARGET, "{}", redact_body(body.as_bytes()));
    }
}

/// Turn raw body bytes into a loggable string with sensitive values masked.
///
/// Rendering is decided in this order:
///
/// 1. Not valid UTF-8: `[binary: N bytes]`, with no content shown.
/// 2. Parses as JSON: sensitive keys are masked structurally via `redact_json`
///    and the value is re-serialized compactly.
/// 3. Looks like an `x-www-form-urlencoded` body (see `redact_form`): sensitive
///    fields are masked.
/// 4. Anything else: the text as-is.
///
/// The result of cases 2–4 is then length-capped by `truncate`, so a large
/// inline payload can't flood a host's log backend.
fn redact_body(bytes: &[u8]) -> String {
    let Ok(text) = std::str::from_utf8(bytes) else {
        return format!("[binary: {} bytes]", bytes.len());
    };

    let rendered = match serde_json::from_str::<serde_json::Value>(text) {
        Ok(mut json) => {
            redact_json(&mut json);
            // Fall back to the raw text if re-serialization ever fails.
            serde_json::to_string(&json).unwrap_or_else(|_| text.to_string())
        }
        Err(_) => redact_form(text).unwrap_or_else(|| text.to_string()),
    };

    truncate(&rendered)
}

/// Walk a JSON value in place, masking whatever sits under a sensitive key.
///
/// Objects are visited field by field: a field whose key is sensitive has its
/// entire value replaced by `redacted_value` (no descent into it), and every
/// other field is recursed into. Arrays are recursed into element by element.
/// Scalars that are not under a sensitive key are left untouched.
///
/// Replacing the value whole matters because a sensitive key can hold
/// structured secrets (e.g. `{"credentials": {"password": "…"}}`) — masking
/// only string leaves would log the surrounding object in the clear.
fn redact_json(value: &mut serde_json::Value) {
    if let serde_json::Value::Object(fields) = value {
        for (name, slot) in fields.iter_mut() {
            if !is_sensitive(name) {
                redact_json(slot);
                continue;
            }
            *slot = redacted_value(slot);
        }
    } else if let serde_json::Value::Array(items) = value {
        for item in items.iter_mut() {
            redact_json(item);
        }
    }
}

/// Mask a value that sits under a sensitive key. Strings keep a head/tail hint
/// (so a token is still identifiable); `null` stays `null` (nothing to hide);
/// every other type collapses to [`REDACTED`] so nested secrets can't leak.
fn redacted_value(value: &serde_json::Value) -> serde_json::Value {
    match value {
        serde_json::Value::String(s) => serde_json::Value::String(mask_credential(s)),
        serde_json::Value::Null => serde_json::Value::Null,
        _ => serde_json::Value::String(REDACTED.to_string()),
    }
}

/// Mask sensitive fields of an `x-www-form-urlencoded` body.
///
/// Returns `None` when `text` does not look like a form, so the caller can
/// fall back to logging it verbatim. "Looks like a form" means every
/// `&`-separated segment contains an `=` and the key before the first `=` is
/// non-empty and free of whitespace. That holds for the SDK's token-mint body
/// and fails for ordinary prose.
///
/// On success the segments are re-joined with `&`; a segment whose key is
/// sensitive has its value replaced by `mask_credential`, and every other
/// segment is emitted unchanged.
fn redact_form(text: &str) -> Option<String> {
    let mut fields: Vec<String> = Vec::new();
    for segment in text.split('&') {
        // Any malformed segment disqualifies the whole body.
        let (name, value) = segment.split_once('=')?;
        if name.is_empty() || name.contains(char::is_whitespace) {
            return None;
        }
        fields.push(if is_sensitive(name) {
            format!("{name}={}", mask_credential(value))
        } else {
            segment.to_string()
        });
    }
    Some(fields.join("&"))
}

/// Cap a rendered body at [`MAX_BODY_LEN`] bytes for logging.
///
/// Text at or under the cap is returned unchanged. Longer text is cut at the
/// nearest char boundary at or below the cap and suffixed with an elision
/// marker reporting the full byte length of the input.
fn truncate(text: &str) -> String {
    let total = text.len();
    if total > MAX_BODY_LEN {
        // Search downward from the cap; index 0 is always a char boundary.
        let cut = (0..=MAX_BODY_LEN)
            .rev()
            .find(|&idx| text.is_char_boundary(idx))
            .unwrap_or(0);
        format!("{}… [{} bytes total]", &text[..cut], total)
    } else {
        text.to_string()
    }
}

// Unit tests for the masking/redaction helpers. They exercise the pure
// functions (`mask_credential`, `mask_auth_value`, `redact_body`) directly and
// never go through the `log` facade, so no logger needs to be installed.
#[cfg(test)]
mod tests {
    use super::*;

    // 12+ chars: first four and last four characters stay visible.
    #[test]
    fn mask_credential_long_shows_head_and_tail() {
        assert_eq!(mask_credential("abcdefghijkl"), "abcd...ijkl");
        assert_eq!(
            mask_credential("hd_0123456789abcdef"),
            "hd_0...cdef"
        );
    }

    #[test]
    fn mask_credential_medium_shows_head_only() {
        // 5..=11 chars: head only, no tail.
        assert_eq!(mask_credential("abcdef"), "abcd...");
    }

    // 4 chars or fewer (including the empty string) collapse to `***`.
    #[test]
    fn mask_credential_short_is_fully_hidden() {
        assert_eq!(mask_credential("abcd"), "***");
        assert_eq!(mask_credential(""), "***");
    }

    #[test]
    fn mask_auth_preserves_bearer_scheme() {
        assert_eq!(
            mask_auth_value("Bearer eyJhbGciOiJIUzI1NiJ9.payload.signature"),
            "Bearer eyJh...ture"
        );
        // Non-bearer values are masked whole.
        assert_eq!(mask_auth_value("Basic dXNlcjpwYXNz"), "Basi...YXNz");
    }

    // Sensitive keys are masked at the top level, inside nested objects, and
    // inside objects held in arrays; non-sensitive siblings are untouched.
    #[test]
    fn json_body_masks_sensitive_keys_recursively() {
        let body = serde_json::json!({
            "name": "prod-db",
            "secret": "supersecretvalue123",
            "nested": { "api_token": "hd_abcdef0123456789", "keep": "visible" },
            "list": [ { "password": "hunter2hunter2" } ]
        })
        .to_string();
        let out = redact_body(body.as_bytes());
        let v: serde_json::Value = serde_json::from_str(&out).unwrap();
        assert_eq!(v["name"], "prod-db");
        assert_eq!(v["secret"], "supe...e123");
        assert_eq!(v["nested"]["api_token"], "hd_a...6789");
        assert_eq!(v["nested"]["keep"], "visible");
        assert_eq!(v["list"][0]["password"], "hunt...ter2");
        // The raw secret never appears in the rendered output.
        assert!(!out.contains("supersecretvalue123"));
        assert!(!out.contains("hd_abcdef0123456789"));
    }

    #[test]
    fn sensitive_object_value_is_fully_redacted() {
        // A sensitive key holding structured data must not leak its contents:
        // the whole value collapses to the placeholder (not just string leaves).
        let body = serde_json::json!({
            "credentials": { "password": "p4ssw0rd", "nested": { "token": "tkn" } },
            "secret": ["leak-a", "leak-b"],
            "keep": "visible"
        })
        .to_string();
        let out = redact_body(body.as_bytes());
        let v: serde_json::Value = serde_json::from_str(&out).unwrap();
        assert_eq!(v["credentials"], "<redacted>");
        assert_eq!(v["secret"], "<redacted>");
        assert_eq!(v["keep"], "visible");
        for leak in ["p4ssw0rd", "tkn", "leak-a", "leak-b"] {
            assert!(!out.contains(leak), "leaked {leak} via structured value:\n{out}");
        }
    }

    #[test]
    fn secret_value_and_api_key_fields_are_masked() {
        // The Secrets API `value` field and the embedding-provider `api_key`.
        let body = serde_json::json!({
            "name": "openai-key",
            "value": "supersecretvalue123",
            "api_key": "sk-abcdef0123456789"
        })
        .to_string();
        let out = redact_body(body.as_bytes());
        assert!(!out.contains("supersecretvalue123"), "secret value leaked:\n{out}");
        assert!(!out.contains("sk-abcdef0123456789"), "api_key leaked:\n{out}");
        let v: serde_json::Value = serde_json::from_str(&out).unwrap();
        assert_eq!(v["name"], "openai-key");
        assert_eq!(v["value"], "supe...e123");
    }

    #[test]
    fn non_ascii_secret_value_does_not_panic() {
        // Masking runs on arbitrary JSON strings; a multibyte secret must mask
        // on char boundaries rather than panic on a byte slice.
        let secret = "naïve—café—señor—secret—üñ";
        let body = serde_json::json!({ "secret": secret }).to_string();
        let out = redact_body(body.as_bytes());
        assert!(!out.contains(secret), "non-ascii secret leaked:\n{out}");
        // And the masker itself is char-safe on multibyte input.
        let _ = mask_credential(secret);
        assert_eq!(mask_credential("héllo wörld!"), "héll...rld!");
    }

    // A `k=v&k=v` body is recognised as a form: only the sensitive field's
    // value is masked, the remaining pairs pass through unchanged.
    #[test]
    fn form_body_masks_sensitive_fields() {
        let body = "grant_type=api_token&api_token=hd_0123456789abcdef&client_id=hotdata-rust-sdk";
        let out = redact_body(body.as_bytes());
        assert!(out.contains("grant_type=api_token"));
        assert!(out.contains("client_id=hotdata-rust-sdk"));
        assert!(out.contains("api_token=hd_0...cdef"));
        assert!(!out.contains("hd_0123456789abcdef"));
    }

    #[test]
    fn non_form_text_is_logged_verbatim() {
        // Plain prose isn't mistaken for a form (no spurious masking/mangling).
        let body = "this is not a form body";
        assert_eq!(redact_body(body.as_bytes()), body);
    }

    // Bytes that are not valid UTF-8 are summarised by length, never printed.
    #[test]
    fn binary_body_reports_byte_count() {
        let out = redact_body(&[0xff, 0xfe, 0x00, 0x01]);
        assert_eq!(out, "[binary: 4 bytes]");
    }

    // A verbatim (non-JSON, non-form) body over the cap is cut and marked.
    #[test]
    fn overlong_plain_body_is_truncated() {
        let body = "x".repeat(MAX_BODY_LEN + 100);
        let out = redact_body(body.as_bytes());
        assert!(out.len() < body.len());
        assert!(out.contains("bytes total]"));
    }

    #[test]
    fn overlong_json_body_is_truncated() {
        // A large inline JSON result must be capped too, not just the verbatim
        // fallback — otherwise a big query response could flood the log backend.
        let big = "y".repeat(MAX_BODY_LEN * 2);
        let body = serde_json::json!({ "rows": big }).to_string();
        assert!(body.len() > MAX_BODY_LEN);
        let out = redact_body(body.as_bytes());
        // The +64 slack leaves room for the appended elision marker.
        assert!(out.len() <= MAX_BODY_LEN + 64, "json body not capped: {} bytes", out.len());
        assert!(out.contains("bytes total]"));
    }
}