Skip to main content

agent_block_types/
obs.rs

1use std::borrow::Cow;
2use std::sync::OnceLock;
3
4use reqwest::Url;
5use uuid::Uuid;
6
/// Placeholder emitted in place of a value whose key is classified as
/// sensitive, so the real value is never written to an observability line.
const REDACTED: &str = "[REDACTED]";
8
9/// Returns a process-scoped agent ID that is generated once and reused for the
10/// lifetime of the process.  The semantic scope of `agent_id` is
11/// "one agent-block execution", which is coarser than `run_id` (per-call).
12/// Both currently collapse to the same generated value in single-run
13/// invocations, but the conceptual distinction is preserved so that future
14/// deployments can evolve the two scopes independently (e.g. long-running
15/// daemon vs. per-request).
16fn process_agent_id() -> &'static str {
17    static AGENT_ID: OnceLock<String> = OnceLock::new();
18    AGENT_ID.get_or_init(|| Uuid::new_v4().to_string())
19}
20
21/// Build the observability context tuple `(trace_id, run_id, agent_id, agent_name)`.
22///
23/// Resolution order for `agent_id`:
24/// 1. `AGENT_BLOCK_AGENT_ID` environment variable (non-empty)
25/// 2. `fallback_agent_id` argument (non-None)
26/// 3. Process-scoped auto-generated UUID v4 (generated once per process lifetime).
27///    Scope: one agent-block execution = one `agent_id`.  Conceptually coarser than
28///    `run_id` (per-call), though both may share the same value in simple invocations.
29pub fn obs_context(fallback_agent_id: Option<&str>) -> (String, String, String, String) {
30    let trace_id = std::env::var("AGENT_BLOCK_TRACE_ID").unwrap_or_default();
31    let run_id = std::env::var("AGENT_BLOCK_RUN_ID").unwrap_or_default();
32    let agent_id = std::env::var("AGENT_BLOCK_AGENT_ID")
33        .ok()
34        .filter(|v| !v.is_empty())
35        .or_else(|| fallback_agent_id.map(ToString::to_string))
36        .unwrap_or_else(|| process_agent_id().to_string());
37    let agent_name = std::env::var("AGENT_BLOCK_AGENT_NAME").unwrap_or_default();
38    (trace_id, run_id, agent_id, agent_name)
39}
40
41pub fn obs_line(
42    component: &str,
43    event: &str,
44    ctx: &(String, String, String, String),
45    extra: &[(&str, &str)],
46) -> String {
47    let mut parts = vec![
48        "prefix=ab.obs".to_string(),
49        format!("event={}", event),
50        format!("component={}", component),
51        format!("trace_id={}", kv_escape("trace_id", &ctx.0)),
52        format!("run_id={}", kv_escape("run_id", &ctx.1)),
53        format!("agent_id={}", kv_escape("agent_id", &ctx.2)),
54        format!("agent_name={}", kv_escape("agent_name", &ctx.3)),
55    ];
56    for (k, v) in extra {
57        parts.push(format!("{}={}", k, kv_escape(k, v)));
58    }
59    parts.join(" ")
60}
61
62fn kv_escape(key: &str, value: &str) -> String {
63    let safe = sanitize_value(key, value);
64    if safe.is_empty() {
65        "\"\"".to_string()
66    } else if safe.chars().any(|c| c.is_whitespace() || c == '=') {
67        serde_json::Value::String(safe.into_owned()).to_string()
68    } else {
69        safe.into_owned()
70    }
71}
72
73fn sanitize_value<'a>(key: &str, value: &'a str) -> Cow<'a, str> {
74    if is_sensitive_key(key) {
75        return Cow::Borrowed(REDACTED);
76    }
77    if key.eq_ignore_ascii_case("url") {
78        return Cow::Owned(sanitize_url(value));
79    }
80    Cow::Borrowed(value)
81}
82
/// Report whether `key` names a value that must not be logged.
///
/// The key is lowercased (ASCII only) and considered sensitive when it
/// contains any of the known credential-related substrings, so for example
/// `X-Api_Key` and `refresh_token` both match.
fn is_sensitive_key(key: &str) -> bool {
    // Substrings that mark a key as carrying a credential or secret.
    const SENSITIVE_NEEDLES: &[&str] = &[
        "authorization",
        "cookie",
        "set-cookie",
        "token",
        "secret",
        "password",
        "passwd",
        "api_key",
        "apikey",
        "access_key",
        "private_key",
    ];

    let lowered = key.to_ascii_lowercase();
    for needle in SENSITIVE_NEEDLES {
        if lowered.contains(*needle) {
            return true;
        }
    }
    false
}
101
102pub fn sanitize_url(raw: &str) -> String {
103    match Url::parse(raw) {
104        Ok(mut u) => {
105            let _ = u.set_username("");
106            let _ = u.set_password(None);
107            u.set_query(None);
108            u.set_fragment(None);
109            u.to_string()
110        }
111        Err(_) => {
112            // URL is not parseable: preserve the first 16 characters so a human
113            // can identify the target from logs (e.g. a typo like "htps://...").
114            // Strip both `user:pass@` userinfo AND `?query` / `#fragment` so
115            // credentials or tokens embedded anywhere in the authority / query /
116            // fragment cannot leak even in the truncated form.  Path remains
117            // (truncated) so the host/path prefix is still identifiable.
118            let sanitized = redact_userinfo(raw);
119            let cut_end = sanitized.find(['?', '#']).unwrap_or(sanitized.len());
120            let trimmed = &sanitized[..cut_end];
121            if trimmed.len() <= 16 {
122                trimmed.to_string()
123            } else {
124                format!("{}...", &trimmed[..16])
125            }
126        }
127    }
128}
129
/// Remove `user:pass@` userinfo from an unparseable URL string.
///
/// Uses simple substring heuristics:
///
/// 1. The authority starts right after a leading `//` (scheme-relative form)
///    or, failing that, right after the first `://`.
/// 2. The authority ends at the first `/`, `?` or `#` after that point, or at
///    the end of the string.
/// 3. Everything in the authority up to and including its LAST `@` is
///    removed.  The last `@` is used so that a password which itself contains
///    `@` is removed completely.
///
/// An `@` that appears only in the path, query or fragment is left alone.
/// If no authority or no `@` inside it is found, the input is returned
/// unchanged.
fn redact_userinfo(raw: &str) -> String {
    let authority_start = if raw.starts_with("//") {
        Some(2)
    } else {
        raw.find("://").map(|scheme_end| scheme_end + 3)
    };

    if let Some(start) = authority_start {
        let rest = &raw[start..];
        let authority_len = rest.find(['/', '?', '#']).unwrap_or(rest.len());
        if let Some(at_pos) = rest[..authority_len].rfind('@') {
            // Reconstruct: everything up to the authority + everything after "@".
            return format!("{}{}", &raw[..start], &rest[at_pos + 1..]);
        }
    }
    raw.to_string()
}
148
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn process_agent_id_is_non_empty_and_stable() {
        // process_agent_id() must return a non-empty value and the same
        // value on every call (OnceLock semantics within this process).
        let id1 = process_agent_id();
        let id2 = process_agent_id();
        assert!(!id1.is_empty(), "process_agent_id must not be empty");
        assert_eq!(
            id1, id2,
            "process_agent_id must be stable within the process"
        );
    }

    #[test]
    fn obs_context_fallback_agent_id_wins_over_auto() {
        // When the environment variable is absent or empty and a fallback is
        // provided, the fallback takes priority over the auto-generated ID.
        // The test never mutates the process environment (that would be flaky
        // under parallel test execution); if the variable happens to be set,
        // the environment wins by design and the assertion is skipped.
        let fallback = "test-fallback-agent-xxx";
        if std::env::var("AGENT_BLOCK_AGENT_ID")
            .unwrap_or_default()
            .is_empty()
        {
            let (_, _, id, _) = obs_context(Some(fallback));
            assert_eq!(id, fallback);
        }
    }

    #[test]
    fn sanitize_url_strips_credentials_and_query() {
        let raw = "https://user:pass@example.com/path?q=1&r=2#frag";
        let got = sanitize_url(raw);
        assert_eq!(got, "https://example.com/path");
    }

    #[test]
    fn sanitize_url_unknown_scheme_is_returned_intact() {
        // A misspelled scheme is still a syntactically valid scheme, so this
        // input is expected to be handled like any other URL rather than
        // being truncated.
        let raw = "htps://x.com";
        let got = sanitize_url(raw);
        assert_eq!(got, "htps://x.com");
    }

    #[test]
    fn sanitize_url_unknown_scheme_strips_secrets() {
        let raw = "htps://user:pass@example.com/path";
        let got = sanitize_url(raw);
        assert!(!got.contains("pass"), "password must be stripped: {got}");
        assert!(!got.contains("user"), "username must be stripped: {got}");

        let raw2 = "htps://api.x/?token=SUPER_SECRET_VALUE_XYZ";
        let got2 = sanitize_url(raw2);
        assert!(
            !got2.contains("SECRET"),
            "query secret must be stripped: {got2}"
        );
        assert!(
            !got2.contains("token"),
            "query key must also be stripped: {got2}"
        );
    }

    #[test]
    fn sanitize_url_malformed_truncates_to_16_chars() {
        // An unparseable input is reduced to its first 16 characters so log
        // readers can still identify the target.
        let raw = "not a valid url ://::garbage";
        let got = sanitize_url(raw);
        // The first 16 chars are "not a valid url " (with the trailing
        // space), followed by the "..." truncation marker.
        assert_eq!(got, "not a valid url ...");
    }

    #[test]
    fn sanitize_url_empty_string_returns_empty() {
        let got = sanitize_url("");
        assert_eq!(got, "");
    }

    #[test]
    fn sanitize_url_short_unparseable_returns_as_is() {
        // The non-numeric port makes parsing fail, so this exercises the
        // fallback branch.  At 12 chars (<= 16) it is returned untruncated.
        let raw = "http://x:bad";
        let got = sanitize_url(raw);
        assert_eq!(got, "http://x:bad");
    }

    #[test]
    fn sanitize_url_unparseable_strips_userinfo() {
        // Even for unparseable URLs (invalid port here), obvious user:pass@
        // patterns are stripped before the prefix is taken.
        let raw = "http://user:pass@example.com:notaport/path";
        let got = sanitize_url(raw);
        assert!(
            !got.contains("pass"),
            "password must be stripped from unparseable URL: {got}"
        );
        assert!(
            !got.contains("user"),
            "username must be stripped from unparseable URL: {got}"
        );
        assert!(
            got.starts_with("http://example"),
            "host prefix must remain identifiable: {got}"
        );
    }

    #[test]
    fn sanitize_url_unparseable_strips_query_and_fragment() {
        // Even for unparseable URLs (invalid port here), secrets embedded in
        // ?query or #fragment must be stripped before truncation, otherwise
        // a secret sitting within the kept prefix would leak.
        let raw = "http://x:bad/?token=SUPER_SECRET_VALUE_XYZ";
        let got = sanitize_url(raw);
        assert!(
            !got.contains("SECRET"),
            "query secret must be stripped: {got}"
        );
        assert!(
            !got.contains("token"),
            "query key must also be stripped: {got}"
        );

        let raw2 = "http://x:bad/#token=SECRET";
        let got2 = sanitize_url(raw2);
        assert!(
            !got2.contains("SECRET"),
            "fragment secret must be stripped: {got2}"
        );
        assert!(
            !got2.contains("token"),
            "fragment key must also be stripped: {got2}"
        );
    }
}