Skip to main content

sentinel_core/
event.rs

1//! Core event types for the perf-sentinel pipeline.
2
3use std::sync::Arc;
4
5use serde::{Deserialize, Serialize};
6
7/// The type of I/O operation a span represents.
8#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
9#[serde(rename_all = "snake_case")]
10pub enum EventType {
11    Sql,
12    HttpOut,
13}
14
15/// Source context for the span (which endpoint/method triggered it).
16#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
17pub struct EventSource {
18    pub endpoint: String,
19    pub method: String,
20}
21
/// Upper bound, in bytes, on the length of a `trace_id` or `span_id`.
///
/// OpenTelemetry uses 32 hex chars for trace IDs and 16 for span IDs;
/// the limit is set at 128 so non-standard ID formats still fit.
pub const MAX_ID_LENGTH: usize = 128;
27
28/// Truncate an ID field (`trace_id`, `span_id`) to [`MAX_ID_LENGTH`].
29///
30/// Uses char-boundary-aware truncation to avoid panicking on multi-byte UTF-8.
31/// Delegates to [`truncate_field`] after a one-time clone to keep the
32/// char-boundary walk in a single place.
33#[must_use]
34pub fn sanitize_id(id: &str) -> String {
35    let mut s = id.to_string();
36    truncate_field(&mut s, MAX_ID_LENGTH);
37    s
38}
39
/// Byte limit applied to the `service` field.
pub const MAX_SERVICE_LENGTH: usize = 256;

/// Byte limit applied to the `operation` field.
pub const MAX_OPERATION_LENGTH: usize = 256;

/// Byte limit applied to the `target` field (64 KB).
/// The SQL normalizer enforces its own 64 KB limit; this one adds
/// defense-in-depth at the ingestion boundary.
pub const MAX_TARGET_LENGTH: usize = 64 * 1024;

/// Byte limit applied to `source.endpoint` and `source.method`.
pub const MAX_SOURCE_LENGTH: usize = 512;

/// Byte limit applied to `code_function`.
pub const MAX_CODE_FUNCTION_LENGTH: usize = 512;

/// Byte limit applied to `code_filepath`.
pub const MAX_CODE_FILEPATH_LENGTH: usize = 1024;

/// Byte limit applied to `code_namespace`.
pub const MAX_CODE_NAMESPACE_LENGTH: usize = 512;

/// Byte limit applied to a single instrumentation scope name.
/// Real OpenTelemetry scope names are short (`io.opentelemetry.spring-data-3.0`
/// is 33 bytes), so 256 leaves comfortable headroom while bounding the
/// memory amplification of the per-finding Vec clone.
pub const MAX_SCOPE_NAME_LENGTH: usize = 256;

/// Cap on how many instrumentation scopes are kept per span. Matches
/// the OTLP parent-walk depth bound (`CODE_ATTRS_MAX_DEPTH = 8`). The
/// JSON ingest path has no such structural bound, so the cap fires there.
pub const MAX_INSTRUMENTATION_SCOPES: usize = 8;
73
/// Shorten `s` in place so it holds at most `max_len` bytes, cutting only
/// on a UTF-8 char boundary.
///
/// Strings that already fit are left untouched. When `max_len` falls inside
/// a multi-byte character, the cut moves back to the start of that
/// character, so the result may be slightly shorter than `max_len`.
///
/// Shared between span-event sanitization and the daemon ack store
/// (`crate::daemon::ack`). Keep behavior strictly bytes-and-char-boundary,
/// do not add domain-specific normalization here.
pub(crate) fn truncate_field(s: &mut String, max_len: usize) {
    if s.len() > max_len {
        // Offset 0 is always a char boundary, so the search cannot come up
        // empty; the fallback only satisfies the type checker.
        let cut = (0..=max_len)
            .rev()
            .find(|&idx| s.is_char_boundary(idx))
            .unwrap_or(0);
        s.truncate(cut);
    }
}
89
90/// Drop the field if it contains any ASCII control character, otherwise truncate.
91///
92/// Mirrors the silent-drop posture used for `cloud.region` invalid values.
93/// Control characters in `code.*` would render badly in TUI/CLI output and
94/// could enable log-injection if any future log site emitted them raw.
95fn sanitize_optional_arc_str(field: &mut Option<Arc<str>>, max_len: usize) {
96    if field
97        .as_deref()
98        .is_some_and(crate::config::has_control_char)
99    {
100        *field = None;
101        return;
102    }
103    if let Some(s) = field.as_ref()
104        && s.len() > max_len
105    {
106        let mut tmp = s.to_string();
107        truncate_field(&mut tmp, max_len);
108        *field = Some(Arc::from(tmp));
109    }
110}
111
/// Truncate an `Arc<str>` field to at most `max_len` bytes via
/// alloc-and-replace.
///
/// `Arc<str>` is immutable, so an oversized value is replaced by a fresh
/// `Arc<str>` built directly from the truncated prefix. Building from the
/// slice costs a single allocation; going through an intermediate `String`
/// would cost two, because `Arc::<str>::from(String)` copies the bytes into
/// a new Arc allocation rather than reusing the `String` buffer.
///
/// The cut is made on a UTF-8 char boundary, so the result can be slightly
/// shorter than `max_len`. Values that already fit keep their existing
/// allocation.
fn truncate_arc_str(field: &mut Arc<str>, max_len: usize) {
    if field.len() <= max_len {
        return;
    }
    // Walk back from the byte limit to the nearest char boundary. Offset 0
    // is always a boundary, so the loop terminates.
    let mut end = max_len;
    while end > 0 && !field.is_char_boundary(end) {
        end -= 1;
    }
    *field = Arc::from(&field[..end]);
}
125
126/// Drop entries with control characters, truncate the remainder to
127/// `max_len` and cap the Vec at `max_count`.
128///
129/// Used for `instrumentation_scopes` (OpenTelemetry scope names from
130/// arbitrary agents, including the JSON ingest path which has no
131/// structural depth bound). Bounds both the per-element and per-event
132/// memory amplification when those scope names propagate into the
133/// per-finding clone.
134fn sanitize_arc_str_vec(field: &mut Vec<Arc<str>>, max_len: usize, max_count: usize) {
135    field.retain(|s| !crate::config::has_control_char(s));
136    if field.len() > max_count {
137        field.truncate(max_count);
138    }
139    for s in field.iter_mut() {
140        truncate_arc_str(s, max_len);
141    }
142}
143
144/// Sanitize all string fields on a [`SpanEvent`] to enforce length limits.
145///
146/// Maximum length for the `timestamp` field (bytes).
147/// ISO 8601 with microseconds and timezone is at most ~30 chars.
148const MAX_TIMESTAMP_LENGTH: usize = 64;
149
150/// Called at every ingestion boundary (OTLP, JSON, Jaeger, Zipkin) to
151/// prevent unbounded memory growth from oversized attribute values.
152/// Also truncates IDs (`trace_id`, `span_id`, `parent_span_id`) that
153/// are not already sanitized at the ingestion boundary for some formats
154/// (Jaeger, Zipkin, native JSON).
155pub fn sanitize_span_event(event: &mut SpanEvent) {
156    truncate_field(&mut event.timestamp, MAX_TIMESTAMP_LENGTH);
157    truncate_field(&mut event.trace_id, MAX_ID_LENGTH);
158    truncate_field(&mut event.span_id, MAX_ID_LENGTH);
159    if let Some(ref mut pid) = event.parent_span_id {
160        truncate_field(pid, MAX_ID_LENGTH);
161    }
162    // is_valid_region_id at OTLP/JSON ingest already caps cloud_region at
163    // 64 chars, but Jaeger/Zipkin paths leave the field None. Funnel
164    // through the same control-char + truncate helper as code_* for
165    // defense-in-depth on hand-crafted inputs.
166    sanitize_optional_arc_str(&mut event.cloud_region, MAX_ID_LENGTH);
167    truncate_arc_str(&mut event.service, MAX_SERVICE_LENGTH);
168    truncate_field(&mut event.operation, MAX_OPERATION_LENGTH);
169    truncate_field(&mut event.target, MAX_TARGET_LENGTH);
170    truncate_field(&mut event.source.endpoint, MAX_SOURCE_LENGTH);
171    truncate_field(&mut event.source.method, MAX_SOURCE_LENGTH);
172    sanitize_optional_arc_str(&mut event.code_function, MAX_CODE_FUNCTION_LENGTH);
173    sanitize_optional_arc_str(&mut event.code_filepath, MAX_CODE_FILEPATH_LENGTH);
174    sanitize_optional_arc_str(&mut event.code_namespace, MAX_CODE_NAMESPACE_LENGTH);
175    sanitize_arc_str_vec(
176        &mut event.instrumentation_scopes,
177        MAX_SCOPE_NAME_LENGTH,
178        MAX_INSTRUMENTATION_SCOPES,
179    );
180}
181
182/// Source code location extracted from `OTel` `code.*` span attributes.
183///
184/// Not all instrumentation agents emit these attributes. When present,
185/// they allow findings to point to the exact function and file where the
186/// anti-pattern originates.
187#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
188pub struct CodeLocation {
189    #[serde(default, skip_serializing_if = "Option::is_none")]
190    pub function: Option<String>,
191    #[serde(default, skip_serializing_if = "Option::is_none")]
192    pub filepath: Option<String>,
193    #[serde(default, skip_serializing_if = "Option::is_none")]
194    pub lineno: Option<u32>,
195    #[serde(default, skip_serializing_if = "Option::is_none")]
196    pub namespace: Option<String>,
197}
198
199impl CodeLocation {
200    /// Returns `true` when all fields are `None`.
201    #[must_use]
202    pub fn is_empty(&self) -> bool {
203        self.function.is_none()
204            && self.filepath.is_none()
205            && self.lineno.is_none()
206            && self.namespace.is_none()
207    }
208
209    /// Render the location as `namespace.function (filepath:lineno)`,
210    /// omitting absent parts. Returns an empty string when the location
211    /// has nothing displayable, so callers can skip the line entirely
212    /// rather than printing a bare `Source:` label.
213    ///
214    /// Single source of truth for the CLI text output, the SARIF
215    /// `physicalLocation` message, and the TUI detail panel.
216    #[must_use]
217    pub fn display_string(&self) -> String {
218        let mut src = String::new();
219        if let Some(ref ns) = self.namespace {
220            src.push_str(ns);
221            src.push('.');
222        }
223        if let Some(ref func) = self.function {
224            src.push_str(func);
225        }
226        let has_name = !src.is_empty();
227        if let Some(ref fp) = self.filepath {
228            if has_name {
229                src.push_str(" (");
230            }
231            src.push_str(fp);
232            if let Some(ln) = self.lineno {
233                src.push(':');
234                src.push_str(&ln.to_string());
235            }
236            if has_name {
237                src.push(')');
238            }
239        }
240        src
241    }
242}
243
244/// A single span event representing an I/O operation (SQL query, HTTP call).
245///
246/// Strings repeated across events of the same workload are stored as
247/// `Arc<str>` for cheap clones, per-event-unique strings stay as `String`.
248#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
249pub struct SpanEvent {
250    pub timestamp: String,
251    pub trace_id: String,
252    pub span_id: String,
253    #[serde(default, skip_serializing_if = "Option::is_none")]
254    pub parent_span_id: Option<String>,
255    pub service: Arc<str>,
256    /// Cloud region this span was emitted from, sourced from the `OTel`
257    /// `cloud.region` resource attribute (or span attribute as fallback).
258    ///
259    /// Used by the carbon scoring stage to apply per-region carbon
260    /// intensity coefficients in multi-region deployments. `None` when
261    /// the attribute is absent or when ingesting from formats that don't
262    /// carry it (`Jaeger`, `Zipkin`, raw `JSON` without explicit field).
263    #[serde(default, skip_serializing_if = "Option::is_none")]
264    pub cloud_region: Option<Arc<str>>,
265    #[serde(rename = "type")]
266    pub event_type: EventType,
267    /// SQL: `db.system` for OTLP (e.g. "postgresql"), verb for native JSON.
268    /// HTTP: request method (e.g. "GET").
269    pub operation: String,
270    pub target: String,
271    pub duration_us: u64,
272    pub source: EventSource,
273    #[serde(skip_serializing_if = "Option::is_none")]
274    pub status_code: Option<u16>,
275    /// HTTP response body size in bytes, sourced from the `OTel`
276    /// `http.response.body.size` attribute (or legacy
277    /// `http.response_content_length`). Used by the carbon scoring stage
278    /// for HTTP payload size tier classification and network transport
279    /// energy estimation. `None` for SQL spans or when the attribute is
280    /// absent.
281    #[serde(default, skip_serializing_if = "Option::is_none")]
282    pub response_size_bytes: Option<u64>,
283    /// `OTel` `code.function` attribute: the function name in the instrumented code.
284    #[serde(default, skip_serializing_if = "Option::is_none")]
285    pub code_function: Option<Arc<str>>,
286    /// `OTel` `code.filepath` attribute: the source file path.
287    #[serde(default, skip_serializing_if = "Option::is_none")]
288    pub code_filepath: Option<Arc<str>>,
289    /// `OTel` `code.lineno` attribute: the line number in the source file.
290    #[serde(default, skip_serializing_if = "Option::is_none")]
291    pub code_lineno: Option<u32>,
292    /// `OTel` `code.namespace` attribute: the namespace (e.g. Java package).
293    #[serde(default, skip_serializing_if = "Option::is_none")]
294    pub code_namespace: Option<Arc<str>>,
295    /// OpenTelemetry instrumentation scope names captured at ingest time:
296    /// the leaf span's scope at index 0, then each unique ancestor scope
297    /// up to a bounded depth. Lets framework detection identify Spring
298    /// Data, Hibernate, Quarkus, Helidon and friends from the
299    /// `io.opentelemetry.<module>` strings emitted by the agent, without
300    /// relying on user-code naming conventions.
301    #[serde(default, skip_serializing_if = "Vec::is_empty")]
302    pub instrumentation_scopes: Vec<Arc<str>>,
303}
304
305impl SpanEvent {
306    /// Build a [`CodeLocation`] from this span's `code_*` fields.
307    ///
308    /// Returns `None` when all four fields are absent.
309    #[must_use]
310    pub fn code_location(&self) -> Option<CodeLocation> {
311        if self.code_function.is_none()
312            && self.code_filepath.is_none()
313            && self.code_lineno.is_none()
314            && self.code_namespace.is_none()
315        {
316            return None;
317        }
318        Some(CodeLocation {
319            function: self.code_function.as_deref().map(String::from),
320            filepath: self.code_filepath.as_deref().map(String::from),
321            lineno: self.code_lineno,
322            namespace: self.code_namespace.as_deref().map(String::from),
323        })
324    }
325}
326
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn code_location_display_string_full() {
        let loc = CodeLocation {
            function: Some("OrderItemRepository.findByOrderId".to_string()),
            filepath: Some("order-service/src/main/java/OrderItemRepository.java".to_string()),
            lineno: Some(42),
            namespace: Some("com.example.order.repository".to_string()),
        };
        assert_eq!(
            loc.display_string(),
            "com.example.order.repository.OrderItemRepository.findByOrderId \
             (order-service/src/main/java/OrderItemRepository.java:42)"
        );
    }

    #[test]
    fn code_location_display_string_function_only() {
        let loc = CodeLocation {
            function: Some("fetchUser".to_string()),
            filepath: None,
            lineno: None,
            namespace: None,
        };
        assert_eq!(loc.display_string(), "fetchUser");
    }

    #[test]
    fn code_location_display_string_filepath_only() {
        let loc = CodeLocation {
            function: None,
            filepath: Some("src/main.rs".to_string()),
            lineno: Some(7),
            namespace: None,
        };
        // No function or namespace, so no parentheses wrap; filepath
        // still emits with its line number.
        assert_eq!(loc.display_string(), "src/main.rs:7");
    }

    #[test]
    fn code_location_display_string_empty_when_all_none() {
        let loc = CodeLocation {
            function: None,
            filepath: None,
            lineno: None,
            namespace: None,
        };
        assert_eq!(loc.display_string(), "");
        assert!(loc.is_empty());
    }

    /// Minimal native-JSON SQL span with only the required fields set.
    fn sample_sql_json() -> &'static str {
        r#"{
            "timestamp": "2025-07-10T14:32:01.123Z",
            "trace_id": "abc123-def456",
            "span_id": "span-789",
            "service": "order-svc",
            "type": "sql",
            "operation": "SELECT",
            "target": "SELECT * FROM order_item WHERE order_id = 42",
            "duration_us": 1200,
            "source": {
                "endpoint": "POST /api/orders/42/submit",
                "method": "OrderService::create_order"
            }
        }"#
    }

    /// Minimal native-JSON outbound-HTTP span, including a `status_code`.
    fn sample_http_json() -> &'static str {
        r#"{
            "timestamp": "2025-07-10T14:32:01.456Z",
            "trace_id": "abc123-def456",
            "span_id": "span-790",
            "service": "order-svc",
            "type": "http_out",
            "operation": "GET",
            "target": "http://user-svc:5000/api/users/user-123",
            "duration_us": 15000,
            "status_code": 200,
            "source": {
                "endpoint": "POST /api/orders/42/submit",
                "method": "OrderService::create_order"
            }
        }"#
    }

    #[test]
    fn deserialize_sql_event() {
        let event: SpanEvent = serde_json::from_str(sample_sql_json()).unwrap();
        assert_eq!(event.event_type, EventType::Sql);
        assert_eq!(event.trace_id, "abc123-def456");
        assert_eq!(&*event.service, "order-svc");
        assert_eq!(event.target, "SELECT * FROM order_item WHERE order_id = 42");
        assert_eq!(event.duration_us, 1200);
        assert!(event.status_code.is_none());
    }

    #[test]
    fn deserialize_http_event() {
        let event: SpanEvent = serde_json::from_str(sample_http_json()).unwrap();
        assert_eq!(event.event_type, EventType::HttpOut);
        assert_eq!(event.status_code, Some(200));
        assert_eq!(event.source.endpoint, "POST /api/orders/42/submit");
    }

    #[test]
    fn serde_roundtrip_sql() {
        let event: SpanEvent = serde_json::from_str(sample_sql_json()).unwrap();
        let json = serde_json::to_string(&event).unwrap();
        let back: SpanEvent = serde_json::from_str(&json).unwrap();
        assert_eq!(event, back);
    }

    #[test]
    fn serde_roundtrip_http() {
        let event: SpanEvent = serde_json::from_str(sample_http_json()).unwrap();
        let json = serde_json::to_string(&event).unwrap();
        let back: SpanEvent = serde_json::from_str(&json).unwrap();
        assert_eq!(event, back);
    }

    #[test]
    fn sql_event_omits_status_code_in_json() {
        let event: SpanEvent = serde_json::from_str(sample_sql_json()).unwrap();
        let json = serde_json::to_string(&event).unwrap();
        assert!(!json.contains("status_code"));
    }

    #[test]
    fn deserialize_event_without_cloud_region_defaults_to_none() {
        let event: SpanEvent = serde_json::from_str(sample_sql_json()).unwrap();
        assert!(event.cloud_region.is_none());
    }

    #[test]
    fn serde_roundtrip_with_cloud_region() {
        let json = r#"{
            "timestamp": "2025-07-10T14:32:01.123Z",
            "trace_id": "abc123-def456",
            "span_id": "span-789",
            "service": "order-svc",
            "cloud_region": "eu-west-3",
            "type": "sql",
            "operation": "SELECT",
            "target": "SELECT 1",
            "duration_us": 1200,
            "source": {
                "endpoint": "POST /api/orders/42/submit",
                "method": "OrderService::create_order"
            }
        }"#;
        let event: SpanEvent = serde_json::from_str(json).unwrap();
        assert_eq!(event.cloud_region.as_deref(), Some("eu-west-3"));
        let serialized = serde_json::to_string(&event).unwrap();
        assert!(serialized.contains("\"cloud_region\":\"eu-west-3\""));
        let back: SpanEvent = serde_json::from_str(&serialized).unwrap();
        assert_eq!(event, back);
    }

    #[test]
    fn cloud_region_omitted_when_none() {
        let event: SpanEvent = serde_json::from_str(sample_sql_json()).unwrap();
        let json = serde_json::to_string(&event).unwrap();
        assert!(!json.contains("cloud_region"));
    }

    #[test]
    fn sanitize_id_short_unchanged() {
        assert_eq!(sanitize_id("abc-123"), "abc-123");
    }

    #[test]
    fn sanitize_id_truncates_long() {
        let long = "a".repeat(200);
        let result = sanitize_id(&long);
        assert_eq!(result.len(), MAX_ID_LENGTH);
    }

    #[test]
    fn sanitize_id_exact_length_unchanged() {
        let exact = "b".repeat(MAX_ID_LENGTH);
        assert_eq!(sanitize_id(&exact), exact);
    }

    #[test]
    fn sanitize_id_multibyte_no_panic() {
        // 4-byte emoji repeated to exceed MAX_ID_LENGTH (200 bytes total)
        let id = "\u{1F600}".repeat(50);
        assert!(id.len() > MAX_ID_LENGTH);
        let result = sanitize_id(&id);
        // Only whole emojis survive: the largest multiple of 4 bytes that
        // fits under the limit.
        assert_eq!(result, "\u{1F600}".repeat(MAX_ID_LENGTH / 4));
    }

    #[test]
    fn sanitize_id_two_byte_chars_no_panic() {
        // 2-byte UTF-8 chars: é is 2 bytes
        let id = "é".repeat(100); // 200 bytes
        let result = sanitize_id(&id);
        // Result contains whole chars only, as many as fit under the limit.
        assert_eq!(result, "é".repeat(MAX_ID_LENGTH / 2));
    }

    #[test]
    fn sanitize_id_backs_off_to_char_boundary() {
        // MAX_ID_LENGTH is not a multiple of 3, so with a run of 3-byte
        // chars the byte limit lands in the middle of a character and the
        // truncation has to walk back to the previous boundary.
        let id = "\u{20AC}".repeat(100); // 300 bytes
        assert!(!id.is_char_boundary(MAX_ID_LENGTH));
        let result = sanitize_id(&id);
        assert_eq!(result.len(), MAX_ID_LENGTH - MAX_ID_LENGTH % 3);
        assert!(result.chars().all(|c| c == '\u{20AC}'));
    }

    // ------------------------------------------------------------------
    // sanitize_span_event
    // ------------------------------------------------------------------

    /// Build the sample SQL event with one string field overridden.
    /// Panics on a field name the helper does not know.
    fn make_event_with_field(field: &str, value: &str) -> SpanEvent {
        let mut event: SpanEvent = serde_json::from_str(sample_sql_json()).unwrap();
        match field {
            "service" => event.service = Arc::from(value),
            "operation" => event.operation = value.to_string(),
            "target" => event.target = value.to_string(),
            "endpoint" => event.source.endpoint = value.to_string(),
            "method" => event.source.method = value.to_string(),
            _ => panic!("unknown field: {field}"),
        }
        event
    }

    #[test]
    fn sanitize_truncates_long_service() {
        let mut event = make_event_with_field("service", &"x".repeat(500));
        sanitize_span_event(&mut event);
        assert!(event.service.len() <= MAX_SERVICE_LENGTH);
    }

    #[test]
    fn sanitize_truncates_long_operation() {
        let mut event = make_event_with_field("operation", &"x".repeat(500));
        sanitize_span_event(&mut event);
        assert!(event.operation.len() <= MAX_OPERATION_LENGTH);
    }

    #[test]
    fn sanitize_truncates_long_target() {
        let mut event = make_event_with_field("target", &"x".repeat(100_000));
        sanitize_span_event(&mut event);
        assert!(event.target.len() <= MAX_TARGET_LENGTH);
    }

    #[test]
    fn sanitize_truncates_long_endpoint() {
        let mut event = make_event_with_field("endpoint", &"x".repeat(1000));
        sanitize_span_event(&mut event);
        assert!(event.source.endpoint.len() <= MAX_SOURCE_LENGTH);
    }

    #[test]
    fn sanitize_truncates_long_method() {
        let mut event = make_event_with_field("method", &"x".repeat(1000));
        sanitize_span_event(&mut event);
        assert!(event.source.method.len() <= MAX_SOURCE_LENGTH);
    }

    #[test]
    fn sanitize_short_fields_unchanged() {
        let mut event: SpanEvent = serde_json::from_str(sample_sql_json()).unwrap();
        let before = event.clone();
        sanitize_span_event(&mut event);
        assert_eq!(event, before);
    }

    #[test]
    fn sanitize_multibyte_char_boundary() {
        // MAX_SERVICE_LENGTH is not a multiple of 3, so a service name made
        // of 3-byte chars would be split mid-char at the byte limit.
        let service = "\u{20AC}".repeat(100); // 300 bytes
        assert!(!service.is_char_boundary(MAX_SERVICE_LENGTH));
        let mut event = make_event_with_field("service", &service);
        sanitize_span_event(&mut event);
        // Truncation backs off to the previous whole character.
        assert_eq!(
            event.service.len(),
            MAX_SERVICE_LENGTH - MAX_SERVICE_LENGTH % 3
        );
        assert!(event.service.chars().all(|c| c == '\u{20AC}'));
    }

    // ------------------------------------------------------------------
    // CodeLocation and code_* fields
    // ------------------------------------------------------------------

    #[test]
    fn code_location_is_empty_when_all_none() {
        let loc = CodeLocation {
            function: None,
            filepath: None,
            lineno: None,
            namespace: None,
        };
        assert!(loc.is_empty());
    }

    #[test]
    fn code_location_not_empty_with_function() {
        let loc = CodeLocation {
            function: Some("processItems".to_string()),
            filepath: None,
            lineno: None,
            namespace: None,
        };
        assert!(!loc.is_empty());
    }

    #[test]
    fn span_event_code_location_none_when_all_absent() {
        let event: SpanEvent = serde_json::from_str(sample_sql_json()).unwrap();
        assert!(event.code_location().is_none());
    }

    #[test]
    fn span_event_code_location_some_when_present() {
        let mut event: SpanEvent = serde_json::from_str(sample_sql_json()).unwrap();
        event.code_function = Some(Arc::from("processItems"));
        event.code_filepath = Some(Arc::from("src/OrderService.java"));
        event.code_lineno = Some(42);
        event.code_namespace = Some(Arc::from("com.example"));
        let loc = event.code_location().unwrap();
        assert_eq!(loc.function.as_deref(), Some("processItems"));
        assert_eq!(loc.filepath.as_deref(), Some("src/OrderService.java"));
        assert_eq!(loc.lineno, Some(42));
        assert_eq!(loc.namespace.as_deref(), Some("com.example"));
    }

    #[test]
    fn serde_roundtrip_with_code_fields() {
        let json = r#"{
            "timestamp": "2025-07-10T14:32:01.123Z",
            "trace_id": "abc123",
            "span_id": "span-1",
            "service": "svc",
            "type": "sql",
            "operation": "SELECT",
            "target": "SELECT 1",
            "duration_us": 100,
            "source": { "endpoint": "GET /test", "method": "test" },
            "code_function": "processItems",
            "code_filepath": "src/OrderService.java",
            "code_lineno": 42,
            "code_namespace": "com.example"
        }"#;
        let event: SpanEvent = serde_json::from_str(json).unwrap();
        assert_eq!(event.code_function.as_deref(), Some("processItems"));
        assert_eq!(event.code_lineno, Some(42));
        let serialized = serde_json::to_string(&event).unwrap();
        let back: SpanEvent = serde_json::from_str(&serialized).unwrap();
        assert_eq!(event, back);
    }

    #[test]
    fn code_fields_omitted_when_none() {
        let event: SpanEvent = serde_json::from_str(sample_sql_json()).unwrap();
        let json = serde_json::to_string(&event).unwrap();
        assert!(!json.contains("code_function"));
        assert!(!json.contains("code_filepath"));
        assert!(!json.contains("code_lineno"));
        assert!(!json.contains("code_namespace"));
    }

    #[test]
    fn sanitize_truncates_long_code_function() {
        let mut event: SpanEvent = serde_json::from_str(sample_sql_json()).unwrap();
        event.code_function = Some(Arc::from("x".repeat(1000)));
        sanitize_span_event(&mut event);
        assert!(event.code_function.as_ref().unwrap().len() <= MAX_CODE_FUNCTION_LENGTH);
    }

    #[test]
    fn sanitize_truncates_long_code_filepath() {
        let mut event: SpanEvent = serde_json::from_str(sample_sql_json()).unwrap();
        event.code_filepath = Some(Arc::from("x".repeat(2000)));
        sanitize_span_event(&mut event);
        assert!(event.code_filepath.as_ref().unwrap().len() <= MAX_CODE_FILEPATH_LENGTH);
    }

    #[test]
    fn sanitize_drops_code_function_with_control_char() {
        let mut event: SpanEvent = serde_json::from_str(sample_sql_json()).unwrap();
        event.code_function = Some(Arc::from("findItems\x1b[31m"));
        sanitize_span_event(&mut event);
        assert!(event.code_function.is_none());
    }

    #[test]
    fn sanitize_drops_code_filepath_with_newline() {
        let mut event: SpanEvent = serde_json::from_str(sample_sql_json()).unwrap();
        event.code_filepath = Some(Arc::from("src/main.rs\nINJECT"));
        sanitize_span_event(&mut event);
        assert!(event.code_filepath.is_none());
    }

    #[test]
    fn sanitize_drops_code_namespace_with_del() {
        let mut event: SpanEvent = serde_json::from_str(sample_sql_json()).unwrap();
        event.code_namespace = Some(Arc::from("com.foo\x7fX"));
        sanitize_span_event(&mut event);
        assert!(event.code_namespace.is_none());
    }

    #[test]
    fn sanitize_keeps_clean_code_fields() {
        let mut event: SpanEvent = serde_json::from_str(sample_sql_json()).unwrap();
        event.code_function = Some(Arc::from("findItems"));
        event.code_filepath = Some(Arc::from("src/main/java/com/foo/Repo.java"));
        event.code_namespace = Some(Arc::from("com.foo.Repo"));
        sanitize_span_event(&mut event);
        assert_eq!(event.code_function.as_deref(), Some("findItems"));
        assert_eq!(
            event.code_filepath.as_deref(),
            Some("src/main/java/com/foo/Repo.java")
        );
        assert_eq!(event.code_namespace.as_deref(), Some("com.foo.Repo"));
    }

    // ── instrumentation_scopes sanitization ─────────────────────

    #[test]
    fn sanitize_truncates_long_instrumentation_scope() {
        let mut event: SpanEvent = serde_json::from_str(sample_sql_json()).unwrap();
        event.instrumentation_scopes = vec![Arc::from("x".repeat(1024))];
        sanitize_span_event(&mut event);
        assert_eq!(event.instrumentation_scopes.len(), 1);
        assert!(event.instrumentation_scopes[0].len() <= MAX_SCOPE_NAME_LENGTH);
    }

    #[test]
    fn sanitize_drops_instrumentation_scope_with_control_char() {
        let mut event: SpanEvent = serde_json::from_str(sample_sql_json()).unwrap();
        event.instrumentation_scopes = vec![
            Arc::from("io.opentelemetry.spring-data"),
            Arc::from("\x1b[31mio.opentelemetry.evil\x1b[0m"),
            Arc::from("io.opentelemetry.hibernate"),
        ];
        sanitize_span_event(&mut event);
        let scopes: Vec<&str> = event
            .instrumentation_scopes
            .iter()
            .map(AsRef::as_ref)
            .collect();
        assert_eq!(
            scopes,
            vec!["io.opentelemetry.spring-data", "io.opentelemetry.hibernate"]
        );
    }

    #[test]
    fn sanitize_caps_oversize_instrumentation_scopes_vec() {
        let mut event: SpanEvent = serde_json::from_str(sample_sql_json()).unwrap();
        event.instrumentation_scopes = (0..32)
            .map(|i| Arc::from(format!("io.opentelemetry.scope-{i}")))
            .collect();
        sanitize_span_event(&mut event);
        assert_eq!(
            event.instrumentation_scopes.len(),
            MAX_INSTRUMENTATION_SCOPES
        );
        assert_eq!(
            &*event.instrumentation_scopes[0],
            "io.opentelemetry.scope-0"
        );
    }
}