Skip to main content

sentinel_core/detect/
mod.rs

1//! Detection stage: identifies performance anti-patterns in traces.
2
3pub mod chatty;
4pub mod correlate_cross;
5pub mod fanout;
6pub mod n_plus_one;
7pub mod pool_saturation;
8pub mod redundant;
9pub mod sanitizer_aware;
10pub mod serialized;
11pub mod slow;
12pub mod suggestions;
13
14pub use n_plus_one::DISCLOSURE_N_PLUS_ONE_THRESHOLD;
15
16use std::collections::HashMap;
17
18use crate::correlate::Trace;
19use crate::event::EventType;
20use serde::{Deserialize, Serialize};
21
/// Precomputed per-trace indices shared by the fanout and serialized
/// detectors. Both detectors need `children_by_parent` +
/// `span_index`; building them once per trace and passing the struct
/// halves the hot-path `HashMap` cost on traces that trigger both
/// detectors.
///
/// Keys borrow span-id strings from the [`Trace`] the indices were built
/// from (lifetime `'a`); values are positions into that trace's `spans`.
///
/// `pub` visibility is required because [`fanout::detect_fanout`] and
/// [`serialized::detect_serialized`] are public entry points that take
/// a `&TraceIndices<'_>`.
///
/// NOTE(review): the design intent recorded here was that the `build`
/// constructor stays `pub(super)` so external callers cannot bypass the
/// `detect()` orchestrator to produce an inconsistent indices / trace
/// pair, but `build` is declared `pub`. Confirm which is intended.
pub struct TraceIndices<'a> {
    /// Parent span id -> positions of the spans naming it as their
    /// `parent_span_id`, in trace order.
    pub children_by_parent: HashMap<&'a str, Vec<usize>>,
    /// Span id -> position of that span in the trace's `spans`.
    pub span_index: HashMap<&'a str, usize>,
}
37
38impl<'a> TraceIndices<'a> {
39    /// Build both indices in a single pass over the trace's spans.
40    #[must_use]
41    pub fn build(trace: &'a Trace) -> Self {
42        let mut children_by_parent: HashMap<&str, Vec<usize>> =
43            HashMap::with_capacity(trace.spans.len() / 4 + 1);
44        let mut span_index: HashMap<&str, usize> = HashMap::with_capacity(trace.spans.len());
45        for (idx, span) in trace.spans.iter().enumerate() {
46            span_index.insert(span.event.span_id.as_str(), idx);
47            if let Some(ref parent_id) = span.event.parent_span_id {
48                children_by_parent
49                    .entry(parent_id.as_str())
50                    .or_default()
51                    .push(idx);
52            }
53        }
54        Self {
55            children_by_parent,
56            span_index,
57        }
58    }
59}
60
/// A detected performance anti-pattern.
///
/// This is the unit of output of the detection stage. It round-trips
/// through serde; optional and empty fields are omitted on
/// serialization and defaulted on deserialization, as annotated on each
/// field below.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Finding {
    /// The anti-pattern category (N+1, redundant, slow, fanout, etc.).
    /// Serialized under the JSON key `type`.
    #[serde(rename = "type")]
    pub finding_type: FindingType,
    /// Severity level: critical, warning or info.
    pub severity: Severity,
    /// Trace identifier where the anti-pattern was detected.
    pub trace_id: String,
    /// Name of the service emitting the spans involved in the finding.
    pub service: String,
    /// Normalized inbound endpoint (route template) hosting the pattern.
    pub source_endpoint: String,
    /// Details of the matched pattern: template, occurrences, window, params.
    pub pattern: Pattern,
    /// Human-readable remediation hint for this finding.
    pub suggestion: String,
    /// Earliest timestamp among spans in the detected group.
    pub first_timestamp: String,
    /// Latest timestamp among spans in the detected group.
    pub last_timestamp: String,
    /// `GreenOps` impact estimate. Absent when green scoring is disabled.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub green_impact: Option<GreenImpact>,
    /// Source context of this finding: CI batch run, staging daemon, or
    /// production daemon. Used by downstream consumers (perf-lint) to
    /// boost or reduce severity based on how the finding was produced.
    ///
    /// **Contract:** detectors always emit [`Confidence::default()`]
    /// (= `CiBatch`); the real value is stamped by the pipeline caller
    /// (`pipeline::analyze_with_traces` for batch, `daemon::process_traces`
    /// for the daemon) after detection returns. This keeps the detector
    /// layer oblivious to runtime context.
    #[serde(default)]
    pub confidence: Confidence,
    /// How this finding's type was classified.
    ///
    /// `None` (default, omitted from JSON) means direct classification
    /// via the standard pipeline rules (`distinct_params >= threshold`
    /// for N+1, repeated identical `(template, params)` for redundant).
    /// `Some(SanitizerHeuristic)` means the type was inferred via the
    /// sanitizer-aware heuristic, because the OpenTelemetry agent
    /// collapsed every parameter to `?` and the standard distinct-params
    /// signal was unusable. Operators can filter on this field to spot
    /// where the heuristic is firing.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub classification_method: Option<ClassificationMethod>,
    /// Source code location from `OTel` `code.*` span attributes.
    /// `None` when the instrumentation agent does not emit these attributes.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub code_location: Option<crate::event::CodeLocation>,
    /// OpenTelemetry instrumentation scope chain from the originating
    /// span and its ancestors (leaf to root, deduplicated). Primary
    /// framework signal for [`suggestions::enrich`]. Empty when the
    /// upstream format carries no scope info (Jaeger, Zipkin) or the
    /// trace is synthetic.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub instrumentation_scopes: Vec<String>,
    /// Framework-specific actionable fix, populated by
    /// [`suggestions::enrich`] after the per-trace detectors run. `None`
    /// when no framework can be inferred or the `(finding_type,
    /// framework)` pair has no mapping in the fixes table.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub suggested_fix: Option<suggestions::SuggestedFix>,
    /// Canonical signature for ack matching, e.g.
    /// `n_plus_one_sql:order-svc:POST_/api/orders:a3f8b2c1`. Always
    /// present in JSON output so users can copy-paste it into
    /// `.perf-sentinel-acknowledgments.toml`. Filled by
    /// [`crate::acknowledgments::enrich_with_signatures`] at end of
    /// `pipeline::analyze` and after deserializing baselines; detectors
    /// in this module leave it as an empty string.
    #[serde(default)]
    pub signature: String,
}
135
/// Types of performance anti-patterns.
///
/// Serialized in `snake_case` (the same strings returned by
/// [`FindingType::as_str`]). The declaration order is significant: the
/// derived `Ord` follows it and is the primary key used by
/// `sort_findings`, so reordering variants changes output order.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum FindingType {
    /// N+1 pattern on SQL events (`n_plus_one_sql`).
    NPlusOneSql,
    /// N+1 pattern on outbound HTTP events (`n_plus_one_http`).
    NPlusOneHttp,
    /// Redundant SQL events (`redundant_sql`).
    RedundantSql,
    /// Redundant outbound HTTP events (`redundant_http`).
    RedundantHttp,
    /// Slow SQL events (`slow_sql`).
    SlowSql,
    /// Slow outbound HTTP events (`slow_http`).
    SlowHttp,
    /// Excessive fanout (`excessive_fanout`).
    ExcessiveFanout,
    /// Chatty service (`chatty_service`).
    ChattyService,
    /// Pool saturation (`pool_saturation`).
    PoolSaturation,
    /// Serialized calls (`serialized_calls`).
    SerializedCalls,
}
151
/// Severity levels for findings.
///
/// Serialized in `snake_case`. The derived `Ord` follows declaration
/// order, so `Critical < Warning < Info`; `sort_findings` therefore
/// lists critical findings first within a finding type.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum Severity {
    /// Serialized as `critical`.
    Critical,
    /// Serialized as `warning`.
    Warning,
    /// Serialized as `info`.
    Info,
}
160
/// Source context for a [`Finding`]: where and how it was produced.
///
/// perf-lint consumes this field via its runtime-findings import path and
/// uses it to adjust severity in the IDE. A `daemon_production` finding
/// (observed on real production traffic) is a much stronger signal than a
/// `ci_batch` finding (observed on a controlled integration test run with
/// limited traffic shapes).
///
/// Variants are declared from lowest to highest confidence, so the
/// derived `Ord` gives `CiBatch < DaemonStaging < DaemonProduction`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum Confidence {
    /// Batch `analyze` run on traces collected in CI (integration tests).
    /// Lowest confidence: limited traffic shapes, controlled environment.
    ///
    /// Marked `#[default]` so detectors that emit `Confidence::default()`
    /// get the safest fallback (lowest confidence): a forgotten stamp
    /// never inflates perf-lint's severity.
    #[default]
    CiBatch,
    /// Daemon `watch` run on staging traffic. Medium confidence: real
    /// patterns but not production scale.
    DaemonStaging,
    /// Daemon `watch` run on production traffic. Highest confidence:
    /// real patterns at real scale.
    DaemonProduction,
}
186
187impl Confidence {
188    /// Returns the `snake_case` string representation.
189    #[must_use]
190    pub const fn as_str(&self) -> &'static str {
191        match self {
192            Self::CiBatch => "ci_batch",
193            Self::DaemonStaging => "daemon_staging",
194            Self::DaemonProduction => "daemon_production",
195        }
196    }
197
198    /// Map confidence to a SARIF `rank` value (0-100).
199    ///
200    /// Rank is SARIF v2.1.0's standard "how much should this matter"
201    /// signal: 0 = low priority, 100 = highest. Populating it means
202    /// SARIF consumers that ignore the custom `properties` bag still
203    /// get a usable ordering.
204    #[must_use]
205    pub const fn sarif_rank(&self) -> u32 {
206        match self {
207            Self::CiBatch => 30,
208            Self::DaemonStaging => 60,
209            Self::DaemonProduction => 90,
210        }
211    }
212}
213
/// How a [`Finding`]'s type was determined.
///
/// Orthogonal to [`Confidence`]: confidence describes the runtime context
/// (CI vs production daemon), `ClassificationMethod` describes which
/// detection rule produced the type. Stored in
/// [`Finding::classification_method`] as `Option`; `None` means the
/// standard direct rule fired.
///
/// Serialized in `snake_case`: `direct` / `sanitizer_heuristic`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ClassificationMethod {
    /// Standard pipeline classification (e.g. `distinct_params >=
    /// threshold` for N+1, repeated identical `(template, params)` for
    /// redundant). Equivalent to the absence of the field; emitted
    /// explicitly only when a caller wants to be unambiguous.
    Direct,
    /// Reclassified via a heuristic path. For SQL: the `OTel` agent's
    /// sanitizer collapsed parameters to `?`, and the timing/scope
    /// signals suggest N+1 over redundant. For HTTP: repeated identical
    /// params with high timing variance suggest N+1 over redundant.
    SanitizerHeuristic,
}
235
/// Pattern details for a finding.
///
/// The three `span_duration_*` fields are optional diagnostics: they are
/// omitted from JSON when `None` and default to `None` when missing on
/// input.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct Pattern {
    /// Normalized query or URL template shared by the matched spans.
    pub template: String,
    /// Number of spans that matched this template within the window.
    pub occurrences: usize,
    /// Time span, in milliseconds, covering all matched occurrences.
    pub window_ms: u64,
    /// Count of distinct parameter sets observed across occurrences.
    pub distinct_params: usize,
    /// Median per-span duration in the group (µs). Diagnostic field
    /// populated by the n+1 and slow detectors. Not used in the
    /// detection verdict, exposed so downstream consumers can profile
    /// cache-warm patterns without needing daemon-log access.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub span_duration_us_p50: Option<u64>,
    /// 99th-percentile per-span duration in the group (µs).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub span_duration_us_p99: Option<u64>,
    /// Coefficient of variation of per-span durations, scaled by 1000
    /// (523 means CV = 0.523). Avoids floating-point fields so
    /// `Pattern` can keep its `Eq` derive.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub span_duration_cv_x1000: Option<u32>,
}
262
/// `GreenOps` impact for a single finding.
///
/// Derives `PartialEq` but not `Eq` because `io_intensity_score` is an
/// `f64`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct GreenImpact {
    /// Extra I/O operations caused by this anti-pattern (occurrences - 1).
    pub estimated_extra_io_ops: usize,
    /// I/O Intensity Score of the endpoint where this finding occurs.
    pub io_intensity_score: f64,
    /// Classification band for `io_intensity_score`
    /// (`healthy` / `moderate` / `high` / `critical`).
    ///
    /// Computed by [`crate::report::interpret::InterpretationLevel::for_iis`].
    /// The enum values are stable across versions; the thresholds behind
    /// them are versioned with the binary. See
    /// [`crate::report::interpret`] for the stability contract.
    pub io_intensity_band: crate::report::interpret::InterpretationLevel,
}
279
280impl FindingType {
281    #[must_use]
282    pub const fn from_event_type_n_plus_one(event_type: &EventType) -> Self {
283        match event_type {
284            EventType::Sql => Self::NPlusOneSql,
285            EventType::HttpOut => Self::NPlusOneHttp,
286        }
287    }
288
289    #[must_use]
290    pub const fn from_event_type_redundant(event_type: &EventType) -> Self {
291        match event_type {
292            EventType::Sql => Self::RedundantSql,
293            EventType::HttpOut => Self::RedundantHttp,
294        }
295    }
296
297    #[must_use]
298    pub const fn from_event_type_slow(event_type: &EventType) -> Self {
299        match event_type {
300            EventType::Sql => Self::SlowSql,
301            EventType::HttpOut => Self::SlowHttp,
302        }
303    }
304
305    /// Returns the `snake_case` string representation of this finding type.
306    #[must_use]
307    pub const fn as_str(&self) -> &'static str {
308        match self {
309            Self::NPlusOneSql => "n_plus_one_sql",
310            Self::NPlusOneHttp => "n_plus_one_http",
311            Self::RedundantSql => "redundant_sql",
312            Self::RedundantHttp => "redundant_http",
313            Self::SlowSql => "slow_sql",
314            Self::SlowHttp => "slow_http",
315            Self::ExcessiveFanout => "excessive_fanout",
316            Self::ChattyService => "chatty_service",
317            Self::PoolSaturation => "pool_saturation",
318            Self::SerializedCalls => "serialized_calls",
319        }
320    }
321
322    /// RGESN 2024 criteria (ARCEP/Arcom/ADEME) this finding type relates to.
323    ///
324    /// An interpretive crosswalk, not a compliance certification: the RGESN
325    /// criterion titles do not name "N+1" or "slow query", these are the
326    /// criteria whose intent the anti-pattern bears on. `slow_*` returns an
327    /// empty slice on purpose, RGESN family 9 "Algorithmie" is ML-specific and
328    /// no criterion targets single-operation latency. Rationale and the full
329    /// crosswalk live in `docs/METHODOLOGY.md`.
330    #[must_use]
331    pub const fn rgesn_criteria(&self) -> &'static [&'static str] {
332        match self {
333            Self::NPlusOneSql | Self::NPlusOneHttp => &["7.1", "6.1"],
334            Self::RedundantSql | Self::RedundantHttp => &["7.1", "6.5"],
335            Self::ChattyService => &["4.9", "4.10", "6.1"],
336            Self::ExcessiveFanout | Self::PoolSaturation => &["3.2"],
337            Self::SerializedCalls => &["8.10"],
338            Self::SlowSql | Self::SlowHttp => &[],
339        }
340    }
341
342    /// Parse a `FindingType` from its `snake_case` string, the inverse of
343    /// [`as_str`](Self::as_str). Returns `None` for an unknown string.
344    #[must_use]
345    pub fn from_kind_str(s: &str) -> Option<Self> {
346        match s {
347            "n_plus_one_sql" => Some(Self::NPlusOneSql),
348            "n_plus_one_http" => Some(Self::NPlusOneHttp),
349            "redundant_sql" => Some(Self::RedundantSql),
350            "redundant_http" => Some(Self::RedundantHttp),
351            "slow_sql" => Some(Self::SlowSql),
352            "slow_http" => Some(Self::SlowHttp),
353            "excessive_fanout" => Some(Self::ExcessiveFanout),
354            "chatty_service" => Some(Self::ChattyService),
355            "pool_saturation" => Some(Self::PoolSaturation),
356            "serialized_calls" => Some(Self::SerializedCalls),
357            _ => None,
358        }
359    }
360
361    /// Returns a short human-readable label for CLI and TUI display.
362    #[must_use]
363    pub const fn display_label(&self) -> &'static str {
364        match self {
365            Self::NPlusOneSql => "N+1 SQL",
366            Self::NPlusOneHttp => "N+1 HTTP",
367            Self::RedundantSql => "Redundant SQL",
368            Self::RedundantHttp => "Redundant HTTP",
369            Self::SlowSql => "Slow SQL",
370            Self::SlowHttp => "Slow HTTP",
371            Self::ExcessiveFanout => "Excessive fanout",
372            Self::ChattyService => "Chatty service",
373            Self::PoolSaturation => "Pool saturation",
374            Self::SerializedCalls => "Serialized calls",
375        }
376    }
377
378    /// Whether this finding type represents avoidable I/O operations.
379    ///
380    /// Only N+1 and redundant qualify (batchable or cacheable). Slow,
381    /// fanout, chatty, pool saturation and serialized calls are excluded
382    /// from waste scoring; the per-type rationale is in the "Not part of
383    /// waste ratio" sections of `docs/design/04-DETECTION.md`.
384    #[must_use]
385    pub const fn is_avoidable_io(&self) -> bool {
386        matches!(
387            self,
388            Self::NPlusOneSql | Self::NPlusOneHttp | Self::RedundantSql | Self::RedundantHttp
389        )
390    }
391}
392
393impl Severity {
394    /// Returns the `snake_case` string representation of this severity.
395    #[must_use]
396    pub const fn as_str(&self) -> &'static str {
397        match self {
398            Self::Critical => "critical",
399            Self::Warning => "warning",
400            Self::Info => "info",
401        }
402    }
403}
404
/// Configuration for the detection stage.
///
/// A flat copy of the `detection` section of [`crate::config::Config`]
/// (see the `From<&Config>` impl); each field is forwarded to the
/// detector named below by [`detect`] / [`run_full_detection`].
#[derive(Debug, Clone)]
pub struct DetectConfig {
    /// Threshold forwarded to [`n_plus_one::detect_n_plus_one`].
    pub n_plus_one_threshold: u32,
    /// Window duration in milliseconds, forwarded to
    /// [`n_plus_one::detect_n_plus_one`].
    pub window_ms: u64,
    /// Slow threshold in milliseconds, forwarded to [`slow::detect_slow`]
    /// and [`slow::detect_slow_cross_trace`].
    pub slow_threshold_ms: u64,
    /// Minimum occurrence count, forwarded to [`slow::detect_slow`] and
    /// [`slow::detect_slow_cross_trace`].
    pub slow_min_occurrences: u32,
    /// Fanout limit forwarded to [`fanout::detect_fanout`].
    pub max_fanout: u32,
    /// Minimum call count forwarded to [`chatty::detect_chatty`].
    pub chatty_service_min_calls: u32,
    /// Concurrency threshold forwarded to
    /// [`pool_saturation::detect_pool_saturation`].
    pub pool_saturation_concurrent_threshold: u32,
    /// Minimum sequential-call count forwarded to
    /// [`serialized::detect_serialized`].
    pub serialized_min_sequential: u32,
    /// Sanitizer-aware classification mode forwarded to
    /// [`n_plus_one::detect_n_plus_one`].
    pub sanitizer_aware_classification: sanitizer_aware::SanitizerAwareMode,
}
418
419impl From<&crate::config::Config> for DetectConfig {
420    fn from(config: &crate::config::Config) -> Self {
421        Self {
422            n_plus_one_threshold: config.detection.n_plus_one_threshold,
423            window_ms: config.detection.window_duration_ms,
424            slow_threshold_ms: config.detection.slow_query_threshold_ms,
425            slow_min_occurrences: config.detection.slow_query_min_occurrences,
426            max_fanout: config.detection.max_fanout,
427            chatty_service_min_calls: config.detection.chatty_service_min_calls,
428            pool_saturation_concurrent_threshold: config
429                .detection
430                .pool_saturation_concurrent_threshold,
431            serialized_min_sequential: config.detection.serialized_min_sequential,
432            sanitizer_aware_classification: config.detection.sanitizer_aware_classification,
433        }
434    }
435}
436
/// Arguments for [`build_per_trace_finding`], grouped to stay under
/// clippy's argument-count limit.
///
/// Borrowed fields are copied into owned `String`s when the [`Finding`]
/// is built; owned fields are moved into it.
pub(crate) struct PerTraceFindingArgs<'a> {
    /// Becomes [`Finding::finding_type`].
    pub finding_type: FindingType,
    /// Becomes [`Finding::severity`].
    pub severity: Severity,
    /// Becomes [`Finding::trace_id`].
    pub trace_id: &'a str,
    /// Span whose `event.service` and `event.source.endpoint` populate
    /// [`Finding::service`] and [`Finding::source_endpoint`].
    pub first_span: &'a crate::normalize::NormalizedEvent,
    /// Becomes [`Pattern::template`].
    pub template: &'a str,
    /// Becomes [`Pattern::occurrences`].
    pub occurrences: usize,
    /// Becomes [`Pattern::window_ms`].
    pub window_ms: u64,
    /// Becomes [`Pattern::distinct_params`].
    pub distinct_params: usize,
    /// Becomes [`Finding::suggestion`].
    pub suggestion: String,
    /// Becomes [`Finding::first_timestamp`].
    pub first_timestamp: &'a str,
    /// Becomes [`Finding::last_timestamp`].
    pub last_timestamp: &'a str,
    /// Becomes [`Finding::code_location`].
    pub code_location: Option<crate::event::CodeLocation>,
    /// Becomes [`Finding::instrumentation_scopes`].
    pub instrumentation_scopes: Vec<String>,
    /// Becomes [`Finding::classification_method`].
    pub classification_method: Option<ClassificationMethod>,
    /// Per-span durations in microseconds. When `Some`, the p50 / p99 /
    /// CV diagnostics on [`Pattern`] are computed from it; when `None`,
    /// those three fields stay `None`.
    pub span_durations_us: Option<Vec<u64>>,
}
456
// Documentation for `build_per_trace_finding` (defined below, after
// `compute_timing_stats`): builds a [`Finding`] from the common fields
// shared by per-trace detectors (N+1, redundant, slow), so detection
// modules do not duplicate the struct literal. Kept as a non-doc
// comment to avoid an empty-line-after-doc-comment clippy error with
// the next fn.
461
462/// Compute diagnostic timing stats from a mutable slice of per-span
463/// durations (microseconds). Returns `(p50_us, p99_us, cv_x1000)`.
464fn compute_timing_stats(durations: &mut [u64]) -> (u64, u64, u32) {
465    if durations.is_empty() {
466        return (0, 0, 0);
467    }
468    durations.sort_unstable();
469    let n = durations.len();
470    let p50 = durations[slow::percentile_index(n, 50)];
471    let p99 = durations[slow::percentile_index(n, 99)];
472    #[allow(clippy::cast_precision_loss)]
473    let n_f = n as f64;
474    let mut mean = 0.0_f64;
475    let mut m2 = 0.0_f64;
476    let mut count = 0u64;
477    for &d in durations.iter() {
478        count += 1;
479        #[allow(clippy::cast_precision_loss)]
480        let val = d as f64;
481        let delta = val - mean;
482        #[allow(clippy::cast_precision_loss)]
483        let cf = count as f64;
484        mean += delta / cf;
485        m2 += delta * (val - mean);
486    }
487    let cv_x1000 = if mean > 0.0 && n_f > 1.0 {
488        let cv = (m2 / n_f).sqrt() / mean;
489        #[allow(clippy::cast_sign_loss)] // cv * 1000 is always non-negative
490        {
491            (cv * 1000.0).round() as u32
492        }
493    } else {
494        0
495    };
496    (p50, p99, cv_x1000)
497}
498
499pub(crate) fn build_per_trace_finding(args: PerTraceFindingArgs<'_>) -> Finding {
500    let timing = args
501        .span_durations_us
502        .map(|mut d| compute_timing_stats(&mut d));
503    Finding {
504        finding_type: args.finding_type,
505        severity: args.severity,
506        trace_id: args.trace_id.to_string(),
507        service: args.first_span.event.service.to_string(),
508        source_endpoint: args.first_span.event.source.endpoint.clone(),
509        pattern: Pattern {
510            template: args.template.to_string(),
511            occurrences: args.occurrences,
512            window_ms: args.window_ms,
513            distinct_params: args.distinct_params,
514            span_duration_us_p50: timing.map(|(p50, _, _)| p50),
515            span_duration_us_p99: timing.map(|(_, p99, _)| p99),
516            span_duration_cv_x1000: timing.map(|(_, _, cv)| cv),
517        },
518        suggestion: args.suggestion,
519        first_timestamp: args.first_timestamp.to_string(),
520        last_timestamp: args.last_timestamp.to_string(),
521        green_impact: None,
522        confidence: Confidence::default(),
523        classification_method: args.classification_method,
524        code_location: args.code_location,
525        instrumentation_scopes: args.instrumentation_scopes,
526        suggested_fix: None,
527        signature: String::new(),
528    }
529}
530
531/// Stamp `confidence` on every finding in the slice.
532///
533/// Detectors emit `Confidence::default()` (= [`Confidence::CiBatch`])
534/// per the contract on [`Finding::confidence`]. Pipeline callers
535/// override the value with the runtime context (`CiBatch` for batch
536/// `analyze`, `DaemonStaging` or `DaemonProduction` for the daemon)
537/// using this helper so neither the batch nor the daemon path has to
538/// duplicate the loop.
539pub fn apply_confidence(findings: &mut [Finding], confidence: Confidence) {
540    for finding in findings.iter_mut() {
541        finding.confidence = confidence;
542    }
543}
544
545/// Run per-trace + cross-trace detection on a set of traces.
546///
547/// Returns the unsorted, unconfidence-stamped `Vec<Finding>`. Callers
548/// stamp confidence via [`apply_confidence`] then sort via
549/// [`sort_findings`] before emission.
550///
551/// Cross-trace detection is gated on `traces.len() >= 2` because the
552/// percentile-based `detect_slow_cross_trace` requires multiple
553/// observations to compute a meaningful p50/p95/p99.
554#[must_use]
555pub fn run_full_detection(traces: &[Trace], config: &DetectConfig) -> Vec<Finding> {
556    let mut findings = detect(traces, config);
557    if traces.len() >= 2 {
558        let mut cross_trace = slow::detect_slow_cross_trace(
559            traces,
560            config.slow_threshold_ms,
561            config.slow_min_occurrences,
562        );
563        findings.append(&mut cross_trace);
564    }
565    findings
566}
567
568/// Run all per-trace detectors on a set of traces.
569///
570/// Does not include cross-trace analysis; see [`slow::detect_slow_cross_trace`]
571/// or use [`run_full_detection`] for the combined pass.
572#[must_use]
573pub fn detect(traces: &[Trace], config: &DetectConfig) -> Vec<Finding> {
574    let mut findings = Vec::new();
575    for trace in traces {
576        // Span-relationship indices are built once per trace and shared
577        // by the detectors that need them (fanout, serialized).
578        let indices = TraceIndices::build(trace);
579        // append() moves the backing allocation in O(1), no iterator
580        // overhead. n_plus_one must run before redundant: redundant
581        // receives its findings to skip templates already classified
582        // as N+1 (including sanitizer-heuristic reclassifications).
583        let mut n_plus_one_findings = n_plus_one::detect_n_plus_one(
584            trace,
585            config.n_plus_one_threshold,
586            config.window_ms,
587            config.sanitizer_aware_classification,
588        );
589        let mut redundant_findings = redundant::detect_redundant(trace, &n_plus_one_findings);
590        findings.append(&mut n_plus_one_findings);
591        findings.append(&mut redundant_findings);
592        findings.append(&mut slow::detect_slow(
593            trace,
594            config.slow_threshold_ms,
595            config.slow_min_occurrences,
596        ));
597        findings.append(&mut fanout::detect_fanout(
598            trace,
599            &indices,
600            config.max_fanout,
601        ));
602        findings.append(&mut chatty::detect_chatty(
603            trace,
604            config.chatty_service_min_calls,
605        ));
606        findings.append(&mut pool_saturation::detect_pool_saturation(
607            trace,
608            config.pool_saturation_concurrent_threshold,
609        ));
610        findings.append(&mut serialized::detect_serialized(
611            trace,
612            &indices,
613            config.serialized_min_sequential,
614        ));
615    }
616    suggestions::enrich(&mut findings);
617    findings
618}
619
620/// Sort findings deterministically for stable output.
621///
622/// Orders by finding type, severity, trace ID, source endpoint, and template.
623pub(crate) fn sort_findings(findings: &mut [Finding]) {
624    findings.sort_by(|a, b| {
625        a.finding_type
626            .cmp(&b.finding_type)
627            .then_with(|| a.severity.cmp(&b.severity))
628            .then_with(|| a.trace_id.cmp(&b.trace_id))
629            .then_with(|| a.source_endpoint.cmp(&b.source_endpoint))
630            .then_with(|| a.pattern.template.cmp(&b.pattern.template))
631    });
632}
633
634#[cfg(test)]
635mod tests {
636    use super::*;
637
638    fn default_config() -> DetectConfig {
639        DetectConfig {
640            n_plus_one_threshold: 5,
641            window_ms: 500,
642            slow_threshold_ms: 500,
643            slow_min_occurrences: 3,
644            max_fanout: 20,
645            chatty_service_min_calls: 15,
646            pool_saturation_concurrent_threshold: 10,
647            serialized_min_sequential: 3,
648            sanitizer_aware_classification: sanitizer_aware::SanitizerAwareMode::default(),
649        }
650    }
651
652    #[test]
653    fn empty_traces_produce_no_findings() {
654        let findings = detect(&[], &default_config());
655        assert!(findings.is_empty());
656    }
657
658    #[test]
659    fn finding_type_serializes_to_snake_case() {
660        let json = serde_json::to_string(&FindingType::NPlusOneSql).unwrap();
661        assert_eq!(json, r#""n_plus_one_sql""#);
662
663        let json = serde_json::to_string(&FindingType::RedundantHttp).unwrap();
664        assert_eq!(json, r#""redundant_http""#);
665
666        let json = serde_json::to_string(&FindingType::SlowSql).unwrap();
667        assert_eq!(json, r#""slow_sql""#);
668
669        let json = serde_json::to_string(&FindingType::SlowHttp).unwrap();
670        assert_eq!(json, r#""slow_http""#);
671
672        let json = serde_json::to_string(&FindingType::ExcessiveFanout).unwrap();
673        assert_eq!(json, r#""excessive_fanout""#);
674
675        let json = serde_json::to_string(&FindingType::ChattyService).unwrap();
676        assert_eq!(json, r#""chatty_service""#);
677
678        let json = serde_json::to_string(&FindingType::PoolSaturation).unwrap();
679        assert_eq!(json, r#""pool_saturation""#);
680
681        let json = serde_json::to_string(&FindingType::SerializedCalls).unwrap();
682        assert_eq!(json, r#""serialized_calls""#);
683    }
684
685    #[test]
686    fn severity_serializes_to_snake_case() {
687        let json = serde_json::to_string(&Severity::Critical).unwrap();
688        assert_eq!(json, r#""critical""#);
689    }
690
691    // --- Confidence field tests ---
692
693    #[test]
694    fn confidence_default_is_ci_batch() {
695        assert_eq!(Confidence::default(), Confidence::CiBatch);
696    }
697
698    #[test]
699    fn confidence_serializes_to_snake_case() {
700        assert_eq!(
701            serde_json::to_string(&Confidence::CiBatch).unwrap(),
702            r#""ci_batch""#
703        );
704        assert_eq!(
705            serde_json::to_string(&Confidence::DaemonStaging).unwrap(),
706            r#""daemon_staging""#
707        );
708        assert_eq!(
709            serde_json::to_string(&Confidence::DaemonProduction).unwrap(),
710            r#""daemon_production""#
711        );
712    }
713
714    #[test]
715    fn confidence_deserializes_from_snake_case() {
716        let c: Confidence = serde_json::from_str(r#""ci_batch""#).unwrap();
717        assert_eq!(c, Confidence::CiBatch);
718        let c: Confidence = serde_json::from_str(r#""daemon_staging""#).unwrap();
719        assert_eq!(c, Confidence::DaemonStaging);
720        let c: Confidence = serde_json::from_str(r#""daemon_production""#).unwrap();
721        assert_eq!(c, Confidence::DaemonProduction);
722    }
723
724    #[test]
725    fn confidence_as_str_matches_serialization() {
726        assert_eq!(Confidence::CiBatch.as_str(), "ci_batch");
727        assert_eq!(Confidence::DaemonStaging.as_str(), "daemon_staging");
728        assert_eq!(Confidence::DaemonProduction.as_str(), "daemon_production");
729    }
730
731    #[test]
732    fn confidence_sarif_rank_increases_with_confidence() {
733        // Ordering must be strictly ascending so SARIF consumers that sort
734        // by rank produce the expected "production > staging > CI" order.
735        assert!(Confidence::CiBatch.sarif_rank() < Confidence::DaemonStaging.sarif_rank());
736        assert!(Confidence::DaemonStaging.sarif_rank() < Confidence::DaemonProduction.sarif_rank());
737        assert_eq!(Confidence::CiBatch.sarif_rank(), 30);
738        assert_eq!(Confidence::DaemonStaging.sarif_rank(), 60);
739        assert_eq!(Confidence::DaemonProduction.sarif_rank(), 90);
740    }
741
#[test]
fn detector_findings_default_to_ci_batch_confidence() {
    // Detectors stamp findings with `Confidence::default()`; the real value
    // is supplied later by the pipeline/daemon caller. Checking the default
    // here makes any change to `Confidence::default()` fail loudly.
    use crate::test_helpers::{make_sql_event, make_trace};

    // Six queries on one trace that differ only in the `order_id` parameter.
    let mut events: Vec<crate::event::SpanEvent> = Vec::with_capacity(6);
    for i in 1..=6 {
        let span_id = format!("span-{i}");
        let query = format!("SELECT * FROM order_item WHERE order_id = {i}");
        let timestamp = format!("2025-07-10T14:32:01.{:03}Z", i * 50);
        events.push(make_sql_event("trace-1", &span_id, &query, &timestamp));
    }

    let findings = detect(&[make_trace(events)], &default_config());
    assert!(!findings.is_empty());
    assert!(findings
        .iter()
        .all(|f| f.confidence == Confidence::CiBatch));
}
766
#[test]
fn detect_combines_n_plus_one_and_redundant() {
    use crate::test_helpers::{make_sql_event, make_trace};

    // Spans 1..=5: same query shape, different parameter each time -> N+1.
    let varying = (1..=5).map(|i| {
        make_sql_event(
            "trace-1",
            &format!("span-{i}"),
            &format!("SELECT * FROM order_item WHERE order_id = {i}"),
            &format!("2025-07-10T14:32:01.{:03}Z", i * 50),
        )
    });
    // Spans 6..=8: the very same query three times -> redundant.
    let repeated = (6..=8).map(|i| {
        make_sql_event(
            "trace-1",
            &format!("span-{i}"),
            "SELECT * FROM config WHERE key = 'timeout'",
            &format!("2025-07-10T14:32:01.{:03}Z", i * 30),
        )
    });
    let events: Vec<crate::event::SpanEvent> = varying.chain(repeated).collect();

    let findings = detect(&[make_trace(events)], &default_config());
    let reported = |kind: FindingType| findings.iter().any(|f| f.finding_type == kind);

    assert!(reported(FindingType::NPlusOneSql), "should detect N+1");
    assert!(reported(FindingType::RedundantSql), "should detect redundant");
}
802
#[test]
fn detect_multiple_traces() {
    use crate::test_helpers::{make_sql_event, make_trace};

    // Trace A: the same query issued three times.
    let mut spans_a: Vec<crate::event::SpanEvent> = Vec::new();
    for i in 1..=3 {
        spans_a.push(make_sql_event(
            "trace-A",
            &format!("span-a{i}"),
            "SELECT * FROM order_item WHERE order_id = 42",
            &format!("2025-07-10T14:32:01.{:03}Z", i * 50),
        ));
    }

    // Trace B: the same query issued twice.
    let mut spans_b: Vec<crate::event::SpanEvent> = Vec::new();
    for i in 1..=2 {
        spans_b.push(make_sql_event(
            "trace-B",
            &format!("span-b{i}"),
            "SELECT * FROM orders WHERE user_id = 7",
            &format!("2025-07-10T14:32:02.{:03}Z", i * 50),
        ));
    }

    let findings = detect(
        &[make_trace(spans_a), make_trace(spans_b)],
        &default_config(),
    );

    // Each trace must contribute at least one finding of its own.
    let has_finding_for = |id: &str| findings.iter().any(|f| f.trace_id == id);
    assert!(has_finding_for("trace-A"), "trace-A should have findings");
    assert!(has_finding_for("trace-B"), "trace-B should have findings");
}
843
#[test]
fn finding_type_as_str() {
    // Spot-check the string name of a handful of variants.
    let cases = [
        (FindingType::NPlusOneSql, "n_plus_one_sql"),
        (FindingType::SlowHttp, "slow_http"),
        (FindingType::ChattyService, "chatty_service"),
        (FindingType::PoolSaturation, "pool_saturation"),
        (FindingType::SerializedCalls, "serialized_calls"),
    ];
    for (kind, expected) in cases {
        assert_eq!(kind.as_str(), expected);
    }
}
852
#[test]
fn severity_as_str() {
    // Every severity level maps to its lowercase name.
    let cases = [
        (Severity::Critical, "critical"),
        (Severity::Warning, "warning"),
        (Severity::Info, "info"),
    ];
    for (level, expected) in cases {
        assert_eq!(level.as_str(), expected);
    }
}
859
#[test]
fn rgesn_criteria_crosswalk() {
    // Finding types that map onto one or more RGESN criteria. N+1 and
    // redundant findings both relate to server caching (7.1).
    let mapped: [(FindingType, &[&str]); 6] = [
        (FindingType::NPlusOneSql, &["7.1", "6.1"]),
        (FindingType::RedundantHttp, &["7.1", "6.5"]),
        (FindingType::ChattyService, &["4.9", "4.10", "6.1"]),
        (FindingType::ExcessiveFanout, &["3.2"]),
        (FindingType::PoolSaturation, &["3.2"]),
        (FindingType::SerializedCalls, &["8.10"]),
    ];
    for (kind, expected) in mapped {
        assert_eq!(kind.rgesn_criteria(), expected);
    }

    // slow_* has no direct RGESN criterion (family 9 is ML-specific).
    for kind in [FindingType::SlowSql, FindingType::SlowHttp] {
        assert!(kind.rgesn_criteria().is_empty());
    }
}
876
#[test]
fn from_kind_str_inverts_as_str() {
    // Guards against drift between `as_str` and `from_kind_str`. `as_str` is
    // an exhaustive match, so the compiler demands an arm for each new
    // variant. `from_kind_str` matches on `&str` with a `_ => None` fallback,
    // so a forgotten variant would quietly parse to `None` and its
    // rgesn_criteria would vanish from disclosure. The round-trip below
    // fails as soon as the two disagree.
    let variants = [
        FindingType::NPlusOneSql,
        FindingType::NPlusOneHttp,
        FindingType::RedundantSql,
        FindingType::RedundantHttp,
        FindingType::SlowSql,
        FindingType::SlowHttp,
        FindingType::ExcessiveFanout,
        FindingType::ChattyService,
        FindingType::PoolSaturation,
        FindingType::SerializedCalls,
    ];
    for v in &variants {
        let parsed = FindingType::from_kind_str(v.as_str());
        assert_eq!(parsed, Some(v.clone()), "{v:?}");
    }

    // A name that belongs to no variant must not parse.
    assert_eq!(FindingType::from_kind_str("unknown_pattern"), None);
}
905
#[test]
fn finding_type_from_event_type_n_plus_one() {
    use crate::event::EventType;

    // Each event type selects the N+1 finding of the same flavour.
    let cases = [
        (EventType::Sql, FindingType::NPlusOneSql),
        (EventType::HttpOut, FindingType::NPlusOneHttp),
    ];
    for (event_type, expected) in cases {
        assert_eq!(
            FindingType::from_event_type_n_plus_one(&event_type),
            expected
        );
    }
}
918
#[test]
fn finding_type_from_event_type_redundant() {
    use crate::event::EventType;

    // Each event type selects the redundant finding of the same flavour.
    let cases = [
        (EventType::Sql, FindingType::RedundantSql),
        (EventType::HttpOut, FindingType::RedundantHttp),
    ];
    for (event_type, expected) in cases {
        assert_eq!(
            FindingType::from_event_type_redundant(&event_type),
            expected
        );
    }
}
931
#[test]
fn finding_type_from_event_type_slow() {
    use crate::event::EventType;

    // Each event type selects the slow finding of the same flavour.
    let cases = [
        (EventType::Sql, FindingType::SlowSql),
        (EventType::HttpOut, FindingType::SlowHttp),
    ];
    for (event_type, expected) in cases {
        assert_eq!(FindingType::from_event_type_slow(&event_type), expected);
    }
}
944
#[test]
fn detect_all_three_types_on_one_trace() {
    use crate::test_helpers::{make_sql_event, make_sql_event_with_duration, make_trace};

    let mut events: Vec<crate::event::SpanEvent> = Vec::new();

    // Five queries differing only in their parameter -> N+1.
    events.extend((1..=5).map(|i| {
        make_sql_event(
            "trace-1",
            &format!("span-n{i}"),
            &format!("SELECT * FROM order_item WHERE order_id = {i}"),
            &format!("2025-07-10T14:32:01.{:03}Z", i * 50),
        )
    }));
    // Three byte-identical queries -> redundant.
    events.extend((1..=3).map(|i| {
        make_sql_event(
            "trace-1",
            &format!("span-r{i}"),
            "SELECT * FROM config WHERE key = 'timeout'",
            &format!("2025-07-10T14:32:02.{:03}Z", i * 30),
        )
    }));
    // Three queries built with an explicit duration of 600_000 -> slow.
    events.extend((1..=3).map(|i| {
        make_sql_event_with_duration(
            "trace-1",
            &format!("span-s{i}"),
            &format!("SELECT * FROM big_table WHERE id = {}", i + 100),
            &format!("2025-07-10T14:32:03.{:03}Z", i * 30),
            600_000,
        )
    }));

    let findings = detect(&[make_trace(events)], &default_config());
    let reported = |kind: FindingType| findings.iter().any(|f| f.finding_type == kind);

    assert!(reported(FindingType::NPlusOneSql), "should detect N+1");
    assert!(reported(FindingType::RedundantSql), "should detect redundant");
    assert!(reported(FindingType::SlowSql), "should detect slow");
}
994
995    // --- Serde roundtrip for Finding (Deserialize added for query CLI) ---
996
#[test]
fn finding_serde_roundtrip() {
    // Serialize a helper-built finding to JSON, parse it back, and compare
    // the fields field by field.
    let original = crate::test_helpers::make_finding(FindingType::NPlusOneSql, Severity::Warning);
    let encoded = serde_json::to_string(&original).unwrap();
    let decoded: Finding = serde_json::from_str(&encoded).unwrap();

    assert_eq!(original.finding_type, decoded.finding_type);
    assert_eq!(original.severity, decoded.severity);
    assert_eq!(original.trace_id, decoded.trace_id);
    assert_eq!(original.service, decoded.service);
    assert_eq!(original.pattern.template, decoded.pattern.template);
    assert_eq!(original.confidence, decoded.confidence);
}
1010
#[test]
fn finding_with_code_location_serde_roundtrip() {
    // Attach a fully populated code location, then serialize to JSON and
    // parse the result back into a `Finding`.
    let mut finding =
        crate::test_helpers::make_finding(FindingType::NPlusOneSql, Severity::Warning);
    finding.code_location = Some(crate::event::CodeLocation {
        function: Some("processItems".to_string()),
        filepath: Some("src/Order.java".to_string()),
        lineno: Some(42),
        namespace: Some("com.example".to_string()),
    });
    let json = serde_json::to_string(&finding).unwrap();
    let back: Finding = serde_json::from_str(&json).unwrap();
    let loc = back
        .code_location
        .expect("code_location should survive the serde roundtrip");

    // Check every field that was set above, so a field lost or renamed
    // during (de)serialization cannot slip through unnoticed.
    assert_eq!(loc.function.as_deref(), Some("processItems"));
    assert_eq!(loc.filepath.as_deref(), Some("src/Order.java"));
    assert_eq!(loc.lineno, Some(42));
    assert_eq!(loc.namespace.as_deref(), Some("com.example"));
}
1027
#[test]
fn finding_type_deserializes_from_snake_case() {
    // snake_case JSON strings must parse into the matching variants.
    let cases = [
        (r#""n_plus_one_sql""#, FindingType::NPlusOneSql),
        (r#""chatty_service""#, FindingType::ChattyService),
    ];
    for (json, expected) in cases {
        let parsed: FindingType = serde_json::from_str(json).unwrap();
        assert_eq!(parsed, expected);
    }
}
1035
#[test]
fn severity_deserializes_from_snake_case() {
    // Lowercase JSON strings must parse into the matching severity.
    let cases = [
        (r#""critical""#, Severity::Critical),
        (r#""warning""#, Severity::Warning),
    ];
    for (json, expected) in cases {
        let parsed: Severity = serde_json::from_str(json).unwrap();
        assert_eq!(parsed, expected);
    }
}
1043
1044    // --- compute_timing_stats ---
1045
#[test]
fn timing_stats_empty_returns_zeroes() {
    // With no samples at all, every statistic is reported as zero.
    let mut no_samples: [u64; 0] = [];
    let stats = compute_timing_stats(&mut no_samples);
    assert_eq!(stats, (0, 0, 0));
}
1050
#[test]
fn timing_stats_single_element() {
    // A lone sample is both p50 and p99, and the CV component is zero.
    let mut lone_sample = [800];
    let stats = compute_timing_stats(&mut lone_sample);
    assert_eq!(stats, (800, 800, 0));
}
1058
#[test]
fn timing_stats_two_elements_p99_is_max() {
    let mut pair = [100, 900];
    let (median, tail, _) = compute_timing_stats(&mut pair);
    // n=2: the p50 index is 0, i.e. the lower value.
    assert_eq!(median, 100);
    // n=2: the p99 index is 1, i.e. the maximum.
    assert_eq!(tail, 900);
}
1065
#[test]
fn timing_stats_five_elements_p99_is_max() {
    // Five ascending samples: p50 is the middle one, p99 the largest.
    let mut samples = [10, 20, 30, 40, 50];
    let (median, tail, _) = compute_timing_stats(&mut samples);
    assert_eq!(median, 30);
    assert_eq!(tail, 50);
}
1072
#[test]
fn timing_stats_identical_durations_cv_zero() {
    // Ten equal samples: the CV component (third tuple field) must be zero.
    let mut uniform = [100u64; 10];
    let cv = compute_timing_stats(&mut uniform).2;
    assert_eq!(cv, 0);
}
1079
#[test]
fn timing_stats_dispersed_durations_cv_matches_variance_helper() {
    let mut spread = [100u64, 50, 200, 60, 250, 80, 300, 70, 150, 400];
    let cv = compute_timing_stats(&mut spread).2;
    // This set has a CV of roughly 0.68, so the x1000 value should land
    // near 680; accept anything strictly between 500 and 800.
    assert!(cv > 500, "CV should be > 0.5, got {cv}");
    assert!(cv < 800, "CV should be < 0.8, got {cv}");
}
1088}