Skip to main content

cu_profiler_core/diagnostics/
rules.rs

1//! Individual diagnostic rules. Each inspects the analysed data and optionally
2//! emits a [`Diagnostic`] with Solana-specific, actionable advice.
3
4use crate::baseline::BaselineComparison;
5use crate::budget::{PolicyResult, PolicyStatus, Severity};
6use crate::confidence::{Confidence, ConfidenceLevel};
7use crate::diagnostics::Diagnostic;
8use crate::model::Measurement;
9use crate::scenario::ExpectedResult;
10
/// Minimum CPI count at which `cpi_explosion` fires (the rule compares with `>=`).
const CPI_EXPLOSION_THRESHOLD: u32 = 8;
/// Minimum CPI nesting depth at which `high_cpi_depth` fires (compared with `>=`).
const CPI_DEPTH_THRESHOLD: u32 = 4;
/// Ratio of unused to requested compute at or above which the request is
/// reported as over-sized. This is a fraction, not a percentage.
const OVER_REQUEST_FRACTION: f64 = 0.5;
/// Value of `unattributed_pct` at or above which more scope markers are suggested.
const UNATTRIBUTED_THRESHOLD: f64 = 60.0;
/// Total CU at or above which a *failing* path is reported as expensive.
const EXPENSIVE_FAILURE_CU: u64 = 5_000;
/// CPI share of total CU at or above which it is surfaced (informational).
/// Compared against `100.0 - unattributed_pct`.
const HIGH_CPI_SHARE_THRESHOLD: f64 = 70.0;
/// Log-line count at or above which potential event/log bloat is flagged.
const LOG_BLOAT_THRESHOLD: usize = 25;
25
26/// Context handed to every rule.
27pub struct Context<'a> {
28    /// Scenario name.
29    pub scenario: &'a str,
30    /// The measurement under inspection.
31    pub measurement: &'a Measurement,
32    /// Evaluated budget policy results.
33    pub policy_results: &'a [PolicyResult],
34    /// Baseline comparison, if any.
35    pub baseline: Option<&'a BaselineComparison>,
36    /// Confidence in the measurement.
37    pub confidence: &'a Confidence,
38    /// What the scenario expected to happen.
39    pub expected: ExpectedResult,
40    /// Number of scope markers detected (gates scope-attribution advice).
41    pub scope_count: usize,
42    /// Number of program log/data lines emitted (drives log-bloat detection).
43    pub log_line_count: usize,
44    /// Whether a validation scope opened after a CPI (marker-gated).
45    pub late_validation: bool,
46}
47
48type Rule = fn(&Context) -> Option<Diagnostic>;
49
50/// All rules, applied in order.
51pub const RULES: &[Rule] = &[
52    absolute_budget_exceeded,
53    near_budget_limit,
54    regression_exceeded,
55    expensive_failure_path,
56    cpi_explosion,
57    high_cpi_depth,
58    high_cpi_share,
59    over_requested_compute,
60    high_unattributed,
61    event_log_bloat,
62    late_validation,
63    stale_baseline,
64    low_confidence,
65];
66
67fn policy_status(ctx: &Context, id: &str) -> Option<PolicyStatus> {
68    ctx.policy_results
69        .iter()
70        .find(|p| p.policy_id == id)
71        .map(|p| p.status)
72}
73
74fn diag(
75    ctx: &Context,
76    id: &str,
77    title: &str,
78    severity: Severity,
79    evidence: String,
80    recommendation: &str,
81) -> Diagnostic {
82    Diagnostic {
83        id: id.to_string(),
84        title: title.to_string(),
85        severity,
86        scenario: ctx.scenario.to_string(),
87        evidence,
88        recommendation: recommendation.to_string(),
89    }
90}
91
92fn absolute_budget_exceeded(ctx: &Context) -> Option<Diagnostic> {
93    (policy_status(ctx, "absolute_max_cu") == Some(PolicyStatus::Fail)).then(|| {
94        diag(
95            ctx,
96            "absolute_budget_exceeded",
97            "Absolute compute budget exceeded",
98            Severity::Error,
99            format!("{} CU consumed", ctx.measurement.total_cu),
100            "Reduce hot-path compute; profile the most expensive CPI and scope.",
101        )
102    })
103}
104
105fn near_budget_limit(ctx: &Context) -> Option<Diagnostic> {
106    (policy_status(ctx, "warn_at_budget_pct") == Some(PolicyStatus::Warn)).then(|| {
107        diag(
108            ctx,
109            "near_budget_limit",
110            "Scenario is near its compute budget",
111            Severity::Warning,
112            format!("{} CU consumed", ctx.measurement.total_cu),
113            "Leave headroom: a small regression could breach the budget.",
114        )
115    })
116}
117
118fn regression_exceeded(ctx: &Context) -> Option<Diagnostic> {
119    let failed = policy_status(ctx, "max_regression_pct") == Some(PolicyStatus::Fail)
120        || policy_status(ctx, "max_regression_units") == Some(PolicyStatus::Fail);
121    failed.then(|| {
122        let evidence = ctx
123            .baseline
124            .map(BaselineComparison::summary)
125            .unwrap_or_else(|| "regression policy exceeded".to_string());
126        diag(
127            ctx,
128            "regression_exceeded",
129            "Compute regression exceeded policy",
130            Severity::Error,
131            evidence,
132            "Inspect the CPI count and recently changed validation path.",
133        )
134    })
135}
136
137fn expensive_failure_path(ctx: &Context) -> Option<Diagnostic> {
138    let failed = !ctx.measurement.simulation_success || ctx.expected == ExpectedResult::Failure;
139    (failed && ctx.measurement.total_cu >= EXPENSIVE_FAILURE_CU).then(|| {
140        diag(
141            ctx,
142            "expensive_failure_path",
143            "Failure path consumes significant compute",
144            Severity::Warning,
145            format!("{} CU consumed before failing", ctx.measurement.total_cu),
146            "Validate cheaply and early so rejected transactions fail fast.",
147        )
148    })
149}
150
151fn cpi_explosion(ctx: &Context) -> Option<Diagnostic> {
152    (ctx.measurement.cpi_count >= CPI_EXPLOSION_THRESHOLD).then(|| {
153        diag(
154            ctx,
155            "cpi_explosion",
156            "High number of CPIs",
157            Severity::Warning,
158            format!("{} CPIs", ctx.measurement.cpi_count),
159            "Check for duplicate ATA creation or batchable cross-program calls.",
160        )
161    })
162}
163
164fn high_cpi_depth(ctx: &Context) -> Option<Diagnostic> {
165    (ctx.measurement.cpi_depth >= CPI_DEPTH_THRESHOLD).then(|| {
166        diag(
167            ctx,
168            "high_cpi_depth",
169            "Deep CPI nesting",
170            Severity::Warning,
171            format!("CPI depth {}", ctx.measurement.cpi_depth),
172            "Deep nesting risks the runtime invoke-depth limit; flatten where possible.",
173        )
174    })
175}
176
177fn high_cpi_share(ctx: &Context) -> Option<Diagnostic> {
178    // Share of total CU spent inside CPIs = the complement of the unattributed
179    // (entrypoint-local) share. Only meaningful when CPIs were actually made.
180    let cpi_share = 100.0 - ctx.measurement.unattributed_pct;
181    (ctx.measurement.cpi_count > 0 && cpi_share >= HIGH_CPI_SHARE_THRESHOLD).then(|| {
182        diag(
183            ctx,
184            "high_cpi_share",
185            "Most compute is spent in CPIs",
186            Severity::Info,
187            format!("{cpi_share:.0}% of CU consumed inside CPIs"),
188            "Review the most expensive cross-program call before optimising local code.",
189        )
190    })
191}
192
193fn event_log_bloat(ctx: &Context) -> Option<Diagnostic> {
194    (ctx.log_line_count >= LOG_BLOAT_THRESHOLD).then(|| {
195        diag(
196            ctx,
197            "event_log_bloat",
198            "High log/event volume",
199            Severity::Warning,
200            format!("{} log line(s) emitted", ctx.log_line_count),
201            "Reduce event emission in the hot path; logging itself costs compute.",
202        )
203    })
204}
205
206fn late_validation(ctx: &Context) -> Option<Diagnostic> {
207    ctx.late_validation.then(|| {
208        diag(
209            ctx,
210            "late_validation",
211            "Validation runs after a CPI",
212            Severity::Warning,
213            "a validation scope opened after a cross-program invocation".to_string(),
214            "Move cheap validation before CPIs so rejected transactions fail fast.",
215        )
216    })
217}
218
219fn over_requested_compute(ctx: &Context) -> Option<Diagnostic> {
220    let limit = ctx.measurement.requested_limit?;
221    let unused = ctx.measurement.over_requested?;
222    (limit > 0 && (unused as f64 / limit as f64) >= OVER_REQUEST_FRACTION).then(|| {
223        diag(
224            ctx,
225            "over_requested_compute",
226            "Compute budget is over-requested",
227            Severity::Info,
228            format!("{unused} of {limit} requested CU unused"),
229            "Lower the requested compute limit if it is consistently over-requested.",
230        )
231    })
232}
233
234fn high_unattributed(ctx: &Context) -> Option<Diagnostic> {
235    // Only meaningful once the user has opted into scope attribution: a program
236    // doing its own work without markers is not "unattributed" in any bad sense.
237    (ctx.scope_count > 0 && ctx.measurement.unattributed_pct >= UNATTRIBUTED_THRESHOLD).then(|| {
238        diag(
239            ctx,
240            "high_unattributed",
241            "Large share of compute is unattributed",
242            Severity::Info,
243            format!("{:.0}% unattributed CU", ctx.measurement.unattributed_pct),
244            "Add scope markers around account validation and math to attribute CU.",
245        )
246    })
247}
248
249fn stale_baseline(ctx: &Context) -> Option<Diagnostic> {
250    let baseline = ctx.baseline?;
251    (!baseline.matched).then(|| {
252        diag(
253            ctx,
254            "stale_baseline",
255            "Baseline is stale",
256            Severity::Warning,
257            baseline.stale_reasons.join("; "),
258            "Re-record the baseline after confirming the change is intended.",
259        )
260    })
261}
262
263fn low_confidence(ctx: &Context) -> Option<Diagnostic> {
264    matches!(
265        ctx.confidence.level,
266        ConfidenceLevel::Low | ConfidenceLevel::Unknown
267    )
268    .then(|| {
269        diag(
270            ctx,
271            "low_confidence",
272            "Measurement confidence is low",
273            Severity::Warning,
274            ctx.confidence.reasons.join("; "),
275            "Treat the figure as indicative; resolve the listed reasons before gating on it.",
276        )
277    })
278}