Skip to main content

cu_profiler_core/parser/
mod.rs

1//! Solana log parsing: lexer, compute-budget heuristics, CPI tree, and scope
2//! markers, plus a [`analyze`] facade that turns raw logs into a structured
3//! [`ParseAnalysis`] the profiler can consume.
4
5pub mod compute_budget;
6pub mod cpi_tree;
7pub mod scope_markers;
8pub mod solana_logs;
9
10pub use cpi_tree::{CallNode, NodeStatus};
11pub use scope_markers::{AttributionMethod, MarkerKind, ScopeResult};
12pub use solana_logs::{LexResult, LogEvent};
13
14use crate::program_registry::ProgramRegistry;
15
16/// Structured result of analysing a single transaction's logs.
17#[derive(Debug, Clone)]
18pub struct ParseAnalysis {
19    /// Reconstructed call tree (root included).
20    pub call_tree: CallNode,
21    /// Total CU consumed across top-level (depth-1) invocations.
22    pub total_cu: u64,
23    /// Number of CPIs (depth >= 2).
24    pub cpi_count: u32,
25    /// CPI nesting depth (max depth beyond the entrypoint).
26    pub cpi_depth: u32,
27    /// Estimated requested compute limit, if derivable.
28    pub requested_limit: Option<u64>,
29    /// Requested but unused CU, if derivable.
30    pub over_requested: Option<u64>,
31    /// Percentage of total CU not attributed to a CPI child (0..=100).
32    pub unattributed_pct: f64,
33    /// Scope attribution results.
34    pub scopes: Vec<ScopeResult>,
35    /// Number of scope markers detected.
36    pub scope_marker_count: usize,
37    /// Whether the transaction simulated without a `failed` line.
38    pub simulation_success: bool,
39    /// Number of `Program log:` / `Program data:` lines emitted (log volume).
40    pub log_line_count: usize,
41    /// Whether a validation scope began *after* a CPI was issued — a hint that
42    /// cheap checks run too late. Only set when scope markers are present.
43    pub validation_after_cpi: bool,
44    /// Parser warnings collected along the way.
45    pub warnings: Vec<String>,
46    /// Whether the logs parsed cleanly (no warnings).
47    pub logs_complete: bool,
48}
49
50/// Analyse a log stream into a [`ParseAnalysis`].
51#[must_use]
52pub fn analyze(logs: &[String], registry: &ProgramRegistry) -> ParseAnalysis {
53    let lexed = solana_logs::lex(logs);
54    let events: Vec<LogEvent> = lexed.events().cloned().collect();
55    let mut warnings = lexed.warnings.clone();
56
57    let call_tree = cpi_tree::build(&events, registry);
58    if structural_depth(&call_tree) > cpi_tree::MAX_DEPTH {
59        warnings.push(format!(
60            "CPI nesting exceeded {} levels; the call tree was flattened (logs are likely malformed)",
61            cpi_tree::MAX_DEPTH
62        ));
63    }
64    let total_cu = sum_units_at(&call_tree, 1);
65    let cpi_attributed = sum_units_below(&call_tree, 2);
66    let cpi_count = call_tree.cpi_count();
67    let cpi_depth = call_tree.max_depth().saturating_sub(1);
68
69    let requested_limit = compute_budget::estimated_requested_limit(&events);
70    let over_requested = requested_limit.and_then(|lim| lim.checked_sub(total_cu));
71
72    let unattributed = total_cu.saturating_sub(cpi_attributed);
73    let unattributed_pct = if total_cu == 0 {
74        0.0
75    } else {
76        (unattributed as f64 / total_cu as f64) * 100.0
77    };
78
79    let (scopes, scope_marker_count, scope_warnings) = collect_scopes(&events, total_cu);
80    warnings.extend(scope_warnings);
81
82    let simulation_success = !events.iter().any(|e| matches!(e, LogEvent::Failed { .. }));
83
84    let log_line_count = events
85        .iter()
86        .filter(|e| matches!(e, LogEvent::Log { .. }))
87        .count();
88    let validation_after_cpi = detect_validation_after_cpi(&events);
89
90    let logs_complete = warnings.is_empty();
91
92    ParseAnalysis {
93        call_tree,
94        total_cu,
95        cpi_count,
96        cpi_depth,
97        requested_limit,
98        over_requested,
99        unattributed_pct,
100        scopes,
101        scope_marker_count,
102        simulation_success,
103        log_line_count,
104        validation_after_cpi,
105        warnings,
106        logs_complete,
107    }
108}
109
110/// True if a scope whose name mentions validation opens after the first CPI
111/// invoke (depth >= 2). Evidence-based and marker-gated: with no scope markers
112/// it can never fire, so it makes no unsupported claim.
113fn detect_validation_after_cpi(events: &[LogEvent]) -> bool {
114    let mut cpi_seen = false;
115    for e in events {
116        match e {
117            LogEvent::Invoke { depth, .. } if *depth >= 2 => cpi_seen = true,
118            LogEvent::ScopeBegin { name, .. } if cpi_seen => {
119                let lower = name.to_ascii_lowercase();
120                if lower.contains("valid") || lower.contains("check") {
121                    return true;
122                }
123            }
124            _ => {}
125        }
126    }
127    false
128}
129
130/// Structural nesting depth of the tree (bounded by [`cpi_tree::MAX_DEPTH`], so
131/// this recursion is safe). Used only to detect that the cap was hit.
132fn structural_depth(node: &CallNode) -> usize {
133    1 + node
134        .children
135        .iter()
136        .map(structural_depth)
137        .max()
138        .unwrap_or(0)
139}
140
141fn sum_units_at(node: &CallNode, depth: u32) -> u64 {
142    let here = if node.depth == depth {
143        node.units_consumed.unwrap_or(0)
144    } else {
145        0
146    };
147    here + node
148        .children
149        .iter()
150        .map(|c| sum_units_at(c, depth))
151        .sum::<u64>()
152}
153
154fn sum_units_below(node: &CallNode, min_depth: u32) -> u64 {
155    let here = if node.depth >= min_depth {
156        node.units_consumed.unwrap_or(0)
157    } else {
158        0
159    };
160    here + node
161        .children
162        .iter()
163        .map(|c| sum_units_below(c, min_depth))
164        .sum::<u64>()
165}
166
167fn collect_scopes(events: &[LogEvent], total_cu: u64) -> (Vec<ScopeResult>, usize, Vec<String>) {
168    use scope_markers::{AttributionMethod, Marker, MarkerKind};
169
170    let mut markers: Vec<Marker> = Vec::new();
171    for e in events {
172        match e {
173            LogEvent::ScopeBegin { name, cu } => markers.push(Marker {
174                kind: MarkerKind::Begin,
175                name: name.clone(),
176                cu: *cu,
177            }),
178            LogEvent::ScopeEnd { name, cu } => markers.push(Marker {
179                kind: MarkerKind::End,
180                name: name.clone(),
181                cu: *cu,
182            }),
183            LogEvent::ScopePoint { name, cu } => markers.push(Marker {
184                kind: MarkerKind::Point,
185                name: name.clone(),
186                cu: *cu,
187            }),
188            _ => {}
189        }
190    }
191    let mut warnings = scope_markers::balance_warnings(&markers);
192
193    // One ScopeResult per BEGIN, with parent inferred from nesting. When a BEGIN
194    // and its matching END both carry a `cu=` snapshot, the scope's CU is the
195    // (inclusive) delta — a reliable `LogDelta` estimate; otherwise it stays an
196    // unquantified `Estimated` scope (no fake precision).
197    let mut stack: Vec<(usize, Option<u64>)> = Vec::new();
198    let mut scopes: Vec<ScopeResult> = Vec::new();
199    for m in &markers {
200        match m.kind {
201            MarkerKind::Begin => {
202                let parent = stack.last().map(|&(i, _)| scopes[i].name.clone());
203                stack.push((scopes.len(), m.cu));
204                scopes.push(ScopeResult {
205                    name: m.name.clone(),
206                    parent,
207                    units_estimated: None,
208                    percentage_of_total: None,
209                    attribution_method: AttributionMethod::Estimated,
210                    warnings: Vec::new(),
211                });
212            }
213            MarkerKind::End => {
214                let matches_top = stack.last().is_some_and(|&(i, _)| scopes[i].name == m.name);
215                if matches_top {
216                    if let Some((idx, begin_cu)) = stack.pop() {
217                        if let (Some(begin), Some(end)) = (begin_cu, m.cu) {
218                            let units = begin.saturating_sub(end);
219                            scopes[idx].units_estimated = Some(units);
220                            scopes[idx].attribution_method = AttributionMethod::LogDelta;
221                            // A snapshot delta larger than the measured program
222                            // total is contradictory (only possible with broken
223                            // logs): withhold the percentage rather than emit a
224                            // nonsensical >100%, and flag the inconsistency.
225                            if total_cu > 0 && units <= total_cu {
226                                scopes[idx].percentage_of_total =
227                                    Some((units as f64 / total_cu as f64) * 100.0);
228                            } else if units > total_cu {
229                                let warning = format!(
230                                    "scope `{}` CU delta ({units}) exceeds the measured total ({total_cu}); percentage withheld",
231                                    scopes[idx].name
232                                );
233                                scopes[idx].warnings.push(warning.clone());
234                                warnings.push(warning);
235                            }
236                        }
237                    }
238                }
239            }
240            MarkerKind::Point => {}
241        }
242    }
243
244    (scopes, markers.len(), warnings)
245}
246
247#[cfg(test)]
248mod tests {
249    use super::*;
250
251    fn lines(raw: &[&str]) -> Vec<String> {
252        raw.iter().map(|s| (*s).to_string()).collect()
253    }
254
255    #[test]
256    fn computes_totals_and_unattributed() {
257        let logs = lines(&[
258            "Program User111 invoke [1]",
259            "Program TokenkegQfeZyiNwAJbNbGKPFXCWuBvf9Ss623VQ5DA invoke [2]",
260            "Program TokenkegQfeZyiNwAJbNbGKPFXCWuBvf9Ss623VQ5DA consumed 3000 of 197000 compute units",
261            "Program TokenkegQfeZyiNwAJbNbGKPFXCWuBvf9Ss623VQ5DA success",
262            "Program User111 consumed 12000 of 200000 compute units",
263            "Program User111 success",
264        ]);
265        let a = analyze(&logs, &ProgramRegistry::with_builtins());
266        assert_eq!(a.total_cu, 12_000);
267        assert_eq!(a.cpi_count, 1);
268        assert_eq!(a.cpi_depth, 1);
269        assert_eq!(a.requested_limit, Some(200_000));
270        assert_eq!(a.over_requested, Some(188_000));
271        // 12000 total - 3000 in CPI = 9000 unattributed = 75%.
272        assert!((a.unattributed_pct - 75.0).abs() < 0.01);
273        assert!(a.simulation_success);
274    }
275
276    #[test]
277    fn scopes_inferred_with_nesting() {
278        let logs = lines(&[
279            "Program User111 invoke [1]",
280            "Program log: CU_PROFILER_BEGIN name=outer",
281            "Program log: CU_PROFILER_BEGIN name=inner",
282            "Program log: CU_PROFILER_END name=inner",
283            "Program log: CU_PROFILER_END name=outer",
284            "Program User111 consumed 5000 of 200000 compute units",
285            "Program User111 success",
286        ]);
287        let a = analyze(&logs, &ProgramRegistry::with_builtins());
288        assert_eq!(a.scope_marker_count, 4);
289        assert_eq!(a.scopes.len(), 2);
290        assert_eq!(a.scopes[1].parent.as_deref(), Some("outer"));
291        assert!(a.warnings.is_empty());
292    }
293
294    #[test]
295    fn estimates_scope_cu_from_compute_snapshots() {
296        let logs = lines(&[
297            "Program User111 invoke [1]",
298            "Program log: CU_PROFILER_BEGIN name=swap::validate cu=200000",
299            "Program log: CU_PROFILER_END name=swap::validate cu=188000",
300            "Program User111 consumed 24000 of 200000 compute units",
301            "Program User111 success",
302        ]);
303        let a = analyze(&logs, &ProgramRegistry::with_builtins());
304        let scope = &a.scopes[0];
305        assert_eq!(scope.name, "swap::validate");
306        // 200000 remaining at begin − 188000 at end = 12000 CU in the scope.
307        assert_eq!(scope.units_estimated, Some(12_000));
308        assert_eq!(scope.attribution_method, AttributionMethod::LogDelta);
309        // 12000 / 24000 total = 50%.
310        assert!((scope.percentage_of_total.unwrap() - 50.0).abs() < 0.01);
311    }
312
313    #[test]
314    fn contradictory_snapshot_withholds_percentage_and_warns() {
315        // Snapshot delta (12000) exceeds the program's measured total (5000):
316        // inconsistent logs. The percentage must be withheld, not >100%.
317        let logs = lines(&[
318            "Program User111 invoke [1]",
319            "Program log: CU_PROFILER_BEGIN name=swap::math cu=200000",
320            "Program log: CU_PROFILER_END name=swap::math cu=188000",
321            "Program User111 consumed 5000 of 200000 compute units",
322            "Program User111 success",
323        ]);
324        let a = analyze(&logs, &ProgramRegistry::with_builtins());
325        assert_eq!(a.scopes[0].units_estimated, Some(12_000));
326        assert_eq!(a.scopes[0].percentage_of_total, None);
327        assert!(
328            a.warnings
329                .iter()
330                .any(|w| w.contains("exceeds the measured total"))
331        );
332    }
333
334    #[test]
335    fn scope_without_snapshots_stays_unquantified() {
336        let logs = lines(&[
337            "Program User111 invoke [1]",
338            "Program log: CU_PROFILER_BEGIN name=swap::validate",
339            "Program log: CU_PROFILER_END name=swap::validate",
340            "Program User111 consumed 24000 of 200000 compute units",
341            "Program User111 success",
342        ]);
343        let a = analyze(&logs, &ProgramRegistry::with_builtins());
344        assert_eq!(a.scopes[0].units_estimated, None);
345        assert_eq!(a.scopes[0].attribution_method, AttributionMethod::Estimated);
346    }
347
348    #[test]
349    fn detects_log_volume_and_late_validation() {
350        let logs = lines(&[
351            "Program User111 invoke [1]",
352            "Program TokenkegQfeZyiNwAJbNbGKPFXCWuBvf9Ss623VQ5DA invoke [2]",
353            "Program TokenkegQfeZyiNwAJbNbGKPFXCWuBvf9Ss623VQ5DA success",
354            // Validation marker opens *after* the CPI above.
355            "Program log: CU_PROFILER_BEGIN name=swap::validate_accounts",
356            "Program log: CU_PROFILER_END name=swap::validate_accounts",
357            "Program log: emitting event one",
358            "Program log: emitting event two",
359            "Program User111 consumed 9000 of 200000 compute units",
360            "Program User111 success",
361        ]);
362        let a = analyze(&logs, &ProgramRegistry::with_builtins());
363        assert!(a.validation_after_cpi);
364        assert_eq!(a.log_line_count, 2); // the two non-marker `Program log:` lines
365    }
366
367    #[test]
368    fn validation_before_cpi_is_not_flagged() {
369        let logs = lines(&[
370            "Program User111 invoke [1]",
371            "Program log: CU_PROFILER_BEGIN name=swap::validate_accounts",
372            "Program log: CU_PROFILER_END name=swap::validate_accounts",
373            "Program TokenkegQfeZyiNwAJbNbGKPFXCWuBvf9Ss623VQ5DA invoke [2]",
374            "Program TokenkegQfeZyiNwAJbNbGKPFXCWuBvf9Ss623VQ5DA success",
375            "Program User111 consumed 9000 of 200000 compute units",
376            "Program User111 success",
377        ]);
378        let a = analyze(&logs, &ProgramRegistry::with_builtins());
379        assert!(!a.validation_after_cpi);
380    }
381
382    #[test]
383    fn failure_path_flips_simulation_success() {
384        let logs = lines(&[
385            "Program User111 invoke [1]",
386            "Program User111 consumed 8000 of 200000 compute units",
387            "Program User111 failed: custom program error: 0x1",
388        ]);
389        let a = analyze(&logs, &ProgramRegistry::with_builtins());
390        assert!(!a.simulation_success);
391        assert_eq!(a.total_cu, 8000);
392    }
393}