Skip to main content

stackwise_core/
analyze.rs

1use std::collections::{BTreeMap, BTreeSet};
2use std::fs;
3
4use camino::{Utf8Path, Utf8PathBuf};
5use object::{Architecture, Object, ObjectSection, ObjectSymbol, SymbolKind};
6use rayon::prelude::*;
7
8use crate::elf_stack_sizes;
9use crate::graph::compute_worst_paths;
10use crate::pdb_symbols;
11use crate::pe_unwind;
12use crate::symbol_names::{crate_and_module, demangle};
13use crate::{
14    ArtifactInfo, BuildInfo, Confidence, Diagnostic, DiagnosticLevel, EdgeKind, EdgeReport,
15    Evidence, EvidenceSource, FrameInfo, FrameStatus, GeneratorInfo, GroupReport, ObjectFormat,
16    StackwiseReport, Summary, SymbolMetric, SymbolReport, UnresolvedReason, UpperBoundStatus,
17    WorstPathInfo, SCHEMA_VERSION,
18};
19
20#[derive(Debug, Clone, Default)]
21pub struct AnalyzeOptions {
22    pub build: Option<BuildInfo>,
23}
24
25pub fn analyze_artifact(
26    artifact_path: impl AsRef<Utf8Path>,
27    options: AnalyzeOptions,
28) -> Result<StackwiseReport, AnalyzeError> {
29    let artifact_path = artifact_path.as_ref();
30    let bytes = fs::read(artifact_path).map_err(|source| AnalyzeError::ReadArtifact {
31        path: artifact_path.to_path_buf(),
32        source,
33    })?;
34    let file =
35        object::File::parse(bytes.as_slice()).map_err(|source| AnalyzeError::ParseObject {
36            path: artifact_path.to_path_buf(),
37            message: source.to_string(),
38        })?;
39
40    let format = object_format(&file);
41    let architecture = format!("{:?}", file.architecture());
42    let pointer_width = pointer_width(&file);
43    let frame_sources = frame_sources(&file, format);
44
45    let mut diagnostics = Vec::new();
46    if frame_sources.is_empty() {
47        diagnostics.push(Diagnostic {
48            level: DiagnosticLevel::Warning,
49            code: "stackwise.no_frame_evidence".to_owned(),
50            message: "No exact stack-frame metadata was found; stack sizes are reported as unmeasured instead of fake zeroes.".to_owned(),
51        });
52    }
53
54    let pdb_symbols = load_debug_symbols(artifact_path, &file, format, &mut diagnostics);
55    let has_object_symbols = file.symbols().next().is_some();
56    if !has_object_symbols && pdb_symbols.is_empty() {
57        diagnostics.push(Diagnostic {
58            level: DiagnosticLevel::Warning,
59            code: "stackwise.stripped_symbols".to_owned(),
60            message: "The artifact has no regular symbol table and no adjacent PDB symbols were found; try an unstripped artifact for useful symbol names.".to_owned(),
61        });
62    } else if !has_object_symbols {
63        diagnostics.push(Diagnostic {
64            level: DiagnosticLevel::Info,
65            code: "stackwise.pdb_fallback_symbols".to_owned(),
66            message: "The artifact has no regular symbol table; using adjacent PDB symbols for names and PE unwind ranges for frames.".to_owned(),
67        });
68    }
69
70    let mut symbols = collect_symbols(&file, format, &frame_sources, &pdb_symbols);
71    let edges = collect_edges(&file, &symbols);
72    compute_worst_paths(&mut symbols, &edges);
73    let groups = build_groups(&symbols);
74    let summary = summarize(&symbols, &edges);
75
76    Ok(StackwiseReport {
77        schema_version: SCHEMA_VERSION.to_owned(),
78        generator: GeneratorInfo {
79            name: "stackwise".to_owned(),
80            version: env!("CARGO_PKG_VERSION").to_owned(),
81        },
82        artifact: ArtifactInfo {
83            path: artifact_path.to_string(),
84            file_name: artifact_path
85                .file_name()
86                .map(str::to_owned)
87                .unwrap_or_else(|| artifact_path.as_str().to_owned()),
88            format,
89            architecture,
90            pointer_width,
91            size_bytes: bytes.len() as u64,
92        },
93        build: options.build,
94        summary,
95        symbols,
96        edges,
97        groups,
98        diagnostics,
99    })
100}
101
102fn load_debug_symbols(
103    artifact_path: &Utf8Path,
104    file: &object::File<'_>,
105    format: ObjectFormat,
106    diagnostics: &mut Vec<Diagnostic>,
107) -> Vec<pdb_symbols::PdbSymbol> {
108    if format != ObjectFormat::PeCoff {
109        return Vec::new();
110    }
111
112    match pdb_symbols::load_pdb_symbols(artifact_path, file) {
113        Ok(Some(symbols)) => {
114            diagnostics.push(Diagnostic {
115                level: DiagnosticLevel::Info,
116                code: "stackwise.pdb_symbols".to_owned(),
117                message: format!(
118                    "Loaded {} function symbols from adjacent PDB.",
119                    symbols.len()
120                ),
121            });
122            symbols
123        }
124        Ok(None) => Vec::new(),
125        Err(error) => {
126            diagnostics.push(Diagnostic {
127                level: DiagnosticLevel::Warning,
128                code: "stackwise.pdb_symbols_failed".to_owned(),
129                message: error.to_string(),
130            });
131            Vec::new()
132        }
133    }
134}
135
136fn frame_sources(file: &object::File<'_>, format: ObjectFormat) -> BTreeMap<u64, FrameRecord> {
137    match format {
138        ObjectFormat::Elf => elf_stack_sizes::parse_elf_stack_sizes(file)
139            .unwrap_or_default()
140            .into_iter()
141            .map(|(address, bytes)| {
142                (
143                    address,
144                    FrameRecord {
145                        bytes,
146                        end: None,
147                        source: EvidenceSource::ElfStackSizes,
148                        confidence: Confidence::Exact,
149                        note: "Read from ELF .stack_sizes metadata emitted by LLVM.".to_owned(),
150                    },
151                )
152            })
153            .collect(),
154        ObjectFormat::PeCoff if matches!(file.architecture(), Architecture::X86_64) => {
155            pe_unwind::parse_pe_x64_unwind(file)
156                .into_iter()
157                .map(|record| {
158                    (
159                        record.begin,
160                        FrameRecord {
161                            bytes: record.stack_bytes,
162                            end: Some(record.end),
163                            source: EvidenceSource::PeUnwind,
164                            confidence: Confidence::High,
165                            note: "Recovered from PE x64 unwind metadata.".to_owned(),
166                        },
167                    )
168                })
169                .collect()
170        }
171        _ => BTreeMap::new(),
172    }
173}
174
175fn collect_symbols(
176    file: &object::File<'_>,
177    format: ObjectFormat,
178    frames: &BTreeMap<u64, FrameRecord>,
179    debug_symbols: &[pdb_symbols::PdbSymbol],
180) -> Vec<SymbolReport> {
181    let mut raw_by_address = BTreeMap::<u64, RawSymbol>::new();
182
183    for symbol in file
184        .symbols()
185        .filter(|symbol| symbol.is_definition())
186        .filter(|symbol| symbol.kind() == SymbolKind::Text)
187        .filter(|symbol| symbol.address() != 0 || symbol.size() != 0)
188        .filter(|symbol| !symbol.name().unwrap_or_default().is_empty())
189    {
190        raw_by_address
191            .entry(symbol.address())
192            .or_insert_with(|| RawSymbol {
193                name: symbol.name().unwrap_or_default().to_owned(),
194                address: symbol.address(),
195                size: symbol.size(),
196                source_location: None,
197            });
198    }
199
200    for symbol in debug_symbols {
201        raw_by_address.insert(
202            symbol.address,
203            RawSymbol {
204                name: symbol.name.clone(),
205                address: symbol.address,
206                size: symbol.size,
207                source_location: symbol.source_location.clone(),
208            },
209        );
210    }
211
212    for (address, frame) in frames {
213        raw_by_address.entry(*address).or_insert_with(|| RawSymbol {
214            name: format!("sub_{address:016x}"),
215            address: *address,
216            size: frame
217                .end
218                .and_then(|end| end.checked_sub(*address))
219                .unwrap_or_default(),
220            source_location: None,
221        });
222    }
223
224    let mut raw = raw_by_address.into_values().collect::<Vec<_>>();
225    for symbol in &mut raw {
226        if symbol.size == 0 {
227            symbol.size = frames
228                .get(&symbol.address)
229                .and_then(|frame| frame.end)
230                .and_then(|end| end.checked_sub(symbol.address))
231                .unwrap_or_default();
232        }
233    }
234
235    raw.into_par_iter()
236        .enumerate()
237        .map(|(index, raw)| {
238            let demangled = demangle(&raw.name);
239            let (crate_name, module_path) = crate_and_module(&demangled);
240            let frame = lookup_frame(raw.address, frames);
241            let mut unresolved_reasons = Vec::new();
242            let (own_frame, evidence, confidence) = match frame {
243                Some(frame) => (
244                    FrameInfo {
245                        bytes: Some(frame.bytes),
246                        status: FrameStatus::Known,
247                        evidence_source: frame.source,
248                    },
249                    vec![Evidence {
250                        source: frame.source,
251                        confidence: frame.confidence,
252                        note: frame.note.clone(),
253                    }],
254                    frame.confidence,
255                ),
256                None => {
257                    unresolved_reasons.push(UnresolvedReason::MissingStackEvidence);
258                    (
259                        FrameInfo {
260                            bytes: None,
261                            status: FrameStatus::Unknown,
262                            evidence_source: EvidenceSource::SymbolOnly,
263                        },
264                        vec![Evidence {
265                            source: EvidenceSource::SymbolOnly,
266                            confidence: Confidence::Unknown,
267                            note: "Symbol was found, but no stack-frame evidence was available for it.".to_owned(),
268                        }],
269                        Confidence::Unknown,
270                    )
271                }
272            };
273
274            SymbolReport {
275                id: index as u32,
276                name: raw.name,
277                demangled,
278                crate_name,
279                module_path,
280                address: raw.address,
281                size_bytes: (raw.size > 0).then_some(raw.size),
282                source_location: raw.source_location,
283                object_format: format,
284                own_frame,
285                worst_path: WorstPathInfo {
286                    bytes: None,
287                    status: UpperBoundStatus::Unknown,
288                    path: Vec::new(),
289                },
290                confidence,
291                evidence,
292                unresolved_reasons,
293            }
294        })
295        .collect()
296}
297
298fn lookup_frame(address: u64, frames: &BTreeMap<u64, FrameRecord>) -> Option<&FrameRecord> {
299    frames.get(&address)
300}
301
302fn collect_edges(file: &object::File<'_>, symbols: &[SymbolReport]) -> Vec<EdgeReport> {
303    let executable_sections = file
304        .sections()
305        .filter(|section| section.kind() == object::SectionKind::Text)
306        .filter_map(|section| {
307            section.data().ok().map(|data| SectionBytes {
308                address: section.address(),
309                data,
310            })
311        })
312        .collect::<Vec<_>>();
313
314    let ranges = symbols
315        .iter()
316        .filter_map(|symbol| {
317            let size = symbol.size_bytes?;
318            Some((
319                symbol.address,
320                symbol.address.saturating_add(size),
321                symbol.id,
322            ))
323        })
324        .collect::<Vec<_>>();
325
326    let mut edges = Vec::new();
327    let mut seen = BTreeSet::new();
328
329    for symbol in symbols {
330        let Some(size) = symbol.size_bytes else {
331            continue;
332        };
333        let Some(bytes) = symbol_bytes(symbol.address, size, &executable_sections) else {
334            continue;
335        };
336
337        for call in scan_x86_direct_calls(symbol.address, bytes) {
338            let callee = resolve_symbol(call.target, &ranges);
339            let kind = match (call.kind, callee) {
340                (ScannedEdgeKind::Call, Some(_)) => EdgeKind::DirectCall,
341                (ScannedEdgeKind::Jump, Some(_)) => EdgeKind::TailCall,
342                (ScannedEdgeKind::IndirectCall, _) => EdgeKind::IndirectCall,
343                (_, None) => EdgeKind::ExternalCall,
344            };
345
346            let key = (symbol.id, callee, call.target, kind);
347            if seen.insert(key) {
348                edges.push(EdgeReport {
349                    caller: symbol.id,
350                    callee,
351                    target_address: call.target,
352                    kind,
353                    confidence: Confidence::Medium,
354                });
355            }
356        }
357    }
358
359    edges
360}
361
362fn symbol_bytes<'a>(address: u64, size: u64, sections: &'a [SectionBytes<'a>]) -> Option<&'a [u8]> {
363    sections.iter().find_map(|section| {
364        let offset = address.checked_sub(section.address)? as usize;
365        let size = usize::try_from(size).ok()?;
366        section.data.get(offset..offset.checked_add(size)?)
367    })
368}
369
370fn scan_x86_direct_calls(base: u64, bytes: &[u8]) -> Vec<ScannedEdge> {
371    let mut edges = Vec::new();
372    let mut index = 0usize;
373
374    while index < bytes.len() {
375        let opcode = bytes[index];
376        match opcode {
377            0xe8 | 0xe9 if index + 5 <= bytes.len() => {
378                let rel = i32::from_le_bytes([
379                    bytes[index + 1],
380                    bytes[index + 2],
381                    bytes[index + 3],
382                    bytes[index + 4],
383                ]);
384                let next_ip = base + index as u64 + 5;
385                let target = next_ip.wrapping_add_signed(i64::from(rel));
386                edges.push(ScannedEdge {
387                    target: Some(target),
388                    kind: if opcode == 0xe8 {
389                        ScannedEdgeKind::Call
390                    } else {
391                        ScannedEdgeKind::Jump
392                    },
393                });
394                index += 5;
395            }
396            0xff if index + 2 <= bytes.len() => {
397                let reg = (bytes[index + 1] >> 3) & 0b111;
398                if reg == 2 || reg == 4 {
399                    edges.push(ScannedEdge {
400                        target: None,
401                        kind: ScannedEdgeKind::IndirectCall,
402                    });
403                }
404                index += 2;
405            }
406            _ => index += 1,
407        }
408    }
409
410    edges
411}
412
/// Maps a target address to the id of the first range that contains it.
///
/// Each entry of `ranges` is `(start, end, id)` with a half-open `[start, end)` extent.
/// Returns `None` when `address` is `None` or falls outside every range.
fn resolve_symbol(address: Option<u64>, ranges: &[(u64, u64, u32)]) -> Option<u32> {
    let target = address?;
    for &(start, end, id) in ranges {
        if (start..end).contains(&target) {
            return Some(id);
        }
    }
    None
}
420
421fn build_groups(symbols: &[SymbolReport]) -> Vec<GroupReport> {
422    let mut by_name: BTreeMap<String, Vec<u32>> = BTreeMap::new();
423    for symbol in symbols {
424        let module_path = symbol
425            .module_path
426            .iter()
427            .map(|part| part.trim())
428            .filter(|part| !part.is_empty())
429            .collect::<Vec<_>>();
430        let group = if module_path.is_empty() {
431            symbol
432                .crate_name
433                .as_deref()
434                .map(str::trim)
435                .filter(|name| !name.is_empty())
436                .unwrap_or("(unknown)")
437                .to_owned()
438        } else {
439            module_path.join("::")
440        };
441        by_name.entry(group).or_default().push(symbol.id);
442    }
443
444    by_name
445        .into_iter()
446        .enumerate()
447        .map(|(index, (name, symbol_ids))| {
448            let own_frame_sum = symbol_ids
449                .iter()
450                .map(|id| symbols[*id as usize].own_frame.bytes)
451                .try_fold(0u64, |sum, value| value.map(|bytes| sum + bytes));
452            let worst_path_max = symbol_ids
453                .iter()
454                .filter_map(|id| symbols[*id as usize].worst_path.bytes)
455                .max();
456
457            GroupReport {
458                id: index as u32,
459                name,
460                parent: None,
461                symbol_ids,
462                own_frame_sum,
463                worst_path_max,
464            }
465        })
466        .collect()
467}
468
469fn summarize(symbols: &[SymbolReport], edges: &[EdgeReport]) -> Summary {
470    let known_frame_count = symbols
471        .iter()
472        .filter(|symbol| symbol.own_frame.status == FrameStatus::Known)
473        .count();
474    let recursive_symbol_count = symbols
475        .iter()
476        .filter(|symbol| symbol.worst_path.status == UpperBoundStatus::Recursive)
477        .count();
478    let indirect_edge_count = edges
479        .iter()
480        .filter(|edge| edge.kind == EdgeKind::IndirectCall)
481        .count();
482
483    Summary {
484        symbol_count: symbols.len(),
485        edge_count: edges.len(),
486        known_frame_count,
487        unknown_frame_count: symbols.len().saturating_sub(known_frame_count),
488        recursive_symbol_count,
489        indirect_edge_count,
490        max_own_frame: max_own_frame(symbols),
491        max_worst_path: max_worst_path(symbols),
492        confidence: summary_confidence(symbols, known_frame_count),
493    }
494}
495
496fn summary_confidence(symbols: &[SymbolReport], known_frame_count: usize) -> Confidence {
497    if symbols.is_empty() || known_frame_count == 0 {
498        return Confidence::Unknown;
499    }
500
501    if known_frame_count != symbols.len() {
502        return Confidence::Medium;
503    }
504
505    if symbols
506        .iter()
507        .all(|symbol| symbol.confidence == Confidence::Exact)
508    {
509        Confidence::Exact
510    } else if symbols
511        .iter()
512        .all(|symbol| matches!(symbol.confidence, Confidence::Exact | Confidence::High))
513    {
514        Confidence::High
515    } else {
516        Confidence::Medium
517    }
518}
519
520fn max_own_frame(symbols: &[SymbolReport]) -> Option<SymbolMetric> {
521    symbols
522        .iter()
523        .filter_map(|symbol| symbol.own_frame.bytes.map(|bytes| (symbol, bytes)))
524        .max_by_key(|(_, bytes)| *bytes)
525        .map(|(symbol, bytes)| SymbolMetric {
526            symbol_id: symbol.id,
527            bytes,
528            demangled: symbol.demangled.clone(),
529        })
530}
531
532fn max_worst_path(symbols: &[SymbolReport]) -> Option<SymbolMetric> {
533    symbols
534        .iter()
535        .filter_map(|symbol| symbol.worst_path.bytes.map(|bytes| (symbol, bytes)))
536        .max_by_key(|(_, bytes)| *bytes)
537        .map(|(symbol, bytes)| SymbolMetric {
538            symbol_id: symbol.id,
539            bytes,
540            demangled: symbol.demangled.clone(),
541        })
542}
543
544fn object_format(file: &object::File<'_>) -> ObjectFormat {
545    match file.format() {
546        object::BinaryFormat::Elf => ObjectFormat::Elf,
547        object::BinaryFormat::Coff | object::BinaryFormat::Pe => ObjectFormat::PeCoff,
548        object::BinaryFormat::MachO => ObjectFormat::MachO,
549        object::BinaryFormat::Wasm => ObjectFormat::Wasm,
550        _ => ObjectFormat::Unknown,
551    }
552}
553
554fn pointer_width(file: &object::File<'_>) -> Option<u8> {
555    if file.is_64() {
556        Some(64)
557    } else {
558        Some(32)
559    }
560}
561
562#[derive(Debug, Clone)]
563struct FrameRecord {
564    bytes: u64,
565    end: Option<u64>,
566    source: EvidenceSource,
567    confidence: Confidence,
568    note: String,
569}
570
571#[derive(Debug)]
572struct RawSymbol {
573    name: String,
574    address: u64,
575    size: u64,
576    source_location: Option<crate::SourceLocation>,
577}
578
/// Borrowed contents of one text section together with its load address.
#[derive(Debug)]
struct SectionBytes<'a> {
    /// Address of the first byte of `data`, as reported by the section.
    address: u64,
    /// Raw section bytes borrowed from the parsed object file.
    data: &'a [u8],
}
584
585#[derive(Debug)]
586struct ScannedEdge {
587    target: Option<u64>,
588    kind: ScannedEdgeKind,
589}
590
/// Opcode pattern categories recognised by the scanner.
#[derive(Clone, Copy, Debug)]
enum ScannedEdgeKind {
    /// `E8 rel32` relative call.
    Call,
    /// `E9 rel32` relative jump.
    Jump,
    /// `FF /2` or `FF /4` indirect call or jump.
    IndirectCall,
}
597
598#[derive(Debug, thiserror::Error)]
599pub enum AnalyzeError {
600    #[error("failed to read artifact {path}: {source}")]
601    ReadArtifact {
602        path: Utf8PathBuf,
603        source: std::io::Error,
604    },
605    #[error("failed to parse object file {path}: {message}")]
606    ParseObject { path: Utf8PathBuf, message: String },
607}