1use std::collections::{BTreeMap, BTreeSet};
2use std::fs;
3
4use camino::{Utf8Path, Utf8PathBuf};
5use object::{Architecture, Object, ObjectSection, ObjectSymbol, SymbolKind};
6use rayon::prelude::*;
7
8use crate::elf_stack_sizes;
9use crate::graph::compute_worst_paths;
10use crate::pdb_symbols;
11use crate::pe_unwind;
12use crate::symbol_names::{crate_and_module, demangle};
13use crate::{
14 ArtifactInfo, BuildInfo, Confidence, Diagnostic, DiagnosticLevel, EdgeKind, EdgeReport,
15 Evidence, EvidenceSource, FrameInfo, FrameStatus, GeneratorInfo, GroupReport, ObjectFormat,
16 StackwiseReport, Summary, SymbolMetric, SymbolReport, UnresolvedReason, UpperBoundStatus,
17 WorstPathInfo, SCHEMA_VERSION,
18};
19
20#[derive(Debug, Clone, Default)]
21pub struct AnalyzeOptions {
22 pub build: Option<BuildInfo>,
23}
24
25pub fn analyze_artifact(
26 artifact_path: impl AsRef<Utf8Path>,
27 options: AnalyzeOptions,
28) -> Result<StackwiseReport, AnalyzeError> {
29 let artifact_path = artifact_path.as_ref();
30 let bytes = fs::read(artifact_path).map_err(|source| AnalyzeError::ReadArtifact {
31 path: artifact_path.to_path_buf(),
32 source,
33 })?;
34 let file =
35 object::File::parse(bytes.as_slice()).map_err(|source| AnalyzeError::ParseObject {
36 path: artifact_path.to_path_buf(),
37 message: source.to_string(),
38 })?;
39
40 let format = object_format(&file);
41 let architecture = format!("{:?}", file.architecture());
42 let pointer_width = pointer_width(&file);
43 let frame_sources = frame_sources(&file, format);
44
45 let mut diagnostics = Vec::new();
46 if frame_sources.is_empty() {
47 diagnostics.push(Diagnostic {
48 level: DiagnosticLevel::Warning,
49 code: "stackwise.no_frame_evidence".to_owned(),
50 message: "No exact stack-frame metadata was found; stack sizes are reported as unmeasured instead of fake zeroes.".to_owned(),
51 });
52 }
53
54 let pdb_symbols = load_debug_symbols(artifact_path, &file, format, &mut diagnostics);
55 let has_object_symbols = file.symbols().next().is_some();
56 if !has_object_symbols && pdb_symbols.is_empty() {
57 diagnostics.push(Diagnostic {
58 level: DiagnosticLevel::Warning,
59 code: "stackwise.stripped_symbols".to_owned(),
60 message: "The artifact has no regular symbol table and no adjacent PDB symbols were found; try an unstripped artifact for useful symbol names.".to_owned(),
61 });
62 } else if !has_object_symbols {
63 diagnostics.push(Diagnostic {
64 level: DiagnosticLevel::Info,
65 code: "stackwise.pdb_fallback_symbols".to_owned(),
66 message: "The artifact has no regular symbol table; using adjacent PDB symbols for names and PE unwind ranges for frames.".to_owned(),
67 });
68 }
69
70 let mut symbols = collect_symbols(&file, format, &frame_sources, &pdb_symbols);
71 let edges = collect_edges(&file, &symbols);
72 compute_worst_paths(&mut symbols, &edges);
73 let groups = build_groups(&symbols);
74 let summary = summarize(&symbols, &edges);
75
76 Ok(StackwiseReport {
77 schema_version: SCHEMA_VERSION.to_owned(),
78 generator: GeneratorInfo {
79 name: "stackwise".to_owned(),
80 version: env!("CARGO_PKG_VERSION").to_owned(),
81 },
82 artifact: ArtifactInfo {
83 path: artifact_path.to_string(),
84 file_name: artifact_path
85 .file_name()
86 .map(str::to_owned)
87 .unwrap_or_else(|| artifact_path.as_str().to_owned()),
88 format,
89 architecture,
90 pointer_width,
91 size_bytes: bytes.len() as u64,
92 },
93 build: options.build,
94 summary,
95 symbols,
96 edges,
97 groups,
98 diagnostics,
99 })
100}
101
102fn load_debug_symbols(
103 artifact_path: &Utf8Path,
104 file: &object::File<'_>,
105 format: ObjectFormat,
106 diagnostics: &mut Vec<Diagnostic>,
107) -> Vec<pdb_symbols::PdbSymbol> {
108 if format != ObjectFormat::PeCoff {
109 return Vec::new();
110 }
111
112 match pdb_symbols::load_pdb_symbols(artifact_path, file) {
113 Ok(Some(symbols)) => {
114 diagnostics.push(Diagnostic {
115 level: DiagnosticLevel::Info,
116 code: "stackwise.pdb_symbols".to_owned(),
117 message: format!(
118 "Loaded {} function symbols from adjacent PDB.",
119 symbols.len()
120 ),
121 });
122 symbols
123 }
124 Ok(None) => Vec::new(),
125 Err(error) => {
126 diagnostics.push(Diagnostic {
127 level: DiagnosticLevel::Warning,
128 code: "stackwise.pdb_symbols_failed".to_owned(),
129 message: error.to_string(),
130 });
131 Vec::new()
132 }
133 }
134}
135
136fn frame_sources(file: &object::File<'_>, format: ObjectFormat) -> BTreeMap<u64, FrameRecord> {
137 match format {
138 ObjectFormat::Elf => elf_stack_sizes::parse_elf_stack_sizes(file)
139 .unwrap_or_default()
140 .into_iter()
141 .map(|(address, bytes)| {
142 (
143 address,
144 FrameRecord {
145 bytes,
146 end: None,
147 source: EvidenceSource::ElfStackSizes,
148 confidence: Confidence::Exact,
149 note: "Read from ELF .stack_sizes metadata emitted by LLVM.".to_owned(),
150 },
151 )
152 })
153 .collect(),
154 ObjectFormat::PeCoff if matches!(file.architecture(), Architecture::X86_64) => {
155 pe_unwind::parse_pe_x64_unwind(file)
156 .into_iter()
157 .map(|record| {
158 (
159 record.begin,
160 FrameRecord {
161 bytes: record.stack_bytes,
162 end: Some(record.end),
163 source: EvidenceSource::PeUnwind,
164 confidence: Confidence::High,
165 note: "Recovered from PE x64 unwind metadata.".to_owned(),
166 },
167 )
168 })
169 .collect()
170 }
171 _ => BTreeMap::new(),
172 }
173}
174
175fn collect_symbols(
176 file: &object::File<'_>,
177 format: ObjectFormat,
178 frames: &BTreeMap<u64, FrameRecord>,
179 debug_symbols: &[pdb_symbols::PdbSymbol],
180) -> Vec<SymbolReport> {
181 let mut raw_by_address = BTreeMap::<u64, RawSymbol>::new();
182
183 for symbol in file
184 .symbols()
185 .filter(|symbol| symbol.is_definition())
186 .filter(|symbol| symbol.kind() == SymbolKind::Text)
187 .filter(|symbol| symbol.address() != 0 || symbol.size() != 0)
188 .filter(|symbol| !symbol.name().unwrap_or_default().is_empty())
189 {
190 raw_by_address
191 .entry(symbol.address())
192 .or_insert_with(|| RawSymbol {
193 name: symbol.name().unwrap_or_default().to_owned(),
194 address: symbol.address(),
195 size: symbol.size(),
196 source_location: None,
197 });
198 }
199
200 for symbol in debug_symbols {
201 raw_by_address.insert(
202 symbol.address,
203 RawSymbol {
204 name: symbol.name.clone(),
205 address: symbol.address,
206 size: symbol.size,
207 source_location: symbol.source_location.clone(),
208 },
209 );
210 }
211
212 for (address, frame) in frames {
213 raw_by_address.entry(*address).or_insert_with(|| RawSymbol {
214 name: format!("sub_{address:016x}"),
215 address: *address,
216 size: frame
217 .end
218 .and_then(|end| end.checked_sub(*address))
219 .unwrap_or_default(),
220 source_location: None,
221 });
222 }
223
224 let mut raw = raw_by_address.into_values().collect::<Vec<_>>();
225 for symbol in &mut raw {
226 if symbol.size == 0 {
227 symbol.size = frames
228 .get(&symbol.address)
229 .and_then(|frame| frame.end)
230 .and_then(|end| end.checked_sub(symbol.address))
231 .unwrap_or_default();
232 }
233 }
234
235 raw.into_par_iter()
236 .enumerate()
237 .map(|(index, raw)| {
238 let demangled = demangle(&raw.name);
239 let (crate_name, module_path) = crate_and_module(&demangled);
240 let frame = lookup_frame(raw.address, frames);
241 let mut unresolved_reasons = Vec::new();
242 let (own_frame, evidence, confidence) = match frame {
243 Some(frame) => (
244 FrameInfo {
245 bytes: Some(frame.bytes),
246 status: FrameStatus::Known,
247 evidence_source: frame.source,
248 },
249 vec![Evidence {
250 source: frame.source,
251 confidence: frame.confidence,
252 note: frame.note.clone(),
253 }],
254 frame.confidence,
255 ),
256 None => {
257 unresolved_reasons.push(UnresolvedReason::MissingStackEvidence);
258 (
259 FrameInfo {
260 bytes: None,
261 status: FrameStatus::Unknown,
262 evidence_source: EvidenceSource::SymbolOnly,
263 },
264 vec![Evidence {
265 source: EvidenceSource::SymbolOnly,
266 confidence: Confidence::Unknown,
267 note: "Symbol was found, but no stack-frame evidence was available for it.".to_owned(),
268 }],
269 Confidence::Unknown,
270 )
271 }
272 };
273
274 SymbolReport {
275 id: index as u32,
276 name: raw.name,
277 demangled,
278 crate_name,
279 module_path,
280 address: raw.address,
281 size_bytes: (raw.size > 0).then_some(raw.size),
282 source_location: raw.source_location,
283 object_format: format,
284 own_frame,
285 worst_path: WorstPathInfo {
286 bytes: None,
287 status: UpperBoundStatus::Unknown,
288 path: Vec::new(),
289 },
290 confidence,
291 evidence,
292 unresolved_reasons,
293 }
294 })
295 .collect()
296}
297
298fn lookup_frame(address: u64, frames: &BTreeMap<u64, FrameRecord>) -> Option<&FrameRecord> {
299 frames.get(&address)
300}
301
302fn collect_edges(file: &object::File<'_>, symbols: &[SymbolReport]) -> Vec<EdgeReport> {
303 let executable_sections = file
304 .sections()
305 .filter(|section| section.kind() == object::SectionKind::Text)
306 .filter_map(|section| {
307 section.data().ok().map(|data| SectionBytes {
308 address: section.address(),
309 data,
310 })
311 })
312 .collect::<Vec<_>>();
313
314 let ranges = symbols
315 .iter()
316 .filter_map(|symbol| {
317 let size = symbol.size_bytes?;
318 Some((
319 symbol.address,
320 symbol.address.saturating_add(size),
321 symbol.id,
322 ))
323 })
324 .collect::<Vec<_>>();
325
326 let mut edges = Vec::new();
327 let mut seen = BTreeSet::new();
328
329 for symbol in symbols {
330 let Some(size) = symbol.size_bytes else {
331 continue;
332 };
333 let Some(bytes) = symbol_bytes(symbol.address, size, &executable_sections) else {
334 continue;
335 };
336
337 for call in scan_x86_direct_calls(symbol.address, bytes) {
338 let callee = resolve_symbol(call.target, &ranges);
339 let kind = match (call.kind, callee) {
340 (ScannedEdgeKind::Call, Some(_)) => EdgeKind::DirectCall,
341 (ScannedEdgeKind::Jump, Some(_)) => EdgeKind::TailCall,
342 (ScannedEdgeKind::IndirectCall, _) => EdgeKind::IndirectCall,
343 (_, None) => EdgeKind::ExternalCall,
344 };
345
346 let key = (symbol.id, callee, call.target, kind);
347 if seen.insert(key) {
348 edges.push(EdgeReport {
349 caller: symbol.id,
350 callee,
351 target_address: call.target,
352 kind,
353 confidence: Confidence::Medium,
354 });
355 }
356 }
357 }
358
359 edges
360}
361
362fn symbol_bytes<'a>(address: u64, size: u64, sections: &'a [SectionBytes<'a>]) -> Option<&'a [u8]> {
363 sections.iter().find_map(|section| {
364 let offset = address.checked_sub(section.address)? as usize;
365 let size = usize::try_from(size).ok()?;
366 section.data.get(offset..offset.checked_add(size)?)
367 })
368}
369
370fn scan_x86_direct_calls(base: u64, bytes: &[u8]) -> Vec<ScannedEdge> {
371 let mut edges = Vec::new();
372 let mut index = 0usize;
373
374 while index < bytes.len() {
375 let opcode = bytes[index];
376 match opcode {
377 0xe8 | 0xe9 if index + 5 <= bytes.len() => {
378 let rel = i32::from_le_bytes([
379 bytes[index + 1],
380 bytes[index + 2],
381 bytes[index + 3],
382 bytes[index + 4],
383 ]);
384 let next_ip = base + index as u64 + 5;
385 let target = next_ip.wrapping_add_signed(i64::from(rel));
386 edges.push(ScannedEdge {
387 target: Some(target),
388 kind: if opcode == 0xe8 {
389 ScannedEdgeKind::Call
390 } else {
391 ScannedEdgeKind::Jump
392 },
393 });
394 index += 5;
395 }
396 0xff if index + 2 <= bytes.len() => {
397 let reg = (bytes[index + 1] >> 3) & 0b111;
398 if reg == 2 || reg == 4 {
399 edges.push(ScannedEdge {
400 target: None,
401 kind: ScannedEdgeKind::IndirectCall,
402 });
403 }
404 index += 2;
405 }
406 _ => index += 1,
407 }
408 }
409
410 edges
411}
412
/// Maps an address to the id of the first range that contains it.
///
/// `ranges` holds `(start, end, id)` triples interpreted as half-open
/// intervals `[start, end)`. Returns `None` when `address` is `None` or no
/// range contains it. When ranges overlap, the earliest entry in slice order
/// wins.
fn resolve_symbol(address: Option<u64>, ranges: &[(u64, u64, u32)]) -> Option<u32> {
    let needle = address?;
    for &(start, end, id) in ranges {
        if (start..end).contains(&needle) {
            return Some(id);
        }
    }
    None
}
420
421fn build_groups(symbols: &[SymbolReport]) -> Vec<GroupReport> {
422 let mut by_name: BTreeMap<String, Vec<u32>> = BTreeMap::new();
423 for symbol in symbols {
424 let module_path = symbol
425 .module_path
426 .iter()
427 .map(|part| part.trim())
428 .filter(|part| !part.is_empty())
429 .collect::<Vec<_>>();
430 let group = if module_path.is_empty() {
431 symbol
432 .crate_name
433 .as_deref()
434 .map(str::trim)
435 .filter(|name| !name.is_empty())
436 .unwrap_or("(unknown)")
437 .to_owned()
438 } else {
439 module_path.join("::")
440 };
441 by_name.entry(group).or_default().push(symbol.id);
442 }
443
444 by_name
445 .into_iter()
446 .enumerate()
447 .map(|(index, (name, symbol_ids))| {
448 let own_frame_sum = symbol_ids
449 .iter()
450 .map(|id| symbols[*id as usize].own_frame.bytes)
451 .try_fold(0u64, |sum, value| value.map(|bytes| sum + bytes));
452 let worst_path_max = symbol_ids
453 .iter()
454 .filter_map(|id| symbols[*id as usize].worst_path.bytes)
455 .max();
456
457 GroupReport {
458 id: index as u32,
459 name,
460 parent: None,
461 symbol_ids,
462 own_frame_sum,
463 worst_path_max,
464 }
465 })
466 .collect()
467}
468
469fn summarize(symbols: &[SymbolReport], edges: &[EdgeReport]) -> Summary {
470 let known_frame_count = symbols
471 .iter()
472 .filter(|symbol| symbol.own_frame.status == FrameStatus::Known)
473 .count();
474 let recursive_symbol_count = symbols
475 .iter()
476 .filter(|symbol| symbol.worst_path.status == UpperBoundStatus::Recursive)
477 .count();
478 let indirect_edge_count = edges
479 .iter()
480 .filter(|edge| edge.kind == EdgeKind::IndirectCall)
481 .count();
482
483 Summary {
484 symbol_count: symbols.len(),
485 edge_count: edges.len(),
486 known_frame_count,
487 unknown_frame_count: symbols.len().saturating_sub(known_frame_count),
488 recursive_symbol_count,
489 indirect_edge_count,
490 max_own_frame: max_own_frame(symbols),
491 max_worst_path: max_worst_path(symbols),
492 confidence: summary_confidence(symbols, known_frame_count),
493 }
494}
495
496fn summary_confidence(symbols: &[SymbolReport], known_frame_count: usize) -> Confidence {
497 if symbols.is_empty() || known_frame_count == 0 {
498 return Confidence::Unknown;
499 }
500
501 if known_frame_count != symbols.len() {
502 return Confidence::Medium;
503 }
504
505 if symbols
506 .iter()
507 .all(|symbol| symbol.confidence == Confidence::Exact)
508 {
509 Confidence::Exact
510 } else if symbols
511 .iter()
512 .all(|symbol| matches!(symbol.confidence, Confidence::Exact | Confidence::High))
513 {
514 Confidence::High
515 } else {
516 Confidence::Medium
517 }
518}
519
520fn max_own_frame(symbols: &[SymbolReport]) -> Option<SymbolMetric> {
521 symbols
522 .iter()
523 .filter_map(|symbol| symbol.own_frame.bytes.map(|bytes| (symbol, bytes)))
524 .max_by_key(|(_, bytes)| *bytes)
525 .map(|(symbol, bytes)| SymbolMetric {
526 symbol_id: symbol.id,
527 bytes,
528 demangled: symbol.demangled.clone(),
529 })
530}
531
532fn max_worst_path(symbols: &[SymbolReport]) -> Option<SymbolMetric> {
533 symbols
534 .iter()
535 .filter_map(|symbol| symbol.worst_path.bytes.map(|bytes| (symbol, bytes)))
536 .max_by_key(|(_, bytes)| *bytes)
537 .map(|(symbol, bytes)| SymbolMetric {
538 symbol_id: symbol.id,
539 bytes,
540 demangled: symbol.demangled.clone(),
541 })
542}
543
544fn object_format(file: &object::File<'_>) -> ObjectFormat {
545 match file.format() {
546 object::BinaryFormat::Elf => ObjectFormat::Elf,
547 object::BinaryFormat::Coff | object::BinaryFormat::Pe => ObjectFormat::PeCoff,
548 object::BinaryFormat::MachO => ObjectFormat::MachO,
549 object::BinaryFormat::Wasm => ObjectFormat::Wasm,
550 _ => ObjectFormat::Unknown,
551 }
552}
553
554fn pointer_width(file: &object::File<'_>) -> Option<u8> {
555 if file.is_64() {
556 Some(64)
557 } else {
558 Some(32)
559 }
560}
561
562#[derive(Debug, Clone)]
563struct FrameRecord {
564 bytes: u64,
565 end: Option<u64>,
566 source: EvidenceSource,
567 confidence: Confidence,
568 note: String,
569}
570
571#[derive(Debug)]
572struct RawSymbol {
573 name: String,
574 address: u64,
575 size: u64,
576 source_location: Option<crate::SourceLocation>,
577}
578
/// Borrowed contents of one section together with its load address.
#[derive(Debug)]
struct SectionBytes<'a> {
    /// Virtual address of the first byte of `data`.
    address: u64,
    /// Raw section contents.
    data: &'a [u8],
}
584
585#[derive(Debug)]
586struct ScannedEdge {
587 target: Option<u64>,
588 kind: ScannedEdgeKind,
589}
590
/// Instruction category recognized by the opcode scanner.
#[derive(Debug, Clone, Copy)]
enum ScannedEdgeKind {
    /// Relative `call` (opcode `E8`).
    Call,
    /// Relative `jmp` (opcode `E9`).
    Jump,
    /// Indirect call or jump through the `FF` opcode group.
    IndirectCall,
}
597
598#[derive(Debug, thiserror::Error)]
599pub enum AnalyzeError {
600 #[error("failed to read artifact {path}: {source}")]
601 ReadArtifact {
602 path: Utf8PathBuf,
603 source: std::io::Error,
604 },
605 #[error("failed to parse object file {path}: {message}")]
606 ParseObject { path: Utf8PathBuf, message: String },
607}