Skip to main content

pyro_artifacts/
debug.rs

1//! Advanced debugging tools for artifacts
2
3use std::{borrow::Cow, collections::HashMap};
4
5use crate::artifacts::append_file;
6use crate::artifacts::{Artifact, CapabilityBinary, PlaybookBinary};
7use flate2::Compression;
8use flate2::read::GzDecoder;
9use flate2::write::GzEncoder;
10use object::{BinaryFormat, Object, ObjectSection, ObjectSymbol, SymbolKind};
11use std::io::{self, Read};
12use std::path::Path;
13use tar::Builder;
14use tokio::fs;
15
16pub fn wat(module: &PlaybookBinary) -> Result<String, String> {
17    wasmprinter::print_bytes(&module.wasm)
18        .map_err(|e| format!("Failed to convert WASM to WAT: {}", e))
19}
20
/// One exported symbol found in a capability library.
#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct CapSymbol {
    /// Symbol name exactly as it appears in the binary's symbol table.
    pub name: String,
    /// Address reported for the symbol by the object file parser.
    pub address: u64,
    /// Signature reconstructed from DWARF, formatted as
    /// `fn name(args) -> ret`; `None` when no matching debug info was found.
    pub signature: Option<String>,
}
27
/// Wraps the symbols with the detected file format of their parent binary
#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
pub enum CapSymbols {
    /// Symbols taken from an ELF binary.
    Elf(Vec<CapSymbol>),
    /// Symbols taken from a Mach-O binary.
    MachO(Vec<CapSymbol>),
    /// Symbols taken from a PE binary.
    Pe(Vec<CapSymbol>),
    /// Symbols taken from a binary in any other format.
    Unknown(Vec<CapSymbol>),
}
36
37pub struct CapabilityDebug {
38    pub symbols: Vec<Result<CapSymbols, String>>,
39    pub cap_rs: Option<String>,
40}
41
42impl Artifact for CapabilityDebug {
43    async fn write_to_directory(&self, path: &Path) -> std::io::Result<()> {
44        fs::create_dir_all(path).await?;
45        for sym in &self.symbols {
46            let (name, content) = match sym {
47                Ok(CapSymbols::Elf(sym)) => ("elf.json", sym),
48                Ok(CapSymbols::MachO(sym)) => ("macho.json", sym),
49                Ok(CapSymbols::Pe(sym)) => ("pe.json", sym),
50                Ok(CapSymbols::Unknown(sym)) => ("Unknown.json", sym),
51                Err(error) => {
52                    tracing::error!(error, "Unable to get one set of symbols");
53                    continue;
54                }
55            };
56            match serde_json::to_string_pretty(&content) {
57                Ok(content) => fs::write(path.join(name), content).await?,
58                Err(error) => {
59                    tracing::error!(?error, "Unable to serialize symbols");
60                    continue;
61                }
62            }
63        }
64
65        if let Some(code) = &self.cap_rs {
66            fs::write(path.join("cap.rs"), code).await?;
67        }
68
69        Ok(())
70    }
71
72    fn to_tarball(&self) -> Result<Vec<u8>, io::Error> {
73        let encoder = GzEncoder::new(Vec::new(), Compression::default());
74        let mut tar = Builder::new(encoder);
75
76        for sym in &self.symbols {
77            let (name, content) = match sym {
78                Ok(CapSymbols::Elf(sym)) => ("elf.json", sym),
79                Ok(CapSymbols::MachO(sym)) => ("macho.json", sym),
80                Ok(CapSymbols::Pe(sym)) => ("pe.json", sym),
81                Ok(CapSymbols::Unknown(sym)) => ("Unknown.json", sym),
82                Err(_) => continue,
83            };
84            let content = serde_json::to_vec_pretty(&content).map_err(|e| {
85                io::Error::new(io::ErrorKind::InvalidData, format!("JSON error: {}", e))
86            })?;
87            append_file(&mut tar, name, &content)?;
88        }
89
90        if let Some(code) = &self.cap_rs {
91            append_file(&mut tar, "cap.rs", code.as_bytes())?;
92        }
93
94        tar.into_inner()?.finish()
95    }
96
97    fn from_tarball(bytes: &[u8]) -> Result<Self, io::Error> {
98        let tar = GzDecoder::new(bytes);
99        let mut archive = tar::Archive::new(tar);
100
101        let mut symbols = Vec::new();
102        let mut cap_rs = None;
103
104        for file in archive.entries()? {
105            let mut file = file?;
106            let path = file.path()?.to_path_buf();
107            let mut content = Vec::new();
108            file.read_to_end(&mut content)?;
109
110            let filename = path.to_string_lossy();
111            if filename.ends_with(".json") {
112                let sym: Vec<CapSymbol> = serde_json::from_slice(&content)
113                    .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
114                let cap_symbols = if filename == "elf.json" {
115                    CapSymbols::Elf(sym)
116                } else if filename == "macho.json" {
117                    CapSymbols::MachO(sym)
118                } else if filename == "pe.json" {
119                    CapSymbols::Pe(sym)
120                } else {
121                    CapSymbols::Unknown(sym)
122                };
123                symbols.push(Ok(cap_symbols));
124            } else if filename == "cap.rs" {
125                cap_rs = String::from_utf8(content).ok();
126            }
127        }
128
129        Ok(CapabilityDebug { symbols, cap_rs })
130    }
131
132    async fn from_dir(path: &Path) -> Result<Self, io::Error> {
133        let mut symbols = Vec::new();
134        let mut cap_rs = None;
135
136        let names = ["elf.json", "macho.json", "pe.json", "Unknown.json"];
137        for name in names {
138            let full_path = path.join(name);
139            if let Ok(content) = fs::read(&full_path).await {
140                let sym: Vec<CapSymbol> = serde_json::from_slice(&content).map_err(|e| {
141                    io::Error::new(
142                        io::ErrorKind::InvalidData,
143                        format!("Failed to parse {}: {}", name, e),
144                    )
145                })?;
146                let cap_symbols = match name {
147                    "elf.json" => CapSymbols::Elf(sym),
148                    "macho.json" => CapSymbols::MachO(sym),
149                    "pe.json" => CapSymbols::Pe(sym),
150                    _ => CapSymbols::Unknown(sym),
151                };
152                symbols.push(Ok(cap_symbols));
153            }
154        }
155
156        if let Ok(code) = fs::read_to_string(path.join("cap.rs")).await {
157            cap_rs = Some(code);
158        }
159
160        Ok(CapabilityDebug { symbols, cap_rs })
161    }
162}
163
/// Debug artifact for a module: its WAT text and, optionally, a `cap.rs`
/// source listing.
#[derive(Debug, Clone, PartialEq)]
pub struct ModuleDebug {
    /// Contents stored under the `mod.wat` artifact file, if any.
    pub wat: Option<String>,
    /// Contents stored under the `cap.rs` artifact file, if any.
    pub cap_rs: Option<String>,
}
168
169impl Artifact for ModuleDebug {
170    async fn write_to_directory(&self, path: &Path) -> std::io::Result<()> {
171        fs::create_dir_all(path).await?;
172        if let Some(wat) = &self.wat {
173            fs::write(path.join("mod.wat"), wat).await?;
174        }
175        if let Some(code) = &self.cap_rs {
176            fs::write(path.join("cap.rs"), code).await?;
177        }
178        Ok(())
179    }
180
181    fn to_tarball(&self) -> Result<Vec<u8>, io::Error> {
182        let encoder = GzEncoder::new(Vec::new(), Compression::default());
183        let mut tar = Builder::new(encoder);
184
185        if let Some(wat) = &self.wat {
186            append_file(&mut tar, "mod.wat", wat.as_bytes())?;
187        }
188        if let Some(code) = &self.cap_rs {
189            append_file(&mut tar, "cap.rs", code.as_bytes())?;
190        }
191
192        tar.into_inner()?.finish()
193    }
194
195    fn from_tarball(bytes: &[u8]) -> Result<Self, io::Error> {
196        let tar = GzDecoder::new(bytes);
197        let mut archive = tar::Archive::new(tar);
198
199        let mut wat = None;
200        let mut cap_rs = None;
201
202        for file in archive.entries()? {
203            let mut file = file?;
204            let path = file.path()?.to_path_buf();
205            let mut content = Vec::new();
206            file.read_to_end(&mut content)?;
207
208            match path.to_string_lossy().as_ref() {
209                "mod.wat" => wat = String::from_utf8(content).ok(),
210                "cap.rs" => cap_rs = String::from_utf8(content).ok(),
211                _ => {}
212            }
213        }
214
215        Ok(ModuleDebug { wat, cap_rs })
216    }
217
218    async fn from_dir(path: &Path) -> Result<Self, io::Error> {
219        let mut wat = None;
220        let mut cap_rs = None;
221
222        if let Ok(w) = fs::read_to_string(path.join("mod.wat")).await {
223            wat = Some(w);
224        }
225        if let Ok(c) = fs::read_to_string(path.join("cap.rs")).await {
226            cap_rs = Some(c);
227        }
228
229        Ok(ModuleDebug { wat, cap_rs })
230    }
231}
232
233/// Scans a dynamic library and uses DWARF debug info to reconstruct signatures
234#[tracing::instrument(skip(capability), fields(ident = ?capability.ident))]
235pub fn symbols(capability: &CapabilityBinary) -> Vec<Result<CapSymbols, String>> {
236    tracing::debug!("Extracting symbols from capability binary");
237    let mut results = Vec::new();
238
239    for (index, bin) in capability.libs.iter().enumerate() {
240        let data: &[u8] = bin;
241
242        let file = match object::File::parse(data) {
243            Ok(f) => f,
244            Err(e) => {
245                results.push(Err(format!(
246                    "Failed to parse binary at index {}: {}",
247                    index, e
248                )));
249                continue;
250            }
251        };
252
253        let mut symbols = HashMap::new();
254        for symbol in file.symbols() {
255            if symbol.kind() == SymbolKind::Text && symbol.is_global() && !symbol.is_undefined() {
256                let name = symbol.name().unwrap_or("<unknown>");
257
258                // Target specifically exports that start with "p_"
259                if name.trim_start_matches('_').starts_with("p_") {
260                    symbols.insert(
261                        name.to_string(),
262                        CapSymbol {
263                            name: name.to_string(),
264                            address: symbol.address(),
265                            signature: None,
266                        },
267                    );
268                }
269            }
270        }
271
272        if symbols.is_empty() {
273            continue;
274        }
275
276        let endian = if file.is_little_endian() {
277            gimli::RunTimeEndian::Little
278        } else {
279            gimli::RunTimeEndian::Big
280        };
281
282        let load_section = |id: gimli::SectionId| -> Result<Cow<[u8]>, gimli::Error> {
283            match file.section_by_name(id.name()) {
284                Some(ref section) => Ok(section
285                    .uncompressed_data()
286                    .unwrap_or(Cow::Borrowed(&[][..]))),
287                None => Ok(Cow::Borrowed(&[][..])),
288            }
289        };
290
291        let wrap_symbols = |syms: HashMap<String, CapSymbol>| -> CapSymbols {
292            let vec_syms = syms.into_values().collect();
293            match file.format() {
294                BinaryFormat::Elf => CapSymbols::Elf(vec_syms),
295                BinaryFormat::MachO => CapSymbols::MachO(vec_syms),
296                BinaryFormat::Pe => CapSymbols::Pe(vec_syms),
297                _ => CapSymbols::Unknown(vec_syms),
298            }
299        };
300
301        let dwarf_sections = match gimli::DwarfSections::load(&load_section) {
302            Ok(s) => s,
303            Err(_) => {
304                results.push(Ok(wrap_symbols(symbols)));
305                continue;
306            }
307        };
308
309        let dwarf = dwarf_sections.borrow(|section| gimli::EndianSlice::new(section, endian));
310
311        if let Err(e) = enrich_signatures_with_dwarf(&dwarf, endian, &mut symbols) {
312            results.push(Err(format!("DWARF parsing error: {}", e)));
313        }
314
315        results.push(Ok(wrap_symbols(symbols)));
316    }
317
318    results
319}
320
321#[tracing::instrument(skip(dwarf, symbols))]
322fn enrich_signatures_with_dwarf(
323    dwarf: &gimli::Dwarf<gimli::EndianSlice<gimli::RunTimeEndian>>,
324    _endian: gimli::RunTimeEndian,
325    symbols: &mut HashMap<String, CapSymbol>,
326) -> Result<(), gimli::Error> {
327    tracing::debug!("Enriching symbol signatures using DWARF info");
328    let mut iter = dwarf.units();
329
330    while let Some(header) = iter.next()? {
331        let unit = dwarf.unit(header)?;
332        let mut entries = unit.entries();
333
334        while let Some(entry) = entries.next_dfs()? {
335            if entry.tag == gimli::DW_TAG_subprogram {
336                let name_attr = entry
337                    .attr_value(gimli::DW_AT_linkage_name)
338                    .or(entry.attr_value(gimli::DW_AT_name));
339
340                let name = if let Some(attr) = name_attr {
341                    // Use `attr_string` to natively handle both string offsets and inline strings
342                    if let Ok(s) = dwarf.attr_string(&unit, attr) {
343                        s.to_string_lossy().into_owned()
344                    } else {
345                        continue;
346                    }
347                } else {
348                    continue;
349                };
350
351                if let Some(sym) = symbols.get_mut(&name) {
352                    let mut args = Vec::new();
353                    let mut return_type = String::from("()");
354
355                    if let Some(gimli::AttributeValue::UnitRef(offset)) =
356                        entry.attr_value(gimli::DW_AT_type)
357                    {
358                        // Pass `dwarf` down into the type resolver
359                        return_type = resolve_dwarf_type(dwarf, &unit, offset)
360                            .unwrap_or_else(|| "Unknown".to_string());
361                    }
362
363                    let mut children = unit.entries_at_offset(entry.offset)?;
364                    children.next_dfs()?;
365
366                    while let Some(child) = children.next_dfs()? {
367                        if child.depth <= 0 {
368                            break;
369                        }
370
371                        if child.tag == gimli::DW_TAG_formal_parameter
372                            && let Some(gimli::AttributeValue::UnitRef(offset)) =
373                                child.attr_value(gimli::DW_AT_type)
374                        {
375                            // Pass `dwarf` down into the type resolver
376                            args.push(
377                                resolve_dwarf_type(dwarf, &unit, offset)
378                                    .unwrap_or_else(|| "Unknown".to_string()),
379                            );
380                        }
381                    }
382
383                    sym.signature = Some(format!(
384                        "fn {}({}) -> {}",
385                        name,
386                        args.join(", "),
387                        return_type
388                    ));
389                }
390            }
391        }
392    }
393    Ok(())
394}
395
396/// Recursively resolves DWARF types (handling pointers, modifiers, and base types)
397fn resolve_dwarf_type(
398    dwarf: &gimli::Dwarf<gimli::EndianSlice<gimli::RunTimeEndian>>,
399    unit: &gimli::Unit<gimli::EndianSlice<gimli::RunTimeEndian>, usize>,
400    offset: gimli::UnitOffset,
401) -> Option<String> {
402    let mut entries = unit.entries_at_offset(offset).ok()?;
403    let entry = entries.next_dfs().ok()??;
404
405    // 1. If this DWARF node has a name directly (e.g., base type, struct, typedef), return it.
406    if let Some(attr) = entry.attr_value(gimli::DW_AT_name)
407        && let Ok(s) = dwarf.attr_string(unit, attr)
408    {
409        return Some(s.to_string_lossy().into_owned());
410    }
411
412    // 2. Format pointer types natively
413    if entry.tag == gimli::DW_TAG_pointer_type {
414        if let Some(gimli::AttributeValue::UnitRef(inner_offset)) =
415            entry.attr_value(gimli::DW_AT_type)
416        {
417            return Some(format!(
418                "*{}",
419                resolve_dwarf_type(dwarf, unit, inner_offset).unwrap_or_else(|| "void".into())
420            ));
421        }
422        return Some("*void".to_string());
423    }
424
425    // 3. For unnamed modifiers (const, volatile) or typedefs, follow the DW_AT_type chain down.
426    if let Some(gimli::AttributeValue::UnitRef(inner_offset)) = entry.attr_value(gimli::DW_AT_type)
427    {
428        return resolve_dwarf_type(dwarf, unit, inner_offset);
429    }
430
431    // 4. Fallbacks for unnamed composite types
432    match entry.tag {
433        gimli::DW_TAG_structure_type => Some("<unnamed struct>".to_string()),
434        gimli::DW_TAG_union_type => Some("<unnamed union>".to_string()),
435        gimli::DW_TAG_enumeration_type => Some("<unnamed enum>".to_string()),
436        gimli::DW_TAG_subroutine_type => Some("<function pointer>".to_string()),
437        _ => None,
438    }
439}