Skip to main content

pyro_artifacts/
debug.rs

1//! Advanced debugging tools for artifacts
2
3use std::{borrow::Cow, collections::HashMap};
4
5use crate::artifacts::append_file;
6use crate::artifacts::{Artifact, CapabilityBinary, ModuleBinary};
7use flate2::Compression;
8use flate2::read::GzDecoder;
9use flate2::write::GzEncoder;
10use object::{BinaryFormat, Object, ObjectSection, ObjectSymbol, SymbolKind};
11use std::io::{self, Read};
12use std::path::Path;
13use tar::Builder;
14use tokio::fs;
15
16pub fn wat(module: &ModuleBinary) -> Result<String, String> {
17    wasmprinter::print_bytes(&module.wasm)
18        .map_err(|e| format!("Failed to convert WASM to WAT: {}", e))
19}
20
21#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
22pub struct CapSymbol {
23    pub name: String,
24    pub address: u64,
25    pub signature: Option<String>,
26}
27
28/// Wraps the symbols with the detected file format of their parent binary
29#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
30pub enum CapSymbols {
31    Elf(Vec<CapSymbol>),
32    MachO(Vec<CapSymbol>),
33    Pe(Vec<CapSymbol>),
34    Unknown(Vec<CapSymbol>),
35}
36
37pub struct CapabilityDebug {
38    pub symbols: Vec<Result<CapSymbols, String>>,
39    pub cap_rs: Option<String>,
40}
41
42impl Artifact for CapabilityDebug {
43    async fn write_to_directory(&self, path: &Path) -> std::io::Result<()> {
44        fs::create_dir_all(path).await?;
45        for sym in &self.symbols {
46            let (name, content) = match sym {
47                Ok(CapSymbols::Elf(sym)) => ("elf.json", sym),
48                Ok(CapSymbols::MachO(sym)) => ("macho.json", sym),
49                Ok(CapSymbols::Pe(sym)) => ("pe.json", sym),
50                Ok(CapSymbols::Unknown(sym)) => ("Unknown.json", sym),
51                Err(error) => {
52                    tracing::error!(error, "Unable to get one set of symbols");
53                    continue;
54                }
55            };
56            match serde_json::to_string_pretty(&content) {
57                Ok(content) => fs::write(path.join(name), content).await?,
58                Err(error) => {
59                    tracing::error!(?error, "Unable to serialize symbols");
60                    continue;
61                }
62            }
63        }
64
65        if let Some(code) = &self.cap_rs {
66            fs::write(path.join("cap.rs"), code).await?;
67        }
68
69        Ok(())
70    }
71
72    fn to_tarball(&self) -> Result<Vec<u8>, io::Error> {
73        let encoder = GzEncoder::new(Vec::new(), Compression::default());
74        let mut tar = Builder::new(encoder);
75
76        for sym in &self.symbols {
77            let (name, content) = match sym {
78                Ok(CapSymbols::Elf(sym)) => ("elf.json", sym),
79                Ok(CapSymbols::MachO(sym)) => ("macho.json", sym),
80                Ok(CapSymbols::Pe(sym)) => ("pe.json", sym),
81                Ok(CapSymbols::Unknown(sym)) => ("Unknown.json", sym),
82                Err(_) => continue,
83            };
84            let content = serde_json::to_vec_pretty(&content).map_err(|e| {
85                io::Error::new(io::ErrorKind::InvalidData, format!("JSON error: {}", e))
86            })?;
87            append_file(&mut tar, name, &content)?;
88        }
89
90        if let Some(code) = &self.cap_rs {
91            append_file(&mut tar, "cap.rs", code.as_bytes())?;
92        }
93
94        tar.into_inner()?.finish()
95    }
96
97    fn from_tarball(bytes: &[u8]) -> Result<Self, io::Error> {
98        let tar = GzDecoder::new(bytes);
99        let mut archive = tar::Archive::new(tar);
100
101        let mut symbols = Vec::new();
102        let mut cap_rs = None;
103
104        for file in archive.entries()? {
105            let mut file = file?;
106            let path = file.path()?.to_path_buf();
107            let mut content = Vec::new();
108            file.read_to_end(&mut content)?;
109
110            let filename = path.to_string_lossy();
111            if filename.ends_with(".json") {
112                let sym: Vec<CapSymbol> = serde_json::from_slice(&content)
113                    .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
114                let cap_symbols = if filename == "elf.json" {
115                    CapSymbols::Elf(sym)
116                } else if filename == "macho.json" {
117                    CapSymbols::MachO(sym)
118                } else if filename == "pe.json" {
119                    CapSymbols::Pe(sym)
120                } else {
121                    CapSymbols::Unknown(sym)
122                };
123                symbols.push(Ok(cap_symbols));
124            } else if filename == "cap.rs" {
125                cap_rs = String::from_utf8(content).ok();
126            }
127        }
128
129        Ok(CapabilityDebug { symbols, cap_rs })
130    }
131
132    async fn from_dir(path: &Path) -> Result<Self, io::Error> {
133        let mut symbols = Vec::new();
134        let mut cap_rs = None;
135
136        let names = ["elf.json", "macho.json", "pe.json", "Unknown.json"];
137        for name in names {
138            let full_path = path.join(name);
139            if let Ok(content) = fs::read(&full_path).await {
140                let sym: Vec<CapSymbol> = serde_json::from_slice(&content).map_err(|e| {
141                    io::Error::new(
142                        io::ErrorKind::InvalidData,
143                        format!("Failed to parse {}: {}", name, e),
144                    )
145                })?;
146                let cap_symbols = match name {
147                    "elf.json" => CapSymbols::Elf(sym),
148                    "macho.json" => CapSymbols::MachO(sym),
149                    "pe.json" => CapSymbols::Pe(sym),
150                    _ => CapSymbols::Unknown(sym),
151                };
152                symbols.push(Ok(cap_symbols));
153            }
154        }
155
156        if let Ok(code) = fs::read_to_string(path.join("cap.rs")).await {
157            cap_rs = Some(code);
158        }
159
160        Ok(CapabilityDebug { symbols, cap_rs })
161    }
162}
163
/// Debug artifact for a module: its WAT text and an optional `cap.rs`
/// source text.
#[derive(Debug, Clone, PartialEq)]
pub struct ModuleDebug {
    /// Text stored as `mod.wat` inside the artifact, when present.
    pub wat: Option<String>,
    /// Text stored as `cap.rs` inside the artifact, when present.
    pub cap_rs: Option<String>,
}
168
169impl Artifact for ModuleDebug {
170    async fn write_to_directory(&self, path: &Path) -> std::io::Result<()> {
171        fs::create_dir_all(path).await?;
172        if let Some(wat) = &self.wat {
173            fs::write(path.join("mod.wat"), wat).await?;
174        }
175        if let Some(code) = &self.cap_rs {
176            fs::write(path.join("cap.rs"), code).await?;
177        }
178        Ok(())
179    }
180
181    fn to_tarball(&self) -> Result<Vec<u8>, io::Error> {
182        let encoder = GzEncoder::new(Vec::new(), Compression::default());
183        let mut tar = Builder::new(encoder);
184
185        if let Some(wat) = &self.wat {
186            append_file(&mut tar, "mod.wat", wat.as_bytes())?;
187        }
188        if let Some(code) = &self.cap_rs {
189            append_file(&mut tar, "cap.rs", code.as_bytes())?;
190        }
191
192        tar.into_inner()?.finish()
193    }
194
195    fn from_tarball(bytes: &[u8]) -> Result<Self, io::Error> {
196        let tar = GzDecoder::new(bytes);
197        let mut archive = tar::Archive::new(tar);
198
199        let mut wat = None;
200        let mut cap_rs = None;
201
202        for file in archive.entries()? {
203            let mut file = file?;
204            let path = file.path()?.to_path_buf();
205            let mut content = Vec::new();
206            file.read_to_end(&mut content)?;
207
208            match path.to_string_lossy().as_ref() {
209                "mod.wat" => wat = String::from_utf8(content).ok(),
210                "cap.rs" => cap_rs = String::from_utf8(content).ok(),
211                _ => {}
212            }
213        }
214
215        Ok(ModuleDebug { wat, cap_rs })
216    }
217
218    async fn from_dir(path: &Path) -> Result<Self, io::Error> {
219        let mut wat = None;
220        let mut cap_rs = None;
221
222        if let Ok(w) = fs::read_to_string(path.join("mod.wat")).await {
223            wat = Some(w);
224        }
225        if let Ok(c) = fs::read_to_string(path.join("cap.rs")).await {
226            cap_rs = Some(c);
227        }
228
229        Ok(ModuleDebug { wat, cap_rs })
230    }
231}
232
233/// Scans a dynamic library and uses DWARF debug info to reconstruct signatures
234pub fn symbols(capability: &CapabilityBinary) -> Vec<Result<CapSymbols, String>> {
235    let mut results = Vec::new();
236
237    for (index, bin) in capability.libs.iter().enumerate() {
238        let data: &[u8] = &**bin;
239
240        let file = match object::File::parse(data) {
241            Ok(f) => f,
242            Err(e) => {
243                results.push(Err(format!(
244                    "Failed to parse binary at index {}: {}",
245                    index, e
246                )));
247                continue;
248            }
249        };
250
251        let mut symbols = HashMap::new();
252        for symbol in file.symbols() {
253            if symbol.kind() == SymbolKind::Text && symbol.is_global() && !symbol.is_undefined() {
254                let name = symbol.name().unwrap_or("<unknown>");
255
256                // Target specifically exports that start with "p_"
257                if name.trim_start_matches('_').starts_with("p_") {
258                    symbols.insert(
259                        name.to_string(),
260                        CapSymbol {
261                            name: name.to_string(),
262                            address: symbol.address(),
263                            signature: None,
264                        },
265                    );
266                }
267            }
268        }
269
270        if symbols.is_empty() {
271            continue;
272        }
273
274        let endian = if file.is_little_endian() {
275            gimli::RunTimeEndian::Little
276        } else {
277            gimli::RunTimeEndian::Big
278        };
279
280        let load_section = |id: gimli::SectionId| -> Result<Cow<[u8]>, gimli::Error> {
281            match file.section_by_name(id.name()) {
282                Some(ref section) => Ok(section
283                    .uncompressed_data()
284                    .unwrap_or(Cow::Borrowed(&[][..]))),
285                None => Ok(Cow::Borrowed(&[][..])),
286            }
287        };
288
289        let wrap_symbols = |syms: HashMap<String, CapSymbol>| -> CapSymbols {
290            let vec_syms = syms.into_values().collect();
291            match file.format() {
292                BinaryFormat::Elf => CapSymbols::Elf(vec_syms),
293                BinaryFormat::MachO => CapSymbols::MachO(vec_syms),
294                BinaryFormat::Pe => CapSymbols::Pe(vec_syms),
295                _ => CapSymbols::Unknown(vec_syms),
296            }
297        };
298
299        let dwarf_sections = match gimli::DwarfSections::load(&load_section) {
300            Ok(s) => s,
301            Err(_) => {
302                results.push(Ok(wrap_symbols(symbols)));
303                continue;
304            }
305        };
306
307        let dwarf = dwarf_sections.borrow(|section| gimli::EndianSlice::new(&*section, endian));
308
309        if let Err(e) = enrich_signatures_with_dwarf(&dwarf, endian, &mut symbols) {
310            results.push(Err(format!("DWARF parsing error: {}", e)));
311        }
312
313        results.push(Ok(wrap_symbols(symbols)));
314    }
315
316    results
317}
318
319fn enrich_signatures_with_dwarf(
320    dwarf: &gimli::Dwarf<gimli::EndianSlice<gimli::RunTimeEndian>>,
321    _endian: gimli::RunTimeEndian,
322    symbols: &mut HashMap<String, CapSymbol>,
323) -> Result<(), gimli::Error> {
324    let mut iter = dwarf.units();
325
326    while let Some(header) = iter.next()? {
327        let unit = dwarf.unit(header)?;
328        let mut entries = unit.entries();
329
330        while let Some(entry) = entries.next_dfs()? {
331            if entry.tag == gimli::DW_TAG_subprogram {
332                let name_attr = entry
333                    .attr_value(gimli::DW_AT_linkage_name)
334                    .or(entry.attr_value(gimli::DW_AT_name));
335
336                let name = if let Some(attr) = name_attr {
337                    // Use `attr_string` to natively handle both string offsets and inline strings
338                    if let Ok(s) = dwarf.attr_string(&unit, attr) {
339                        s.to_string_lossy().into_owned()
340                    } else {
341                        continue;
342                    }
343                } else {
344                    continue;
345                };
346
347                if let Some(sym) = symbols.get_mut(&name) {
348                    let mut args = Vec::new();
349                    let mut return_type = String::from("()");
350
351                    if let Some(gimli::AttributeValue::UnitRef(offset)) =
352                        entry.attr_value(gimli::DW_AT_type)
353                    {
354                        // Pass `dwarf` down into the type resolver
355                        return_type = resolve_dwarf_type(dwarf, &unit, offset)
356                            .unwrap_or_else(|| "Unknown".to_string());
357                    }
358
359                    let mut children = unit.entries_at_offset(entry.offset)?;
360                    children.next_dfs()?;
361
362                    while let Some(child) = children.next_dfs()? {
363                        if child.depth <= 0 {
364                            break;
365                        }
366
367                        if child.tag == gimli::DW_TAG_formal_parameter {
368                            if let Some(gimli::AttributeValue::UnitRef(offset)) =
369                                child.attr_value(gimli::DW_AT_type)
370                            {
371                                // Pass `dwarf` down into the type resolver
372                                args.push(
373                                    resolve_dwarf_type(dwarf, &unit, offset)
374                                        .unwrap_or_else(|| "Unknown".to_string()),
375                                );
376                            }
377                        }
378                    }
379
380                    sym.signature = Some(format!(
381                        "fn {}({}) -> {}",
382                        name,
383                        args.join(", "),
384                        return_type
385                    ));
386                }
387            }
388        }
389    }
390    Ok(())
391}
392
393/// Recursively resolves DWARF types (handling pointers, modifiers, and base types)
394fn resolve_dwarf_type(
395    dwarf: &gimli::Dwarf<gimli::EndianSlice<gimli::RunTimeEndian>>,
396    unit: &gimli::Unit<gimli::EndianSlice<gimli::RunTimeEndian>, usize>,
397    offset: gimli::UnitOffset,
398) -> Option<String> {
399    let mut entries = unit.entries_at_offset(offset).ok()?;
400    let entry = entries.next_dfs().ok()??;
401
402    // 1. If this DWARF node has a name directly (e.g., base type, struct, typedef), return it.
403    if let Some(attr) = entry.attr_value(gimli::DW_AT_name) {
404        if let Ok(s) = dwarf.attr_string(unit, attr) {
405            return Some(s.to_string_lossy().into_owned());
406        }
407    }
408
409    // 2. Format pointer types natively
410    if entry.tag == gimli::DW_TAG_pointer_type {
411        if let Some(gimli::AttributeValue::UnitRef(inner_offset)) =
412            entry.attr_value(gimli::DW_AT_type)
413        {
414            return Some(format!(
415                "*{}",
416                resolve_dwarf_type(dwarf, unit, inner_offset).unwrap_or_else(|| "void".into())
417            ));
418        }
419        return Some("*void".to_string());
420    }
421
422    // 3. For unnamed modifiers (const, volatile) or typedefs, follow the DW_AT_type chain down.
423    if let Some(gimli::AttributeValue::UnitRef(inner_offset)) = entry.attr_value(gimli::DW_AT_type)
424    {
425        return resolve_dwarf_type(dwarf, unit, inner_offset);
426    }
427
428    // 4. Fallbacks for unnamed composite types
429    match entry.tag {
430        gimli::DW_TAG_structure_type => Some("<unnamed struct>".to_string()),
431        gimli::DW_TAG_union_type => Some("<unnamed union>".to_string()),
432        gimli::DW_TAG_enumeration_type => Some("<unnamed enum>".to_string()),
433        gimli::DW_TAG_subroutine_type => Some("<function pointer>".to_string()),
434        _ => None,
435    }
436}