hc_uniffi_bindgen 0.29.1

a multi-language bindings generator for rust (codegen and cli tooling)
Documentation
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

use anyhow::{anyhow, bail, Context};
use camino::Utf8Path;
use fs_err as fs;
use goblin::{
    archive::Archive,
    elf::Elf,
    mach::{segment::Section, symbols, Mach, MachO, SingleArch},
    pe::PE,
    Object,
};
use std::collections::HashSet;
use uniffi_meta::Metadata;

/// Extract metadata written by the `uniffi::export` macro from a library file
///
/// In addition to generating the scaffolding, that macro and also encodes the
/// `uniffi_meta::Metadata` for the components which can be used to generate the bindings side of
/// the interface.
pub fn extract_from_library(path: &Utf8Path) -> anyhow::Result<Vec<Metadata>> {
    extract_from_bytes(&fs::read(path)?)
}

fn extract_from_bytes(file_data: &[u8]) -> anyhow::Result<Vec<Metadata>> {
    match Object::parse(file_data)? {
        Object::Elf(elf) => extract_from_elf(elf, file_data),
        Object::PE(pe) => extract_from_pe(pe, file_data),
        Object::Mach(mach) => extract_from_mach(mach, file_data),
        Object::Archive(archive) => extract_from_archive(archive, file_data),
        _ => bail!("Unknown library format"),
    }
}

pub fn extract_from_elf(elf: Elf<'_>, file_data: &[u8]) -> anyhow::Result<Vec<Metadata>> {
    let mut extracted = ExtractedItems::new();

    // Some ELF files have a SHT_SYMTAB_SHNDX section that we use below.  If present, find the
    // offset for that section.
    let symtab_shndx_section_offset = elf
        .section_headers
        .iter()
        .find(|sh| sh.sh_type == goblin::elf::section_header::SHT_SYMTAB_SHNDX)
        .map(|sh| sh.sh_offset as usize);

    for (i, sym) in elf.syms.iter().enumerate() {
        let name = elf
            .strtab
            .get_at(sym.st_name)
            .context("Error getting symbol name")?;
        if !is_metadata_symbol(name) {
            continue;
        }

        let header_index = match sym.st_shndx as u32 {
            goblin::elf::section_header::SHN_XINDEX => {
                // The section header index overflowed 16 bits and we have to look it up from
                // the extended index table instead.  Each item in the SHT_SYMTAB_SHNDX section is
                // a 32-bit value even for a 64-bit ELF objects.
                let section_offset = symtab_shndx_section_offset
                    .ok_or_else(|| anyhow!("Symbol {name} has st_shndx=SHN_XINDEX, but no SHT_SYMTAB_SHNDX section present"))?;

                let offset = section_offset + (i * 4);
                let slice = file_data.get(offset..offset + 4).ok_or_else(|| {
                    anyhow!("Index error looking up {name} in the SHT_SYMTAB_SHNDX section")
                })?;
                // If the last statement succeeded, the slice is exactly 4 bytes, so this try_into
                // will never fail
                let byte_array = slice.try_into().unwrap();
                if elf.little_endian {
                    u32::from_le_bytes(byte_array) as usize
                } else {
                    u32::from_be_bytes(byte_array) as usize
                }
            }
            // The normal case is that we can just use `st_shndx`
            _ => sym.st_shndx,
        };

        let sh = elf
            .section_headers
            .get(header_index)
            .ok_or_else(|| anyhow!("Index error looking up section header for {name}"))?;

        // Offset relative to the start of the section.
        let section_offset = sym.st_value - sh.sh_addr;
        extracted.extract_item(name, file_data, (sh.sh_offset + section_offset) as usize)?;
    }
    Ok(extracted.into_metadata())
}

pub fn extract_from_pe(pe: PE<'_>, file_data: &[u8]) -> anyhow::Result<Vec<Metadata>> {
    let mut extracted = ExtractedItems::new();
    for export in pe.exports {
        if let Some(name) = export.name {
            if is_metadata_symbol(name) {
                extracted.extract_item(
                    name,
                    file_data,
                    export.offset.context("Error getting symbol offset")?,
                )?;
            }
        }
    }
    Ok(extracted.into_metadata())
}

pub fn extract_from_mach(mach: Mach<'_>, file_data: &[u8]) -> anyhow::Result<Vec<Metadata>> {
    match mach {
        Mach::Binary(macho) => extract_from_macho(macho, file_data),
        // Multi-binary library, just extract the first one
        Mach::Fat(multi_arch) => match multi_arch.get(0)? {
            SingleArch::MachO(macho) => extract_from_macho(macho, file_data),
            SingleArch::Archive(archive) => extract_from_archive(archive, file_data),
        },
    }
}

pub fn extract_from_macho(macho: MachO<'_>, file_data: &[u8]) -> anyhow::Result<Vec<Metadata>> {
    let mut sections: Vec<Section> = Vec::new();
    for sects in macho.segments.sections() {
        sections.extend(sects.map(|r| r.expect("section").0));
    }
    let mut extracted = ExtractedItems::new();
    sections.sort_by_key(|s| s.addr);

    // Iterate through the symbols.  This picks up symbols from the .o files embedded in a Darwin
    // archive.
    for (name, nlist) in macho.symbols().flatten() {
        // Check that the symbol:
        //   - Is global (exported)
        //   - Has type=N_SECT (it's regular data as opposed to something like
        //     "undefined" or "indirect")
        //   - Has a metadata symbol name
        if nlist.is_global()
            && nlist.get_type() == symbols::N_SECT
            && is_metadata_symbol(name)
            && nlist.n_sect != symbols::NO_SECT as usize
        {
            let section = &sections[nlist.n_sect - 1];

            // `nlist.n_value` is an address, so we can calculating the offset inside the section
            // using the difference between that and `section.addr`
            let offset = section.offset as usize + nlist.n_value as usize - section.addr as usize;
            extracted.extract_item(name, file_data, offset)?;
        }
    }

    // Iterate through the exports.  This picks up symbols from .dylib files.
    for export in macho.exports()? {
        let name = &export.name;
        if is_metadata_symbol(name) {
            extracted.extract_item(name, file_data, export.offset as usize)?;
        }
    }
    Ok(extracted.into_metadata())
}

pub fn extract_from_archive(
    archive: Archive<'_>,
    file_data: &[u8],
) -> anyhow::Result<Vec<Metadata>> {
    // Store the names of archive members that have metadata symbols in them
    let mut members_to_check: HashSet<&str> = HashSet::new();
    for (member_name, _, symbols) in archive.summarize() {
        for name in symbols {
            if is_metadata_symbol(name) {
                members_to_check.insert(member_name);
            }
        }
    }

    let mut items = vec![];
    for member_name in members_to_check {
        items.append(
            &mut extract_from_bytes(
                archive
                    .extract(member_name, file_data)
                    .with_context(|| format!("Failed to extract archive member `{member_name}`"))?,
            )
            .with_context(|| {
                format!("Failed to extract data from archive member `{member_name}`")
            })?,
        );
    }
    Ok(items)
}

/// Container for extracted metadata items
#[derive(Default)]
struct ExtractedItems {
    items: Vec<Metadata>,
    /// symbol names for the extracted items, we use this to ensure that we don't extract the same
    /// symbol twice
    names: HashSet<String>,
}

impl ExtractedItems {
    fn new() -> Self {
        Self::default()
    }

    fn extract_item(&mut self, name: &str, file_data: &[u8], offset: usize) -> anyhow::Result<()> {
        if self.names.contains(name) {
            // Already extracted this item
            return Ok(());
        }

        // Use the file data starting from offset, without specifying the end position.  We don't
        // always know the end position, because goblin reports the symbol size as 0 for PE and
        // MachO files.
        //
        // This works fine, because `MetadataReader` knows when the serialized data is terminated
        // and will just ignore the trailing data.
        let data = &file_data[offset..];
        self.items.push(
            Metadata::read(data).with_context(|| format!("extracting metadata for '{name}'"))?,
        );
        self.names.insert(name.to_string());
        Ok(())
    }

    fn into_metadata(self) -> Vec<Metadata> {
        self.items
    }
}

fn is_metadata_symbol(name: &str) -> bool {
    // Skip the "_" char that Darwin prepends, if present
    let name = name.strip_prefix('_').unwrap_or(name);
    name.starts_with("UNIFFI_META")
}