bindgen_helpers 0.5.1

Utilities to rename, change case, and fix Rust code generated by bindgen from C headers
Documentation
use bindgen::callbacks::ItemInfo;
pub use convert_case::Case;
use convert_case::Casing as _;
pub use regex::Regex;
use std::collections::HashMap;

use crate::callbacks::{EnumVariantValue, ParseCallbacks};

/// Define the rules how a C identifier should be renamed.
///
/// The rules are applied in this order: `remove`, then `renames`, then `case`.
#[derive(Debug, Default)]
pub struct IdentRenamer {
    /// Any regexes to remove substrings from the value. Applied in the given order before any explicit renaming.
    /// Each regex is applied once, so only its first (leftmost) match is removed.
    pub remove: Option<Vec<Regex>>,
    /// Explicit renaming once all matching strings are removed. If a match is found, skips automatic case change.
    /// The lookup key is the exact value left after the `remove` step.
    pub renames: HashMap<String, String>,
    /// Which case to convert the value to, unless explicitly renamed.
    /// When `None`, the value is returned without any case conversion.
    pub case: Option<Case<'static>>,
}

impl IdentRenamer {
    /// Create a new `IdentRenamer` instance, specifying the case to convert to.
    #[must_use]
    pub fn default_case(case: Case<'static>) -> Self {
        Self {
            case: Some(case),
            ..Default::default()
        }
    }

    fn apply(&self, val: &str) -> String {
        let mut val = val.to_owned();
        if let Some(remove) = &self.remove {
            for re in remove {
                val = re.replace(&val, "").into();
            }
        }
        if let Some(new_val) = self.renames.get(val.as_str()) {
            new_val.to_string()
        } else if let Some(case) = self.case {
            val.to_case(case)
        } else {
            val
        }
    }
}

/// `Renamer` is a struct that implements the `ParseCallbacks` trait.
/// It is used to rename C items like enums, structs, and typedefs in the generated Rust bindings,
/// as well as the individual values of C enums.
///
/// # Usage
///
/// To use `Renamer` from your `build.rs`, you need to create a new instance of it and configure it with the renaming rules.
/// You can rename items by their name or by using regex patterns. You can also specify how to rename enum values.
///
/// ```
/// // build.rs
/// use bindgen_helpers::{Builder, Renamer, rename_enum};
///
/// // true to enable debug output as warnings
/// let mut ren = Renamer::new(true);
///
/// // rename a single item, e.g., a struct, enum, or a typedef
/// ren.rename_item("my_struct", "MyStruct");
///
/// // rename an enum and its values
/// rename_enum!(
///     ren,
///     "my_enum" => "MyEnum", // rename the enum itself
///     remove: "^I_SAID_",    // optionally any number of "remove" regexes
///     remove: "_ENUM$",
///     case: Pascal,          // optionally set case convert, defaults to "PascalCase"
///     "MV_IT" => "Value1",   // rename a specific value after pattern removal
///     "MV_IT2" => "Value2",  // more specific value renames
/// );
///
/// let bindings = Builder::default()
///     // in real code, use .header("path/to/header.h")
///     .header_contents("test.h", r#"
///
/// struct my_struct {
///     int a;
/// };
///
/// enum my_enum {
///     I_SAID_YES_ENUM,
///     I_SAID_NO_ENUM,
///     I_SAID_MV_IT_ENUM,
///     I_SAID_MV_IT2_ENUM,
/// };
///
/// "#)
///     // note that generated regex str includes all the renames, not just enums
///     .rustified_enum(ren.get_regex_str())
///     .parse_callbacks(Box::new(ren))
///     .generate().unwrap();
/// ```
///
/// # Generated Code
///
/// This is the approximate code that would be generated by the above:
///
/// ```rust,ignore
/// #[repr(C)]
/// #[derive(Debug, Copy, Clone)]
/// pub struct MyStruct {
///   pub a: ::std::os::raw::c_int,
/// }
///
/// #[repr(u32)]
/// #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
/// pub enum MyEnum {
///   Yes = 0,
///   No = 1,
///   Value1 = 2,
///   Value2 = 3,
/// }
/// ```
///
/// <!-- This code would generate the actual test output, but it is not stable enough to always run
///
///   // Output the generated code to a string.
///   // In real code, use .write_to_file("bindings.rs")
///   let mut output = Vec::new();
///   bindings.write(Box::new(&mut output)).unwrap();
///   let output = String::from_utf8(output).unwrap();
///
///   assert_eq!(output, r##"#[repr(C)]
/// #[derive(Debug, Copy, Clone)]
/// pub struct MyStruct {
///     pub a: ::std::os::raw::c_int,
/// }
/// #[allow(clippy::unnecessary_operation, clippy::identity_op)]
/// const _: () = {
///     ["Size of MyStruct"][::std::mem::size_of::<MyStruct>() - 4usize];
///     ["Alignment of MyStruct"][::std::mem::align_of::<MyStruct>() - 4usize];
///     ["Offset of field: MyStruct::a"][::std::mem::offset_of!(MyStruct, a) - 0usize];
/// };
/// #[repr(u32)]
/// #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
/// pub enum MyEnum {
///     Yes = 0,
///     No = 1,
///     Value1 = 2,
///     Value2 = 3,
/// }
/// "##);
/// -->
#[derive(Debug, Default)]
pub struct Renamer {
    /// Enable debug output: when `true`, every item or enum variant that no rule matches
    /// is reported with a `cargo::warning=` line printed to stdout.
    debug: bool,
    /// Rename C items like enums, structs, and aliases, replacing them with a new name.
    /// Keys are exact C names; these are consulted before the regex-based rules.
    item_renames: HashMap<String, String>,
    /// Rename C items like enums, structs, and aliases that match a regex and apply a renamer.
    /// The regex string must not contain '^' or '$' symbols.
    /// Rules are tried in insertion order and the first matching regex wins.
    item_renames_ext: Vec<(Regex, IdentRenamer)>,
    /// Matches C enum names.
    /// Note that the regex might be None because the callback might also not have it for some enums.
    /// A `None` entry is only used when the callback receives no enum name.
    ///
    /// As of 1.71.1, bindgen reports `enum foo {...}` and `typedef enum {...} foo` differently.
    /// In the first case, `enum_c_name` is "enum foo", and in the second it's just "foo".
    /// See [issue](https://github.com/rust-lang/rust-bindgen/issues/3113#issuecomment-2844178132)
    enum_renames: Vec<(Option<Regex>, IdentRenamer)>,
}

impl Renamer {
    /// Construct an empty `Renamer` with no rules configured.
    ///
    /// Pass `debug = true` to have unmatched items and enum variants reported
    /// as `cargo::warning=` lines.
    #[must_use]
    pub fn new(debug: bool) -> Self {
        Self {
            debug,
            ..Self::default()
        }
    }

    /// Build one regex string that matches every configured C item.
    ///
    /// The result is an alternation (`a|b|c`): first the patterns registered with
    /// [`Renamer::rename_many`] in insertion order, then the names registered with
    /// [`Renamer::rename_item`]. Enum-value rules are not included.
    #[must_use]
    pub fn get_regex_str(&self) -> String {
        let patterns = self.item_renames_ext.iter().map(|(re, _)| re.as_str());
        let names = self.item_renames.keys().map(String::as_str);

        let mut combined = String::new();
        for alternative in patterns.chain(names) {
            // Separate alternatives, but never start the result with a `|`.
            if !combined.is_empty() {
                combined.push('|');
            }
            combined.push_str(alternative);
        }
        combined
    }

    /// Register an exact rename of one C item (struct, enum, typedef, ...).
    ///
    /// A later call with the same `c_name` replaces the earlier mapping.
    pub fn rename_item(
        &mut self,
        c_name: impl AsRef<str>,
        rust_name: impl AsRef<str>,
    ) {
        let key = c_name.as_ref().to_owned();
        let replacement = rust_name.as_ref().to_owned();
        self.item_renames.insert(key, replacement);
    }

    /// Register a rule for every C item whose name matches `c_name`;
    /// matching names are transformed by `renamer`.
    ///
    /// # Panics
    /// Will panic if the regex contains '^' or '$' symbols.
    pub fn rename_many(&mut self, c_name: Regex, renamer: IdentRenamer) {
        let pattern = c_name.as_str();
        assert!(!pattern.contains('^'), "Regex must not contain '^' symbol");
        assert!(!pattern.contains('$'), "Regex must not contain '$' symbol");
        self.item_renames_ext.push((c_name, renamer));
    }

    /// Register a rule for renaming the values of an enum.
    ///
    /// `enum_c_name` should be set like this `Some("(enum )?some_name")`
    /// due to [this issue](https://github.com/rust-lang/rust-bindgen/issues/3113#issuecomment-2844178132).
    /// Passing `None` creates a rule that is used only when the callback
    /// receives no enum name.
    ///
    /// # Panics
    /// Will panic if the `enum_c_name` is not a valid regex.
    pub fn rename_enum_val(
        &mut self,
        enum_c_name: Option<&str>,
        val_renamer: IdentRenamer,
    ) {
        let matcher = enum_c_name.map(|pattern| {
            Regex::new(pattern).expect("Invalid enum_c_name regex")
        });
        self.enum_renames.push((matcher, val_renamer));
    }
}

impl ParseCallbacks for Renamer {
    /// Rename one enum variant using the first enum rule that applies.
    ///
    /// A rule applies when its regex matches `enum_name`, or when both the rule's
    /// regex and `enum_name` are absent. Returns `None` when no rule applies,
    /// after emitting a warning if debug output is enabled.
    fn enum_variant_name(
        &self,
        enum_name: Option<&str>,
        value: &str,
        _variant_value: EnumVariantValue,
    ) -> Option<String> {
        for (re, rn) in &self.enum_renames {
            let applies = match (enum_name, re.as_ref()) {
                (Some(c_enum), Some(re)) => re.is_match(c_enum),
                (None, None) => true,
                _ => false,
            };
            if applies {
                return Some(rn.apply(value));
            }
        }

        if self.debug {
            let name = enum_name.unwrap_or_default();
            println!("cargo::warning=Unrecognized enum variant {name} :: {value}");
        }
        None
    }

    /// Rename one C item.
    ///
    /// Exact renames are checked first, then the regex rules in insertion order.
    /// Returns `None` when nothing matches, after emitting a warning if debug
    /// output is enabled.
    fn item_name(&self, item_name: ItemInfo<'_>) -> Option<String> {
        let item_name = item_name.name;

        if let Some(exact) = self.item_renames.get(item_name) {
            return Some(exact.clone());
        }

        for (re, rn) in &self.item_renames_ext {
            if re.is_match(item_name) {
                return Some(rn.apply(item_name));
            }
        }

        if self.debug {
            println!("cargo::warning=Unrecognized item {item_name}");
        }
        None
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Regex-based rules come first in insertion order, followed by exact item names.
    #[test]
    fn test_get_regex_str() {
        let mut renamer = Renamer::new(false);
        renamer.rename_item("bar", "baz");

        for pattern in [r"foo.*", "bas"] {
            let re = Regex::new(pattern).unwrap();
            renamer.rename_many(re, IdentRenamer::default());
        }

        let expected = "foo.*|bas|bar";
        assert_eq!(renamer.get_regex_str(), expected);
    }
}