pgrx_bindgen/
build.rs

1//LICENSE Portions Copyright 2019-2021 ZomboDB, LLC.
2//LICENSE
3//LICENSE Portions Copyright 2021-2023 Technology Concepts & Design, Inc.
4//LICENSE
5//LICENSE Portions Copyright 2023-2023 PgCentral Foundation, Inc. <contact@pgcentral.org>
6//LICENSE
7//LICENSE All rights reserved.
8//LICENSE
9//LICENSE Use of this source code is governed by the MIT license that can be found in the LICENSE file.
10use bindgen::callbacks::{DeriveTrait, EnumVariantValue, ImplementsTrait, MacroParsingBehavior};
11use bindgen::NonCopyUnionStyle;
12use eyre::{eyre, WrapErr};
13use pgrx_pg_config::{
14    is_supported_major_version, PgConfig, PgConfigSelector, PgMinorVersion, PgVersion, Pgrx,
15    SUPPORTED_VERSIONS,
16};
17use quote::{quote, ToTokens};
18use std::cell::RefCell;
19use std::cmp::Ordering;
20use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
21use std::fs;
22use std::path::{self, Path, PathBuf}; // disambiguate path::Path and syn::Type::Path
23use std::process::{Command, Output};
24use std::rc::Rc;
25use syn::{Item, ItemConst};
26
/// Type names for which `BindingOverride::blocklisted_type_implements_trait` answers
/// bindgen's "does this type implement trait X?" query itself; for any other name the
/// callback returns `None`.
const BLOCKLISTED_TYPES: [&str; 4] = ["Datum", "NullableDatum", "Oid", "TransactionId"];
28
// These Postgres versions were effectively "yanked" by the community, even though they still
// exist in the wild.  pgrx will refuse to compile against them: `main` aborts the build when
// the selected `pg_config` reports one of these versions.
const YANKED_POSTGRES_VERSIONS: &[PgVersion] = &[
    // this set of releases introduced an ABI break in the [`pg_sys::ResultRelInfo`] struct
    // and was replaced by the community on 2024-11-21
    // https://www.postgresql.org/about/news/postgresql-172-166-1510-1415-1318-and-1222-released-2965/
    PgVersion::new(17, PgMinorVersion::Release(1), None),
    PgVersion::new(16, PgMinorVersion::Release(5), None),
    PgVersion::new(15, PgMinorVersion::Release(9), None),
    PgVersion::new(14, PgMinorVersion::Release(14), None),
    PgVersion::new(13, PgMinorVersion::Release(17), None),
];
41
42pub(super) mod clang;
43
/// The bindgen `ParseCallbacks` implementation used by this build script.
#[derive(Debug)]
struct BindingOverride {
    /// Names of C macros that bindgen is told to ignore.
    ignore_macros: HashSet<&'static str>,
    /// Shared table that `enum_variant_behavior` fills in with the enum variants it sees.
    enum_names: InnerMut<EnumMap>,
}

/// Shared, single-threaded interior mutability.
type InnerMut<T> = Rc<RefCell<T>>;
/// Maps an enum's name to the `(variant name, variant value)` pairs recorded for it.
type EnumMap = BTreeMap<String, Vec<(String, EnumVariantValue)>>;
52
53impl BindingOverride {
54    fn new_from(enum_names: InnerMut<EnumMap>) -> Self {
55        // these cause duplicate definition problems on linux
56        // see: https://github.com/rust-lang/rust-bindgen/issues/687
57        BindingOverride {
58            ignore_macros: HashSet::from_iter([
59                "FP_INFINITE",
60                "FP_NAN",
61                "FP_NORMAL",
62                "FP_SUBNORMAL",
63                "FP_ZERO",
64                "IPPORT_RESERVED",
65                // These are just annoying due to clippy
66                "M_E",
67                "M_LOG2E",
68                "M_LOG10E",
69                "M_LN2",
70                "M_LN10",
71                "M_PI",
72                "M_PI_2",
73                "M_PI_4",
74                "M_1_PI",
75                "M_2_PI",
76                "M_SQRT2",
77                "M_SQRT1_2",
78                "M_2_SQRTPI",
79            ]),
80            enum_names,
81        }
82    }
83}
84
85impl bindgen::callbacks::ParseCallbacks for BindingOverride {
86    fn will_parse_macro(&self, name: &str) -> MacroParsingBehavior {
87        if self.ignore_macros.contains(name) {
88            bindgen::callbacks::MacroParsingBehavior::Ignore
89        } else {
90            bindgen::callbacks::MacroParsingBehavior::Default
91        }
92    }
93
94    fn blocklisted_type_implements_trait(
95        &self,
96        name: &str,
97        derive_trait: DeriveTrait,
98    ) -> Option<ImplementsTrait> {
99        if !BLOCKLISTED_TYPES.contains(&name) {
100            return None;
101        }
102
103        let implements_trait = match derive_trait {
104            DeriveTrait::Copy => ImplementsTrait::Yes,
105            DeriveTrait::Debug => ImplementsTrait::Yes,
106            _ => ImplementsTrait::No,
107        };
108        Some(implements_trait)
109    }
110
111    // FIXME: alter types on some int macros to the actually-used types so we can stop as-casting them
112    fn int_macro(&self, _name: &str, _value: i64) -> Option<bindgen::callbacks::IntKind> {
113        None
114    }
115
116    // FIXME: implement a... C compiler?
117    fn func_macro(&self, _name: &str, _value: &[&[u8]]) {}
118
119    /// Intentionally doesn't do anything, just updates internal state.
120    fn enum_variant_behavior(
121        &self,
122        enum_name: Option<&str>,
123        variant_name: &str,
124        variant_value: bindgen::callbacks::EnumVariantValue,
125    ) -> Option<bindgen::callbacks::EnumVariantCustomBehavior> {
126        enum_name.inspect(|name| match name.strip_prefix("enum").unwrap_or(name).trim() {
127            // specifically overridden enum
128            "NodeTag" => (),
129            name if name.contains("unnamed at") || name.contains("anonymous at") => (),
130            // to prevent problems with BuiltinOid
131            _ if variant_name.contains("OID") => (),
132            name => self
133                .enum_names
134                .borrow_mut()
135                .entry(name.to_string())
136                .or_default()
137                .push((variant_name.to_string(), variant_value)),
138        });
139        None
140    }
141
142    // FIXME: hide nodetag fields and default them to appropriate values
143    fn field_visibility(
144        &self,
145        _info: bindgen::callbacks::FieldInfo<'_>,
146    ) -> Option<bindgen::FieldVisibilityKind> {
147        None
148    }
149}
150
151pub fn main() -> eyre::Result<()> {
152    if env_tracked("DOCS_RS").as_deref() == Some("1") {
153        return Ok(());
154    }
155
156    // dump the environment for debugging if asked
157    if env_tracked("PGRX_BUILD_VERBOSE").as_deref() == Some("true") {
158        for (k, v) in std::env::vars() {
159            eprintln!("{k}={v}");
160        }
161    }
162
163    let compile_cshim = env_tracked("CARGO_FEATURE_CSHIM").as_deref() == Some("1");
164    let is_for_release =
165        env_tracked("PGRX_PG_SYS_GENERATE_BINDINGS_FOR_RELEASE").as_deref() == Some("1");
166
167    let build_paths = BuildPaths::from_env();
168
169    eprintln!("build_paths={build_paths:?}");
170
171    emit_rerun_if_changed();
172
173    let pg_configs: Vec<(u16, PgConfig)> = if is_for_release {
174        // This does not cross-check config.toml and Cargo.toml versions, as it is release infra.
175        Pgrx::from_config()?.iter(PgConfigSelector::All)
176            .map(|r| r.expect("invalid pg_config"))
177            .map(|c| (c.major_version().expect("invalid major version"), c))
178            .filter_map(|t| {
179                if is_supported_major_version(t.0) {
180                    Some(t)
181                } else {
182                    println!(
183                        "cargo:warning={} contains a configuration for pg{}, which pgrx does not support.",
184                        Pgrx::config_toml()
185                            .expect("Could not get PGRX configuration TOML")
186                            .to_string_lossy(),
187                        t.0
188                    );
189                    None
190                }
191            })
192            .collect()
193    } else {
194        let mut found = Vec::new();
195        for pgver in SUPPORTED_VERSIONS() {
196            if env_tracked(&format!("CARGO_FEATURE_PG{}", pgver.major)).is_some() {
197                found.push(pgver);
198            }
199        }
200        let found_ver = match &found[..] {
201            [ver] => ver,
202            [] => {
203                return Err(eyre!(
204                    "Did not find `pg$VERSION` feature. `pgrx-pg-sys` requires one of {} to be set",
205                    SUPPORTED_VERSIONS()
206                        .iter()
207                        .map(|pgver| format!("`pg{}`", pgver.major))
208                        .collect::<Vec<_>>()
209                        .join(", ")
210                ))
211            }
212            versions => {
213                return Err(eyre!(
214                    "Multiple `pg$VERSION` features found.\n`--no-default-features` may be required.\nFound: {}",
215                    versions
216                        .iter()
217                        .map(|version| format!("pg{}", version.major))
218                        .collect::<Vec<String>>()
219                        .join(", ")
220                ))
221            }
222        };
223
224        let found_major = found_ver.major;
225        if let Ok(pg_config) = PgConfig::from_env() {
226            let major_version = pg_config.major_version()?;
227
228            if major_version != found_major {
229                panic!("Feature flag `pg{found_major}` does not match version from the environment-described PgConfig (`{major_version}`)")
230            }
231            vec![(major_version, pg_config)]
232        } else {
233            let specific = Pgrx::from_config()?.get(&format!("pg{}", found_ver.major))?;
234            vec![(found_ver.major, specific)]
235        }
236    };
237
238    // make sure we're not trying to build any of the yanked postgres versions
239    for (_, pg_config) in &pg_configs {
240        let version = pg_config.get_version()?;
241        if YANKED_POSTGRES_VERSIONS.contains(&version) {
242            panic!("Postgres v{}{} is incompatible with \
243                    other versions in this major series and is not supported by pgrx.  Please upgrade \
244                    to the latest version in the v{} series.", version.major, version.minor, version.major);
245        }
246    }
247
248    std::thread::scope(|scope| {
249        // This is pretty much either always 1 (normally) or 5 (for releases),
250        // but in the future if we ever have way more, we should consider
251        // chunking `pg_configs` based on `thread::available_parallelism()`.
252        let threads = pg_configs
253            .iter()
254            .map(|(pg_major_ver, pg_config)| {
255                scope.spawn(|| {
256                    generate_bindings(
257                        *pg_major_ver,
258                        pg_config,
259                        &build_paths,
260                        is_for_release,
261                        compile_cshim,
262                    )
263                })
264            })
265            .collect::<Vec<_>>();
266        // Most of the rest of this is just for better error handling --
267        // `thread::scope` already joins the threads for us before it returns.
268        let results = threads
269            .into_iter()
270            .map(|thread| thread.join().expect("thread panicked while generating bindings"))
271            .collect::<Vec<eyre::Result<_>>>();
272        results.into_iter().try_for_each(|r| r)
273    })?;
274
275    if compile_cshim {
276        // compile the cshim for each binding
277        for (_version, pg_config) in pg_configs {
278            build_shim(&build_paths.shim_src, &build_paths.shim_dst, &pg_config)?;
279        }
280    }
281
282    Ok(())
283}
284
285fn emit_rerun_if_changed() {
286    // `pgrx-pg-config` doesn't emit one for this.
287    println!("cargo:rerun-if-env-changed=PGRX_PG_CONFIG_PATH");
288    println!("cargo:rerun-if-env-changed=PGRX_PG_CONFIG_AS_ENV");
289    // Bindgen's behavior depends on these vars, but it doesn't emit them
290    // directly because the output would cause issue with `bindgen-cli`. Do it
291    // on bindgen's behalf.
292    println!("cargo:rerun-if-env-changed=LLVM_CONFIG_PATH");
293    println!("cargo:rerun-if-env-changed=LIBCLANG_PATH");
294    println!("cargo:rerun-if-env-changed=LIBCLANG_STATIC_PATH");
295    // Follows the logic bindgen uses here, more or less.
296    // https://github.com/rust-lang/rust-bindgen/blob/e6dd2c636/bindgen/lib.rs#L2918
297    println!("cargo:rerun-if-env-changed=BINDGEN_EXTRA_CLANG_ARGS");
298    if let Some(target) = env_tracked("TARGET") {
299        println!("cargo:rerun-if-env-changed=BINDGEN_EXTRA_CLANG_ARGS_{target}");
300        println!(
301            "cargo:rerun-if-env-changed=BINDGEN_EXTRA_CLANG_ARGS_{}",
302            target.replace('-', "_"),
303        );
304    }
305
306    // don't want to get stuck always generating bindings
307    println!("cargo:rerun-if-env-changed=PGRX_PG_SYS_GENERATE_BINDINGS_FOR_RELEASE");
308
309    println!("cargo:rerun-if-changed=include");
310    println!("cargo:rerun-if-changed=pgrx-cshim.c");
311
312    if let Ok(pgrx_config) = Pgrx::config_toml() {
313        println!("cargo:rerun-if-changed={}", pgrx_config.display());
314    }
315}
316
317fn generate_bindings(
318    major_version: u16,
319    pg_config: &PgConfig,
320    build_paths: &BuildPaths,
321    is_for_release: bool,
322    enable_cshim: bool,
323) -> eyre::Result<()> {
324    let mut include_h = build_paths.manifest_dir.clone();
325    include_h.push("include");
326    include_h.push(format!("pg{major_version}.h"));
327
328    let bindgen_output = get_bindings(major_version, pg_config, &include_h, enable_cshim)
329        .wrap_err_with(|| format!("bindgen failed for pg{major_version}"))?;
330
331    let oids = extract_oids(&bindgen_output);
332    let rewritten_items = rewrite_items(bindgen_output, &oids)
333        .wrap_err_with(|| format!("failed to rewrite items for pg{major_version}"))?;
334    let oids = format_builtin_oid_impl(oids);
335
336    let dest_dirs = if is_for_release {
337        vec![build_paths.out_dir.clone(), build_paths.src_dir.clone()]
338    } else {
339        vec![build_paths.out_dir.clone()]
340    };
341    for dest_dir in dest_dirs {
342        let mut bindings_file = dest_dir.clone();
343        bindings_file.push(format!("pg{major_version}.rs"));
344        write_rs_file(
345            rewritten_items.clone(),
346            &bindings_file,
347            quote! {
348                use crate as pg_sys;
349                use crate::{Datum, MultiXactId, Oid, PgNode, TransactionId};
350            },
351            is_for_release,
352        )
353        .wrap_err_with(|| {
354            format!(
355                "Unable to write bindings file for pg{} to `{}`",
356                major_version,
357                bindings_file.display()
358            )
359        })?;
360
361        let mut oids_file = dest_dir.clone();
362        oids_file.push(format!("pg{major_version}_oids.rs"));
363        write_rs_file(oids.clone(), &oids_file, quote! {}, is_for_release).wrap_err_with(|| {
364            format!(
365                "Unable to write oids file for pg{} to `{}`",
366                major_version,
367                oids_file.display()
368            )
369        })?;
370    }
371
372    let lib_dir = pg_config.lib_dir()?;
373    println!(
374        "cargo:rustc-link-search={}",
375        lib_dir.to_str().ok_or(eyre!("{lib_dir:?} is not valid UTF-8 string"))?
376    );
377    Ok(())
378}
379
/// The filesystem locations this build script reads from and writes to, as computed by
/// `BuildPaths::from_env`.
#[derive(Debug, Clone)]
struct BuildPaths {
    /// CARGO_MANIFEST_DIR
    manifest_dir: PathBuf,
    /// OUT_DIR
    out_dir: PathBuf,
    /// {manifest_dir}/src/include
    src_dir: PathBuf,
    /// {manifest_dir}/pgrx-cshim.c
    shim_src: PathBuf,
    /// {out_dir}/pgrx-cshim.c
    shim_dst: PathBuf,
}
393
394impl BuildPaths {
395    fn from_env() -> Self {
396        // Cargo guarantees these are provided, so unwrap is fine.
397        let manifest_dir = env_tracked("CARGO_MANIFEST_DIR").map(PathBuf::from).unwrap();
398        let out_dir = env_tracked("OUT_DIR").map(PathBuf::from).unwrap();
399        Self {
400            src_dir: manifest_dir.join("src/include"),
401            shim_src: manifest_dir.join("pgrx-cshim.c"),
402            shim_dst: out_dir.join("pgrx-cshim.c"),
403            out_dir,
404            manifest_dir,
405        }
406    }
407}
408
409fn write_rs_file(
410    code: proc_macro2::TokenStream,
411    file_path: &Path,
412    header: proc_macro2::TokenStream,
413    is_for_release: bool,
414) -> eyre::Result<()> {
415    use std::io::Write;
416    let mut contents = header;
417    contents.extend(code);
418    let mut file = fs::File::create(file_path)?;
419    write!(file, "/* Automatically generated by bindgen. Do not hand-edit.")?;
420    if is_for_release {
421        write!(
422            file,
423            "\n
424        This code is generated for documentation purposes, so that it is
425        easy to reference on docs.rs. Bindings are regenerated for your
426        build of pgrx, and the values of your Postgres version may differ.
427        */"
428        )
429    } else {
430        write!(file, " */")
431    }?;
432    write!(file, "{contents}")?;
433    rust_fmt(file_path)
434}
435
436/// Given a token stream representing a file, apply a series of transformations to munge
437/// the bindgen generated code with some postgres specific enhancements
438fn rewrite_items(
439    mut file: syn::File,
440    oids: &BTreeMap<syn::Ident, Box<syn::Expr>>,
441) -> eyre::Result<proc_macro2::TokenStream> {
442    rewrite_c_abi_to_c_unwind(&mut file);
443    let items_vec = rewrite_oid_consts(&file.items, oids);
444    let mut items = apply_pg_guard(&items_vec)?;
445    let pgnode_impls = impl_pg_node(&items_vec)?;
446
447    // append the pgnodes to the set of items
448    items.extend(pgnode_impls);
449
450    Ok(items)
451}
452
453/// Find all the constants that represent Postgres type OID values.
454///
455/// These are constants of type `u32` whose name ends in the string "OID"
456fn extract_oids(code: &syn::File) -> BTreeMap<syn::Ident, Box<syn::Expr>> {
457    let mut oids = BTreeMap::new(); // we would like to have a nice sorted set
458    for item in &code.items {
459        let Item::Const(ItemConst { ident, ty, expr, .. }) = item else { continue };
460        // Retype as strings for easy comparison
461        let name = ident.to_string();
462        let ty_str = ty.to_token_stream().to_string();
463
464        // This heuristic identifies "OIDs"
465        // We're going to warp the const declarations to be our newtype Oid
466        if ty_str == "u32" && is_builtin_oid(&name) {
467            oids.insert(ident.clone(), expr.clone());
468        }
469    }
470    oids
471}
472
/// Decides by name alone whether a constant is treated as a built-in OID.
///
/// Accepted: names ending in `OID` (except `HEAP_HASOID`), names ending in
/// `RelationId`, and the exact name `TemplateDbOid`.
fn is_builtin_oid(name: &str) -> bool {
    match name {
        "HEAP_HASOID" => false,
        "TemplateDbOid" => true,
        _ => name.ends_with("OID") || name.ends_with("RelationId"),
    }
}
478
479fn rewrite_oid_consts(
480    items: &[syn::Item],
481    oids: &BTreeMap<syn::Ident, Box<syn::Expr>>,
482) -> Vec<syn::Item> {
483    items
484        .iter()
485        .map(|item| match item {
486            Item::Const(ItemConst { ident, ty, expr, .. })
487                if ty.to_token_stream().to_string() == "u32" && oids.get(ident) == Some(expr) =>
488            {
489                syn::parse2(quote! { pub const #ident : Oid = Oid(#expr); }).unwrap()
490            }
491            item => item.clone(),
492        })
493        .collect()
494}
495
496fn format_builtin_oid_impl(oids: BTreeMap<syn::Ident, Box<syn::Expr>>) -> proc_macro2::TokenStream {
497    let enum_variants: proc_macro2::TokenStream;
498    let from_impl: proc_macro2::TokenStream;
499    (enum_variants, from_impl) = oids
500        .iter()
501        .map(|(ident, expr)| {
502            (quote! { #ident = #expr, }, quote! { #expr => Ok(BuiltinOid::#ident), })
503        })
504        .unzip();
505
506    quote! {
507        use crate::{NotBuiltinOid};
508
509        #[derive(Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd, Debug)]
510        pub enum BuiltinOid {
511            #enum_variants
512        }
513
514        impl BuiltinOid {
515            pub const fn from_u32(uint: u32) -> Result<BuiltinOid, NotBuiltinOid> {
516                match uint {
517                    0 => Err(NotBuiltinOid::Invalid),
518                    #from_impl
519                    _ => Err(NotBuiltinOid::Ambiguous),
520                }
521            }
522        }
523    }
524}
525
526/// Implement our `PgNode` marker trait for `pg_sys::Node` and its "subclasses"
527fn impl_pg_node(items: &[syn::Item]) -> eyre::Result<proc_macro2::TokenStream> {
528    let mut pgnode_impls = proc_macro2::TokenStream::new();
529
530    // we scope must of the computation so we can borrow `items` and then
531    // extend it at the very end.
532    let struct_graph: StructGraph = StructGraph::from(items);
533
534    // collect all the structs with `NodeTag` as their first member,
535    // these will serve as roots in our forest of `Node`s
536    let mut root_node_structs = Vec::new();
537    for descriptor in struct_graph.descriptors.iter() {
538        // grab the first field, if any
539        let first_field = match &descriptor.struct_.fields {
540            syn::Fields::Named(fields) => {
541                if let Some(first_field) = fields.named.first() {
542                    first_field
543                } else {
544                    continue;
545                }
546            }
547            syn::Fields::Unnamed(fields) => {
548                if let Some(first_field) = fields.unnamed.first() {
549                    first_field
550                } else {
551                    continue;
552                }
553            }
554            _ => continue,
555        };
556
557        // grab the type name of the first field
558        let ty_name = if let syn::Type::Path(p) = &first_field.ty {
559            if let Some(last_segment) = p.path.segments.last() {
560                last_segment.ident.to_string()
561            } else {
562                continue;
563            }
564        } else {
565            continue;
566        };
567
568        if ty_name == "NodeTag" {
569            root_node_structs.push(descriptor);
570        }
571    }
572
573    // the set of types which subclass `Node` according to postgres' object system
574    let mut node_set = BTreeSet::new();
575    // fill in any children of the roots with a recursive DFS
576    // (we are not operating on user input, so it is ok to just
577    //  use direct recursion rather than an explicit stack).
578    for root in root_node_structs.into_iter() {
579        dfs_find_nodes(root, &struct_graph, &mut node_set);
580    }
581
582    // now we can finally iterate the Nodes and emit out Display impl
583    for node_struct in node_set.into_iter() {
584        let struct_name = &node_struct.struct_.ident;
585
586        // impl the PgNode trait for all nodes
587        pgnode_impls.extend(quote! {
588            impl pg_sys::seal::Sealed for #struct_name {}
589            impl pg_sys::PgNode for #struct_name {}
590        });
591
592        // impl Rust's Display trait for all nodes
593        pgnode_impls.extend(quote! {
594            impl ::core::fmt::Display for #struct_name {
595                fn fmt(&self, f: &mut ::core::fmt::Formatter<'_>) -> ::core::fmt::Result {
596                    self.display_node().fmt(f)
597                }
598            }
599        });
600    }
601
602    Ok(pgnode_impls)
603}
604
605/// Given a root node, dfs_find_nodes adds all its children nodes to `node_set`.
606fn dfs_find_nodes<'graph>(
607    node: &'graph StructDescriptor<'graph>,
608    graph: &'graph StructGraph<'graph>,
609    node_set: &mut BTreeSet<StructDescriptor<'graph>>,
610) {
611    node_set.insert(node.clone());
612
613    for child in node.children(graph) {
614        if node_set.contains(child) {
615            continue;
616        }
617        dfs_find_nodes(child, graph, node_set);
618    }
619}
620
/// A graph describing the inheritance relationships between different nodes
/// according to postgres' object system.
///
/// Built from a slice of items by the `From<&[syn::Item]>` impl: a struct becomes the
/// child of the struct named by the type of its first field.
///
/// NOTE: the borrowed lifetime on a StructGraph should also ensure that the offsets
///       it stores into the underlying items slice are always correct.
#[derive(Clone, Debug)]
struct StructGraph<'a> {
    #[allow(dead_code)]
    /// A table mapping struct names to their offset in the descriptor table
    name_tab: HashMap<String, usize>,
    #[allow(dead_code)]
    /// A table mapping offsets into the underlying items table to offsets in the descriptor
    /// table (`None` for items that are not structs)
    item_offset_tab: Vec<Option<usize>>,
    /// A table of struct descriptors, in the order the structs appear in the items slice
    descriptors: Vec<StructDescriptor<'a>>,
}
637
638impl<'a> From<&'a [syn::Item]> for StructGraph<'a> {
639    fn from(items: &'a [syn::Item]) -> StructGraph<'a> {
640        let mut descriptors = Vec::new();
641
642        // a table mapping struct names to their offset in `descriptors`
643        let mut name_tab: HashMap<String, usize> = HashMap::new();
644        let mut item_offset_tab: Vec<Option<usize>> = vec![None; items.len()];
645        for (i, item) in items.iter().enumerate() {
646            if let &syn::Item::Struct(struct_) = &item {
647                let next_offset = descriptors.len();
648                descriptors.push(StructDescriptor {
649                    struct_,
650                    items_offset: i,
651                    parent: None,
652                    children: Vec::new(),
653                });
654                name_tab.insert(struct_.ident.to_string(), next_offset);
655                item_offset_tab[i] = Some(next_offset);
656            }
657        }
658
659        for item in items.iter() {
660            // grab the first field if it is struct
661            let (id, first_field) = match &item {
662                syn::Item::Struct(syn::ItemStruct {
663                    ident: id,
664                    fields: syn::Fields::Named(fields),
665                    ..
666                }) => {
667                    if let Some(first_field) = fields.named.first() {
668                        (id.to_string(), first_field)
669                    } else {
670                        continue;
671                    }
672                }
673                &syn::Item::Struct(syn::ItemStruct {
674                    ident: id,
675                    fields: syn::Fields::Unnamed(fields),
676                    ..
677                }) => {
678                    if let Some(first_field) = fields.unnamed.first() {
679                        (id.to_string(), first_field)
680                    } else {
681                        continue;
682                    }
683                }
684                _ => continue,
685            };
686
687            if let syn::Type::Path(p) = &first_field.ty {
688                // We should be guaranteed that just extracting the last path
689                // segment is ok because these structs are all from the same module.
690                // (also, they are all generated from C code, so collisions should be
691                //  impossible anyway thanks to C's single shared namespace).
692                if let Some(last_segment) = p.path.segments.last() {
693                    if let Some(parent_offset) = name_tab.get(&last_segment.ident.to_string()) {
694                        // establish the 2-way link
695                        let child_offset = name_tab[&id];
696                        descriptors[child_offset].parent = Some(*parent_offset);
697                        descriptors[*parent_offset].children.push(child_offset);
698                    }
699                }
700            }
701        }
702
703        StructGraph { name_tab, item_offset_tab, descriptors }
704    }
705}
706
impl<'a> StructDescriptor<'a> {
    /// Returns an iterator over the children of this node in the graph.
    ///
    /// `graph` is the graph whose `descriptors` table this node's child offsets are
    /// looked up in.
    fn children(&'a self, graph: &'a StructGraph) -> StructDescriptorChildren<'a> {
        StructDescriptorChildren { offset: 0, descriptor: self, graph }
    }
}
713
/// An iterator over a StructDescriptor's children
struct StructDescriptorChildren<'a> {
    /// Position of the next entry to yield within `descriptor.children`
    offset: usize,
    /// The node whose children are being iterated
    descriptor: &'a StructDescriptor<'a>,
    /// The graph used to resolve child offsets into descriptors
    graph: &'a StructGraph<'a>,
}
720
721impl<'a> std::iter::Iterator for StructDescriptorChildren<'a> {
722    type Item = &'a StructDescriptor<'a>;
723    fn next(&mut self) -> Option<&'a StructDescriptor<'a>> {
724        if self.offset >= self.descriptor.children.len() {
725            None
726        } else {
727            let ret = Some(&self.graph.descriptors[self.descriptor.children[self.offset]]);
728            self.offset += 1;
729            ret
730        }
731    }
732}
733
/// A node in a StructGraph
///
/// Note that equality and hashing are derived over all fields, while the ordering
/// (see the `Ord` impl) looks at the struct identifier only.
#[derive(Clone, Debug, Hash, Eq, PartialEq)]
struct StructDescriptor<'a> {
    /// A reference to the underlying struct syntax node
    struct_: &'a syn::ItemStruct,
    /// An offset into the items slice that was used to construct the struct graph that
    /// this StructDescriptor is a part of
    items_offset: usize,
    /// The offset of the "parent" (first member) struct (if any).
    parent: Option<usize>,
    /// The offsets of the "children" structs (if any).
    children: Vec<usize>,
}
747
/// Delegates to the total order defined by the `Ord` impl.
impl PartialOrd for StructDescriptor<'_> {
    #[inline]
    fn partial_cmp(&self, other: &StructDescriptor) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}
754
/// Orders descriptors by the identifier of the struct they describe; no other field
/// takes part in the comparison.
impl Ord for StructDescriptor<'_> {
    #[inline]
    fn cmp(&self, other: &StructDescriptor) -> Ordering {
        self.struct_.ident.cmp(&other.struct_.ident)
    }
}
761
762fn get_bindings(
763    major_version: u16,
764    pg_config: &PgConfig,
765    include_h: &path::Path,
766    enable_cshim: bool,
767) -> eyre::Result<syn::File> {
768    let bindings = if let Some(info_dir) =
769        target_env_tracked(&format!("PGRX_TARGET_INFO_PATH_PG{major_version}"))
770    {
771        let bindings_file = format!("{info_dir}/pg{major_version}_raw_bindings.rs");
772        std::fs::read_to_string(&bindings_file)
773            .wrap_err_with(|| format!("failed to read raw bindings from {bindings_file}"))?
774    } else {
775        let bindings = run_bindgen(major_version, pg_config, include_h, enable_cshim)?;
776        if let Some(path) = env_tracked("PGRX_PG_SYS_EXTRA_OUTPUT_PATH") {
777            std::fs::write(path, &bindings)?;
778        }
779        bindings
780    };
781    syn::parse_file(bindings.as_str()).wrap_err_with(|| "failed to parse generated bindings")
782}
783
784/// Given a specific postgres version, `run_bindgen` generates bindings for the given
785/// postgres version and returns them as a token stream.
786fn run_bindgen(
787    major_version: u16,
788    pg_config: &PgConfig,
789    include_h: &path::Path,
790    enable_cshim: bool,
791) -> eyre::Result<String> {
792    eprintln!("Generating bindings for pg{major_version}");
793    let configure = pg_config.configure()?;
794    let preferred_clang: Option<&std::path::Path> = configure.get("CLANG").map(|s| s.as_ref());
795    eprintln!("pg_config --configure CLANG = {preferred_clang:?}");
796    let pg_target_includes = pg_target_includes(major_version, pg_config)?;
797    eprintln!("pg_target_includes = {pg_target_includes:?}");
798    let (autodetect, includes) = clang::detect_include_paths_for(preferred_clang);
799    let mut binder = bindgen::Builder::default();
800    binder = add_blocklists(binder);
801    binder = add_allowlists(binder, pg_target_includes.iter().map(|x| x.as_str()));
802    binder = add_derives(binder);
803    if !autodetect {
804        let builtin_includes = includes.iter().filter_map(|p| Some(format!("-I{}", p.to_str()?)));
805        binder = binder.clang_args(builtin_includes);
806    };
807    let enum_names = Rc::new(RefCell::new(BTreeMap::new()));
808    let overrides = BindingOverride::new_from(Rc::clone(&enum_names));
809    let out_path = PathBuf::from(std::env::var("OUT_DIR").unwrap());
810    let bindings = binder
811        .header(include_h.display().to_string())
812        .clang_args(extra_bindgen_clang_args(pg_config)?)
813        .clang_args(pg_target_includes.iter().map(|x| format!("-I{x}")))
814        .detect_include_paths(autodetect)
815        .parse_callbacks(Box::new(overrides))
816        .default_enum_style(bindgen::EnumVariation::ModuleConsts)
817        // The NodeTag enum is closed: additions break existing values in the set, so it is not extensible
818        .rustified_non_exhaustive_enum("NodeTag")
819        .size_t_is_usize(true)
820        .merge_extern_blocks(true)
821        .wrap_unsafe_ops(true)
822        .use_core()
823        .generate_cstr(true)
824        .disable_nested_struct_naming()
825        .formatter(bindgen::Formatter::None)
826        .layout_tests(false)
827        .default_non_copy_union_style(NonCopyUnionStyle::ManuallyDrop)
828        .wrap_static_fns(enable_cshim)
829        .wrap_static_fns_path(out_path.join("pgrx-cshim-static"))
830        .wrap_static_fns_suffix("__pgrx_cshim")
831        .generate()
832        .wrap_err_with(|| format!("Unable to generate bindings for pg{major_version}"))?;
833    let mut binding_str = bindings.to_string();
834    drop(bindings); // So the Rc::into_inner can unwrap
835
836    // FIXME: do this for the Node graph instead of reparsing?
837    let enum_names: EnumMap = Rc::into_inner(enum_names).unwrap().into_inner();
838    binding_str.extend(enum_names.into_iter().flat_map(|(name, variants)| {
839        const MIN_I32: i64 = i32::MIN as _;
840        const MAX_I32: i64 = i32::MAX as _;
841        const MAX_U32: u64 = u32::MAX as _;
842        variants.into_iter().map(move |(variant, value)| {
843            let (ty, value) = match value {
844                EnumVariantValue::Boolean(b) => ("bool", b.to_string()),
845                EnumVariantValue::Signed(v @ MIN_I32..=MAX_I32) => ("i32", v.to_string()),
846                EnumVariantValue::Signed(v) => ("i64", v.to_string()),
847                EnumVariantValue::Unsigned(v @ 0..=MAX_U32) => ("u32", v.to_string()),
848                EnumVariantValue::Unsigned(v) => ("u64", v.to_string()),
849            };
850            format!(
851                r#"
852#[deprecated(since = "0.12.0", note = "you want pg_sys::{module}::{variant}")]
853pub const {module}_{variant}: {ty} = {value};"#,
854                module = &*name, // imprecise closure capture
855            )
856        })
857    }));
858
859    Ok(binding_str)
860}
861
862fn add_blocklists(bind: bindgen::Builder) -> bindgen::Builder {
863    bind.blocklist_type("Datum") // manually wrapping datum for correctness
864        .blocklist_type("Oid") // "Oid" is not just any u32
865        .blocklist_type("TransactionId") // "TransactionId" is not just any u32
866        .blocklist_type("MultiXactId") // it's an alias of "TransactionId"
867        .blocklist_var("CONFIGURE_ARGS") // configuration during build is hopefully irrelevant
868        .blocklist_var("_*(?:HAVE|have)_.*") // header tracking metadata
869        .blocklist_var("_[A-Z_]+_H") // more header metadata
870        // It's used by explict `extern "C-unwind"`
871        .blocklist_function("pg_re_throw")
872        .blocklist_function("err(start|code|msg|detail|context_msg|hint|finish)")
873        // These functions are already ported in Rust
874        .blocklist_function("heap_getattr")
875        .blocklist_function("BufferGetBlock")
876        .blocklist_function("BufferGetPage")
877        .blocklist_function("BufferIsLocal")
878        .blocklist_function("GetMemoryChunkContext")
879        .blocklist_function("GETSTRUCT")
880        .blocklist_function("MAXALIGN")
881        .blocklist_function("MemoryContextIsValid")
882        .blocklist_function("MemoryContextSwitchTo")
883        .blocklist_function("TYPEALIGN")
884        .blocklist_function("TransactionIdIsNormal")
885        .blocklist_function("expression_tree_walker")
886        .blocklist_function("get_pg_major_minor_version_string")
887        .blocklist_function("get_pg_major_version_num")
888        .blocklist_function("get_pg_major_version_string")
889        .blocklist_function("get_pg_version_string")
890        .blocklist_function("heap_tuple_get_struct")
891        .blocklist_function("planstate_tree_walker")
892        .blocklist_function("query_or_expression_tree_walker")
893        .blocklist_function("query_tree_walker")
894        .blocklist_function("range_table_entry_walker")
895        .blocklist_function("range_table_walker")
896        .blocklist_function("raw_expression_tree_walker")
897        .blocklist_function("type_is_array")
898        .blocklist_function("varsize_any")
899        // we define these ourselves b/c Postgres is schizophrenic about them across versions
900        .blocklist_function("PageValidateSpecialPointer")
901        .blocklist_function("PageIsValid")
902        // it's defined twice on Windows, so use PGERROR instead
903        .blocklist_item("ERROR")
904        // it causes strange linker errors for PostgreSQL 14 on Windows
905        .blocklist_function("IsQueryIdEnabled")
906}
907
908fn add_allowlists<'a>(
909    mut bind: bindgen::Builder,
910    pg_target_includes: impl Iterator<Item = &'a str>,
911) -> bindgen::Builder {
912    for pg_target_include in pg_target_includes {
913        bind = bind.allowlist_file(format!("{}.*", regex::escape(pg_target_include)))
914    }
915    bind.allowlist_item("PGERROR").allowlist_item("SIG.*")
916}
917
918fn add_derives(bind: bindgen::Builder) -> bindgen::Builder {
919    bind.derive_debug(true)
920        .derive_copy(true)
921        .derive_default(true)
922        .derive_eq(false)
923        .derive_partialeq(false)
924        .derive_hash(false)
925        .derive_ord(false)
926        .derive_partialord(false)
927}
928
/// Reads the environment variable `s`, asking cargo to rerun the build script whenever it
/// changes, unless it is a variable that cargo manages itself.
///
/// Returns `None` when the variable is unset or its value is not valid unicode.
fn env_tracked(s: &str) -> Option<String> {
    // Environment variable keys that cargo might set and that we don't need to track.
    // These were picked out, by hand, from:
    // https://doc.rust-lang.org/cargo/reference/environment-variables.html
    // (kept in alphabetical order)
    const CARGO_KEYS: &[&str] = &[
        "BROWSER",
        "DEBUG",
        "DOCS_RS",
        "HOST",
        "HTTP_PROXY",
        "HTTP_TIMEOUT",
        "NUM_JOBS",
        "OPT_LEVEL",
        "OUT_DIR",
        "PATH",
        "PROFILE",
        "TARGET",
        "TERM",
    ];
    const CARGO_PREFIXES: [&str; 2] = ["CARGO", "RUST"];

    let cargo_managed =
        CARGO_PREFIXES.iter().any(|prefix| s.starts_with(*prefix)) || CARGO_KEYS.contains(&s);

    // For variables cargo hands us we do not request a rerun: cargo figures that out for
    // itself, and asking anyway seems, depending on the key, to make cargo rerun build.rs
    // every single time, which is terrible.
    if !cargo_managed {
        println!("cargo:rerun-if-env-changed={s}");
    }

    match std::env::var(s) {
        Ok(value) => Some(value),
        Err(_) => None,
    }
}
959
960fn target_env_tracked(s: &str) -> Option<String> {
961    let target = env_tracked("TARGET").unwrap();
962    env_tracked(&format!("{s}_{target}")).or_else(|| env_tracked(s))
963}
964
965fn find_include(
966    pg_version: u16,
967    var: &str,
968    default: impl Fn() -> eyre::Result<PathBuf>,
969) -> eyre::Result<String> {
970    let value =
971        target_env_tracked(&format!("{var}_PG{pg_version}")).or_else(|| target_env_tracked(var));
972    let path = match value {
973        // No configured value: ask `pg_config`.
974        None => default()?,
975        // Configured to non-empty string: pass to bindgen
976        Some(overridden) => Path::new(&overridden).to_path_buf(),
977    };
978    let path = std::fs::canonicalize(&path)
979        .wrap_err(format!("cannot find {path:?} for C header files"))?
980        .join("") // returning a `/`-ending path
981        .display()
982        .to_string();
983    if let Some(path) = path.strip_prefix("\\\\?\\") {
984        Ok(path.to_string())
985    } else {
986        Ok(path)
987    }
988}
989
990fn pg_target_includes(pg_version: u16, pg_config: &PgConfig) -> eyre::Result<Vec<String>> {
991    let mut result =
992        vec![find_include(pg_version, "PGRX_INCLUDEDIR_SERVER", || pg_config.includedir_server())?];
993    if let Some("msvc") = env_tracked("CARGO_CFG_TARGET_ENV").as_deref() {
994        result.push(find_include(pg_version, "PGRX_PKGINCLUDEDIR", || pg_config.pkgincludedir())?);
995        result.push(find_include(pg_version, "PGRX_INCLUDEDIR_SERVER_PORT_WIN32", || {
996            pg_config.includedir_server_port_win32()
997        })?);
998        result.push(find_include(pg_version, "PGRX_INCLUDEDIR_SERVER_PORT_WIN32_MSVC", || {
999            pg_config.includedir_server_port_win32_msvc()
1000        })?);
1001    }
1002    Ok(result)
1003}
1004
1005fn build_shim(
1006    shim_src: &path::Path,
1007    shim_dst: &path::Path,
1008    pg_config: &PgConfig,
1009) -> eyre::Result<()> {
1010    let major_version = pg_config.major_version()?;
1011
1012    std::fs::copy(shim_src, shim_dst).unwrap();
1013
1014    let mut build = cc::Build::new();
1015    let compiler = build.get_compiler();
1016    if compiler.is_like_gnu() || compiler.is_like_clang() {
1017        build.flag("-ffunction-sections");
1018        build.flag("-fdata-sections");
1019    }
1020    if compiler.is_like_msvc() {
1021        build.flag("/Gy");
1022        build.flag("/Gw");
1023    }
1024    for pg_target_include in pg_target_includes(major_version, pg_config)?.iter() {
1025        build.flag(format!("-I{pg_target_include}"));
1026    }
1027    for flag in extra_bindgen_clang_args(pg_config)? {
1028        build.flag(&flag);
1029    }
1030    build.file(shim_dst);
1031    build.compile("pgrx-cshim");
1032    Ok(())
1033}
1034
1035fn extra_bindgen_clang_args(pg_config: &PgConfig) -> eyre::Result<Vec<String>> {
1036    let mut out = vec![];
1037    let flags = shlex::split(&pg_config.cppflags()?.to_string_lossy()).unwrap_or_default();
1038    if env_tracked("CARGO_CFG_TARGET_OS").as_deref() != Some("windows") {
1039        // Just give clang the full flag set, since presumably that's what we're
1040        // getting when we build the C shim anyway.
1041        // Skip it on Windows, since clang is used to generate cshim but MSVC is
1042        // used to compile PostgreSQL.
1043        out.extend(flags.iter().cloned());
1044    }
1045    if env_tracked("CARGO_CFG_TARGET_OS").as_deref() == Some("macos") {
1046        // Find the `-isysroot` flags so we can warn about them, so something
1047        // reasonable shows up if/when the build fails.
1048        //
1049        // TODO(thom): Could probably fix some brew/xcode issues here in the
1050        // Find the `-isysroot` flags so we can warn about them, so something
1051        // reasonable shows up if/when the build fails.
1052        //
1053        // - Handle homebrew packages initially linked against as keg-only, but
1054        //   which have had their version bumped.
1055        for pair in flags.windows(2) {
1056            if pair[0] == "-isysroot" {
1057                if !std::path::Path::new(&pair[1]).exists() {
1058                    // The SDK path doesn't exist. Emit a warning, which they'll
1059                    // see if the build ends up failing (it may not fail in all
1060                    // cases, so we don't panic here).
1061                    //
1062                    // There's a bunch of smarter things we can try here, but
1063                    // most of them either break things that currently work, or
1064                    // are very difficult to get right. If you try to fix this,
1065                    // be sure to consider cases like:
1066                    //
1067                    // - User may have CommandLineTools and not Xcode, vice
1068                    //   versa, or both installed.
1069                    // - User may using a newer SDK than their OS, or vice
1070                    //   versa.
1071                    // - User may be using a newer SDK than their XCode (updated
1072                    //   Command line tools, not OS), or vice versa.
1073                    // - And so on.
1074                    //
1075                    // These are all actually fairly common. Note that the code
1076                    // as-is is *not* broken in these cases (except on OS/SDK
1077                    // updates), so care should be taken to avoid changing that
1078                    // if possible.
1079                    //
1080                    // The logic we'd like ideally is for `cargo pgrx init` to
1081                    // choose a good SDK in the first place, and force postgres
1082                    // to use it. Then, the logic in this build script would
1083                    // Just Work without changes (since we are using its
1084                    // sysroot verbatim).
1085                    //
1086                    // The value of "Good" here is tricky, but the logic should
1087                    // probably:
1088                    //
1089                    // - prefer SDKs from the CLI tools to ones from XCode
1090                    //   (since they're guaranteed compatible with the user's OS
1091                    //   version)
1092                    //
1093                    // - prefer SDKs that specify only the major SDK version
1094                    //   (e.g. MacOSX12.sdk and not MacOSX12.4.sdk or
1095                    //   MacOSX.sdk), to avoid breaking too frequently (if we
1096                    //   have a minor version) or being totally unable to detect
1097                    //   what version of the SDK was used to build postgres (if
1098                    //   we have neither).
1099                    //
1100                    // - Avoid choosing an SDK newer than the user's OS version,
1101                    //   since postgres fails to detect that they are missing if
1102                    //   you do.
1103                    //
1104                    // This is surprisingly hard to implement, as the
1105                    // information is scattered across a dozen ini files.
1106                    // Presumably Apple assumes you'll use
1107                    // `MACOSX_DEPLOYMENT_TARGET`, rather than basing it off the
1108                    // SDK version, but it's not an option for postgres.
1109                    let major_version = pg_config.major_version()?;
1110                    println!(
1111                        "cargo:warning=postgres v{major_version} was compiled against an \
1112                         SDK Root which does not seem to exist on this machine ({}). You may \
1113                         need to re-run `cargo pgrx init` and/or update your command line tools.",
1114                        pair[1],
1115                    );
1116                };
1117                // Either way, we stop here.
1118                break;
1119            }
1120        }
1121    }
1122    Ok(out)
1123}
1124
1125fn run_command(mut command: &mut Command, version: &str) -> eyre::Result<Output> {
1126    let mut dbg = String::new();
1127
1128    command = command
1129        .env_remove("DEBUG")
1130        .env_remove("MAKEFLAGS")
1131        .env_remove("MAKELEVEL")
1132        .env_remove("MFLAGS")
1133        .env_remove("DYLD_FALLBACK_LIBRARY_PATH")
1134        .env_remove("OPT_LEVEL")
1135        .env_remove("PROFILE")
1136        .env_remove("OUT_DIR")
1137        .env_remove("NUM_JOBS");
1138
1139    eprintln!("[{version}] {command:?}");
1140    dbg.push_str(&format!("[{version}] -------- {command:?} -------- \n"));
1141
1142    let output = command.output()?;
1143    let rc = output.clone();
1144
1145    if !output.stdout.is_empty() {
1146        for line in String::from_utf8(output.stdout).unwrap().lines() {
1147            if line.starts_with("cargo:") {
1148                dbg.push_str(&format!("{line}\n"));
1149            } else {
1150                dbg.push_str(&format!("[{version}] [stdout] {line}\n"));
1151            }
1152        }
1153    }
1154
1155    if !output.stderr.is_empty() {
1156        for line in String::from_utf8(output.stderr).unwrap().lines() {
1157            dbg.push_str(&format!("[{version}] [stderr] {line}\n"));
1158        }
1159    }
1160    dbg.push_str(&format!("[{version}] /----------------------------------------\n"));
1161
1162    eprintln!("{dbg}");
1163    Ok(rc)
1164}
1165
1166fn apply_pg_guard(items: &Vec<syn::Item>) -> eyre::Result<proc_macro2::TokenStream> {
1167    let mut out = proc_macro2::TokenStream::new();
1168    for item in items {
1169        match item {
1170            Item::ForeignMod(block) => {
1171                out.extend(quote! {
1172                    #[pgrx_macros::pg_guard]
1173                    #block
1174                });
1175            }
1176            _ => {
1177                out.extend(item.into_token_stream());
1178            }
1179        }
1180    }
1181
1182    Ok(out)
1183}
1184
1185fn rewrite_c_abi_to_c_unwind(file: &mut syn::File) {
1186    use proc_macro2::Span;
1187    use syn::visit_mut::VisitMut;
1188    use syn::LitStr;
1189    pub struct Visitor {}
1190    impl VisitMut for Visitor {
1191        fn visit_abi_mut(&mut self, abi: &mut syn::Abi) {
1192            if let Some(name) = &mut abi.name {
1193                if name.value() == "C" {
1194                    *name = LitStr::new("C-unwind", Span::call_site());
1195                }
1196            }
1197        }
1198    }
1199    Visitor {}.visit_file_mut(file);
1200}
1201
1202fn rust_fmt(path: &Path) -> eyre::Result<()> {
1203    // We shouldn't hit this path in a case where we care about it, but... just
1204    // in case we probably should respect RUSTFMT.
1205    let rustfmt = env_tracked("RUSTFMT").unwrap_or_else(|| "rustfmt".into());
1206    let mut command = Command::new(rustfmt);
1207    command.arg(path).args(["--edition", "2021"]).current_dir(".");
1208
1209    let out = run_command(&mut command, "[bindings_diff]");
1210    match out {
1211        Ok(output) if output.status.success() => Ok(()),
1212        Ok(output) => {
1213            let rustfmt_output = format!(
1214                r#"Problems running rustfmt: {command:?}:
1215                {}
1216                {}"#,
1217                String::from_utf8_lossy(&output.stdout),
1218                String::from_utf8_lossy(&output.stderr)
1219            );
1220
1221            for line in rustfmt_output.lines() {
1222                println!("cargo:warning={line}");
1223            }
1224
1225            // we won't fail the build because rustfmt failed
1226            Ok(())
1227        }
1228        Err(e)
1229            if e.downcast_ref::<std::io::Error>()
1230                .ok_or(eyre!("Couldn't downcast error ref"))?
1231                .kind()
1232                == std::io::ErrorKind::NotFound =>
1233        {
1234            Err(e).wrap_err("Failed to run `rustfmt`, is it installed?")
1235        }
1236        Err(e) => Err(e),
1237    }
1238}