autocxx_engine/
lib.rs

1//! The core of the `autocxx` engine, used by both the
2//! `autocxx_macro` and also code generators (e.g. `autocxx_build`).
3//! See [IncludeCppEngine] for general description of how this engine works.
4
5// Copyright 2020 Google LLC
6//
7// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
8// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
9// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
10// option. This file may not be copied, modified, or distributed
11// except according to those terms.
12
13#![forbid(unsafe_code)]
14#![cfg_attr(feature = "nightly", feature(doc_cfg))]
15
16mod ast_discoverer;
17mod conversion;
18mod cxxbridge;
19mod known_types;
20mod minisyn;
21mod output_generators;
22mod parse_callbacks;
23mod parse_file;
24mod rust_pretty_printer;
25mod types;
26
27#[cfg(any(test, feature = "build"))]
28mod builder;
29
30use autocxx_bindgen::BindgenError;
31use autocxx_parser::{IncludeCppConfig, UnsafePolicy};
32use conversion::BridgeConverter;
33use miette::{SourceOffset, SourceSpan};
34use parse_callbacks::{AutocxxParseCallbacks, ParseCallbackResults, UnindexedParseCallbackResults};
35use parse_file::CppBuildable;
36use proc_macro2::TokenStream as TokenStream2;
37use regex::Regex;
38use std::cell::RefCell;
39use std::path::PathBuf;
40use std::rc::Rc;
41use std::{
42    fs::File,
43    io::prelude::*,
44    path::Path,
45    process::{Command, Stdio},
46};
47use tempfile::NamedTempFile;
48
49use quote::ToTokens;
50use syn::Result as ParseResult;
51use syn::{
52    parse::{Parse, ParseStream},
53    parse_quote, ItemMod, Macro,
54};
55use thiserror::Error;
56
57use itertools::{join, Itertools};
58use known_types::known_types;
59use log::info;
60use miette::Diagnostic;
61
62/// We use a forked version of bindgen - for now.
63/// We hope to unfork.
64use autocxx_bindgen as bindgen;
65
66#[cfg(any(test, feature = "build"))]
67pub use builder::{
68    Builder, BuilderBuild, BuilderContext, BuilderError, BuilderResult, BuilderSuccess,
69};
70pub use output_generators::{generate_rs_archive, generate_rs_single, RsOutput};
71pub use parse_file::{parse_file, ParseError, ParsedFile};
72
73pub use cxx_gen::HEADER;
74
/// A generated C++ header, optionally accompanied by an implementation
/// file, which should be written to disk and built.
#[derive(Clone)]
pub struct CppFilePair {
    /// Contents of the header file (the declarations).
    pub header: Vec<u8>,
    /// Contents of the `.cpp` file (the implementations), if there is one.
    pub implementation: Option<Vec<u8>>,
    /// The file name to give the header. This matters because the header
    /// may be `#include`d from elsewhere.
    pub header_name: String,
}
86
87/// All generated C++ content which should be written to disk.
88pub struct GeneratedCpp(pub Vec<CppFilePair>);
89
90/// A [`syn::Error`] which also implements [`miette::Diagnostic`] so can be pretty-printed
91/// to show the affected span of code.
92#[derive(Error, Debug, Diagnostic)]
93#[error("{err}")]
94pub struct LocatedSynError {
95    err: syn::Error,
96    #[source_code]
97    file: String,
98    #[label("error here")]
99    span: SourceSpan,
100}
101
102impl LocatedSynError {
103    fn new(err: syn::Error, file: &str) -> Self {
104        let span = proc_macro_span_to_miette_span(&err.span());
105        Self {
106            err,
107            file: file.to_string(),
108            span,
109        }
110    }
111}
112
113/// Errors which may occur in generating bindings for these C++
114/// functions.
115#[derive(Debug, Error, Diagnostic)]
116pub enum Error {
117    #[error("Bindgen was unable to generate the initial .rs bindings for this file. This may indicate a parsing problem with the C++ headers.")]
118    Bindgen(BindgenError),
119    #[error(transparent)]
120    #[diagnostic(transparent)]
121    MacroParsing(LocatedSynError),
122    #[error(transparent)]
123    #[diagnostic(transparent)]
124    BindingsParsing(LocatedSynError),
125    #[error("no C++ include directory was provided.")]
126    NoAutoCxxInc,
127    #[error(transparent)]
128    #[diagnostic(transparent)]
129    Conversion(conversion::ConvertError),
130    #[error("Using `unsafe_references_wrapped` requires the Rust nightly `arbitrary_self_types` feature")]
131    WrappedReferencesButNoArbitrarySelfTypes,
132}
133
134/// Result type.
135pub type Result<T, E = Error> = std::result::Result<T, E>;
136
137struct GenerationResults {
138    item_mod: ItemMod,
139    cpp: Option<CppFilePair>,
140    #[allow(dead_code)]
141    inc_dirs: Vec<PathBuf>,
142    cxxgen_header_name: String,
143}
144enum State {
145    NotGenerated,
146    ParseOnly,
147    Generated(Box<GenerationResults>),
148}
149
150/// Code generation options.
151#[derive(Default)]
152pub struct CodegenOptions<'a> {
153    // An option used by the test suite to force a more convoluted
154    // route through our code, to uncover bugs.
155    pub force_wrapper_gen: bool,
156    /// Options about the C++ code generation.
157    pub cpp_codegen_options: CppCodegenOptions<'a>,
158}
159
/// Default arguments which `make_clang_args` places ahead of any include
/// paths and caller-supplied arguments.
const AUTOCXX_CLANG_ARGS: &[&str; 4] = &["-x", "c++", "-std=c++14", "-DBINDGEN"];
161
/// Hook for learning which header files were included during this build
/// process. Implement it so that your build system can decide to rerun
/// the build if any of those files later changes.
pub trait RebuildDependencyRecorder: std::fmt::Debug {
    /// Called to record that this autocxx build depends on the header
    /// file `filename`. Full paths will be provided.
    fn record_header_file_dependency(&self, filename: &str);
}
170
171#[cfg_attr(doc, aquamarine::aquamarine)]
172/// Core of the autocxx engine.
173///
174/// The basic idea is this. We will run `bindgen` which will spit
175/// out a ton of Rust code corresponding to all the types and functions
176/// defined in C++. We'll then post-process that bindgen output
177/// into a form suitable for ingestion by `cxx`.
178/// (It's the `BridgeConverter` mod which does that.)
179/// Along the way, the `bridge_converter` might tell us of additional
180/// C++ code which we should generate, e.g. wrappers to move things
181/// into and out of `UniquePtr`s.
182///
183/// ```mermaid
184/// flowchart TB
185///     s[(C++ headers)]
186///     s --> lc
187///     rss[(.rs input)]
188///     rss --> parser
189///     parser --> include_cpp_conf
190///     cpp_output[(C++ output)]
191///     rs_output[(.rs output)]
192///     subgraph autocxx[autocxx_engine]
193///     parser[File parser]
194///     subgraph bindgen[autocxx_bindgen]
195///     lc[libclang parse]
196///     bir(bindgen IR)
197///     lc --> bir
198///     end
199///     bgo(bindgen generated bindings)
200///     bir --> bgo
201///     include_cpp_conf(Config from include_cpp)
202///     syn[Parse with syn]
203///     bgo --> syn
204///     conv[['conversion' mod: see below]]
205///     syn --> conv
206///     rsgen(Generated .rs TokenStream)
207///     conv --> rsgen
208///     subgraph cxx_gen
209///     cxx_codegen[cxx_gen C++ codegen]
210///     end
211///     rsgen --> cxx_codegen
212///     end
213///     conv -- autocxx C++ codegen --> cpp_output
214///     rsgen -- autocxx .rs codegen --> rs_output
215///     cxx_codegen -- cxx C++ codegen --> cpp_output
216///     subgraph rustc [rustc build]
217///     subgraph autocxx_macro
218///     include_cpp[autocxx include_cpp macro]
219///     end
220///     subgraph cxx
221///     cxxm[cxx procedural macro]
222///     end
223///     comprs(Fully expanded Rust code)
224///     end
225///     rs_output-. included .->include_cpp
226///     include_cpp --> cxxm
227///     cxxm --> comprs
228///     rss --> rustc
229///     include_cpp_conf -. used to configure .-> bindgen
230///     include_cpp_conf --> conv
231///     link[linker]
232///     cpp_output --> link
233///     comprs --> link
234/// ```
235///
236/// Here's a zoomed-in view of the "conversion" part:
237///
238/// ```mermaid
239/// flowchart TB
240///     syn[(syn parse)]
241///     apis(Unanalyzed APIs)
242///     subgraph parse
243///     syn ==> parse_bindgen
244///     end
245///     parse_bindgen ==> apis
246///     subgraph analysis
247///     typedef[typedef analysis]
248///     pod[POD analysis]
249///     apis ==> typedef
250///     typedef ==> pod
251///     podapis(APIs with POD analysis)
252///     pod ==> podapis
253///     fun[Function materialization analysis]
254///     podapis ==> fun
255///     funapis(APIs with function analysis)
256///     fun ==> funapis
257///     gc[Garbage collection]
258///     funapis ==> gc
259///     ctypes[C int analysis]
260///     gc ==> ctypes
261///     ctypes ==> finalapis
262///     end
263///     finalapis(Analyzed APIs)
264///     codegenrs(.rs codegen)
265///     codegencpp(.cpp codegen)
266///     finalapis ==> codegenrs
267///     finalapis ==> codegencpp
268/// ```
269pub struct IncludeCppEngine {
270    config: IncludeCppConfig,
271    state: State,
272    source_code: Option<Rc<String>>, // so we can create diagnostics
273}
274
275impl Parse for IncludeCppEngine {
276    fn parse(input: ParseStream) -> ParseResult<Self> {
277        let config = input.parse::<IncludeCppConfig>()?;
278        let state = if config.parse_only {
279            State::ParseOnly
280        } else {
281            State::NotGenerated
282        };
283        Ok(Self {
284            config,
285            state,
286            source_code: None,
287        })
288    }
289}
290
291impl IncludeCppEngine {
292    pub fn new_from_syn(mac: Macro, file_contents: Rc<String>) -> Result<Self> {
293        let mut this = mac
294            .parse_body::<IncludeCppEngine>()
295            .map_err(|e| Error::MacroParsing(LocatedSynError::new(e, &file_contents)))?;
296        this.source_code = Some(file_contents);
297        Ok(this)
298    }
299
300    /// Used if we find that we're asked to auto-discover extern_rust_type and similar
301    /// but didn't have any include_cpp macro at all.
302    pub fn new_for_autodiscover() -> Self {
303        Self {
304            config: IncludeCppConfig::default(),
305            state: State::NotGenerated,
306            source_code: None,
307        }
308    }
309
310    pub fn config_mut(&mut self) -> &mut IncludeCppConfig {
311        assert!(
312            matches!(self.state, State::NotGenerated),
313            "Can't alter config after generation commenced"
314        );
315        &mut self.config
316    }
317
318    fn build_header(&self) -> String {
319        join(
320            self.config
321                .inclusions
322                .iter()
323                .map(|path| format!("#include \"{path}\"\n")),
324            "",
325        )
326    }
327
328    fn make_bindgen_builder(
329        &self,
330        inc_dirs: &[PathBuf],
331        extra_clang_args: &[&str],
332    ) -> bindgen::Builder {
333        let bindgen_marker_types = ["Opaque", "Reference", "RValueReference"];
334        let raw_line = bindgen_marker_types
335            .iter()
336            .map(|t| format!("#[repr(transparent)] pub struct __bindgen_marker_{t}<T: ?Sized>(T);"))
337            .join(" ");
338        let use_list = bindgen_marker_types
339            .iter()
340            .map(|t| format!("__bindgen_marker_{t}"))
341            .join(", ");
342        let all_module_raw_line = format!("#[allow(unused_imports)] use super::{{{use_list}}}; #[allow(unused_imports)] use autocxx::c_char16_t as bindgen_cchar16_t;");
343
344        let mut builder = bindgen::builder()
345            .clang_args(make_clang_args(inc_dirs, extra_clang_args))
346            .derive_copy(false)
347            .derive_debug(false)
348            .default_enum_style(bindgen::EnumVariation::Rust {
349                non_exhaustive: false,
350            })
351            .formatter(if log::log_enabled!(log::Level::Info) {
352                bindgen::Formatter::Rustfmt
353            } else {
354                bindgen::Formatter::None
355            })
356            .size_t_is_usize(true)
357            .enable_cxx_namespaces()
358            .generate_inline_functions(true)
359            .respect_cxx_access_specs(true)
360            .use_specific_virtual_function_receiver(true)
361            .use_opaque_newtype_wrapper(true)
362            .use_reference_newtype_wrapper(true)
363            .represent_cxx_operators(true)
364            .use_distinct_char16_t(true)
365            .generate_deleted_functions(true)
366            .generate_pure_virtuals(true)
367            .raw_line(raw_line)
368            .every_module_raw_line(all_module_raw_line)
369            .generate_private_functions(true)
370            .layout_tests(false); // TODO revisit later
371
372        // 3. Passes allowlist and other options to the bindgen::Builder equivalent
373        //    to --output-style=cxx --allowlist=<as passed in>
374        if let Some(allowlist) = self.config.bindgen_allowlist() {
375            for a in allowlist {
376                // TODO - allowlist type/functions/separately
377                builder = builder
378                    .allowlist_type(&a)
379                    .allowlist_function(&a)
380                    .allowlist_function(format!("{a}_bindgen_original"))
381                    .allowlist_var(&a);
382            }
383        }
384
385        for item in &self.config.opaquelist {
386            builder = builder.opaque_type(item);
387        }
388
389        // At this point it woul be great to use `Builder::opaque_type` for
390        // everything which is on the allowlist but not on the POD list.
391        // This would free us from a large proportion of bindgen bugs which
392        // are dealing with obscure templated types. Unfortunately, even
393        // for types which we expose to the user as opaque (non-POD), autocxx
394        // internally still cares about seeing what fields they've got because
395        // we make decisions about implicit constructors on that basis.
396        // So, for now, we can't do that. Perhaps in future bindgen could
397        // gain an option to generate any implicit constructors, if that
398        // information is exposed by clang. That would remove a lot of
399        // autocxx complexity and would allow us to request opaque types.
400
401        log::info!(
402            "Bindgen flags would be: {}",
403            builder
404                .command_line_flags()
405                .into_iter()
406                .map(|f| format!("\"{f}\""))
407                .join(" ")
408        );
409        builder
410    }
411
412    pub fn get_rs_filename(&self) -> String {
413        self.config.get_rs_filename()
414    }
415
416    /// Generate the Rust bindings. Call `generate` first.
417    pub fn get_rs_output(&self) -> RsOutput {
418        RsOutput {
419            config: &self.config,
420            rs: match &self.state {
421                State::NotGenerated => panic!("Generate first"),
422                State::Generated(gen_results) => gen_results.item_mod.to_token_stream(),
423                State::ParseOnly => TokenStream2::new(),
424            },
425        }
426    }
427
428    /// Returns the name of the mod which this `include_cpp!` will generate.
429    /// Can and should be used to ensure multiple mods in a file don't conflict.
430    pub fn get_mod_name(&self) -> String {
431        self.config.get_mod_name().to_string()
432    }
433
434    fn parse_bindings(&self, bindings: bindgen::Bindings) -> Result<ItemMod> {
435        // This bindings object is actually a TokenStream internally and we're wasting
436        // effort converting to and from string. We could enhance the bindgen API
437        // in future.
438        let bindings = bindings.to_string();
439        // Manually add the mod ffi {} so that we can ask syn to parse
440        // into a single construct.
441        let bindings = format!("mod bindgen {{ {bindings} }}");
442        info!("Bindings: {}", bindings);
443        syn::parse_str::<ItemMod>(&bindings)
444            .map_err(|e| Error::BindingsParsing(LocatedSynError::new(e, &bindings)))
445    }
446
447    /// Actually examine the headers to find out what needs generating.
448    /// Most errors occur at this stage as we fail to interpret the C++
449    /// headers properly.
450    ///
451    /// See documentation for this type for flow diagrams and more details.
452    pub fn generate(
453        &mut self,
454        inc_dirs: Vec<PathBuf>,
455        extra_clang_args: &[&str],
456        dep_recorder: Option<Box<dyn RebuildDependencyRecorder>>,
457        codegen_options: &CodegenOptions,
458    ) -> Result<()> {
459        // If we are in parse only mode, do nothing. This is used for
460        // doc tests to ensure the parsing is valid, but we can't expect
461        // valid C++ header files or linkers to allow a complete build.
462        match self.state {
463            State::ParseOnly => return Ok(()),
464            State::NotGenerated => {}
465            State::Generated(_) => panic!("Only call generate once"),
466        }
467
468        if matches!(
469            self.config.unsafe_policy,
470            UnsafePolicy::ReferencesWrappedAllFunctionsSafe
471        ) && !rustversion::cfg!(nightly)
472        {
473            return Err(Error::WrappedReferencesButNoArbitrarySelfTypes);
474        }
475
476        let parse_callback_results =
477            Rc::new(RefCell::new(UnindexedParseCallbackResults::default()));
478        let mod_name = self.config.get_mod_name();
479        let mut builder = self
480            .make_bindgen_builder(&inc_dirs, extra_clang_args)
481            .parse_callbacks(Box::new(AutocxxParseCallbacks::new(
482                dep_recorder,
483                parse_callback_results.clone(),
484            )));
485        let header_contents = self.build_header();
486        self.dump_header_if_so_configured(&header_contents, &inc_dirs, extra_clang_args);
487        let header_and_prelude = format!("{}\n\n{}", known_types().get_prelude(), header_contents);
488        log::info!("Header and prelude for bindgen:\n{}", header_and_prelude);
489        builder = builder.header_contents("example.hpp", &header_and_prelude);
490
491        let bindings = builder.generate().map_err(Error::Bindgen)?;
492        let bindings = self.parse_bindings(bindings)?;
493        let parse_callback_results = parse_callback_results.take();
494        log::info!("Parse callback results: {:?}", parse_callback_results);
495
496        // Source code contents just used for diagnostics - if we don't have it,
497        // use a blank string and miette will not attempt to annotate it nicely.
498        let source_file_contents = self
499            .source_code
500            .as_ref()
501            .cloned()
502            .unwrap_or_else(|| Rc::new("".to_string()));
503
504        let converter = BridgeConverter::new(&self.config.inclusions, &self.config);
505
506        let conversion = converter
507            .convert(
508                bindings,
509                parse_callback_results.index(),
510                self.config.unsafe_policy.clone(),
511                header_contents,
512                codegen_options,
513                &source_file_contents,
514            )
515            .map_err(Error::Conversion)?;
516        let items = conversion.rs;
517        let new_bindings: ItemMod = parse_quote! {
518            #[allow(non_snake_case)]
519            #[allow(dead_code)]
520            #[allow(non_upper_case_globals)]
521            #[allow(non_camel_case_types)]
522            #[doc = "Generated using autocxx - do not edit directly"]
523            #[doc = "@generated"]
524            mod #mod_name {
525                #(#items)*
526            }
527        };
528        info!(
529            "New bindings:\n{}",
530            rust_pretty_printer::pretty_print(&new_bindings)
531        );
532        self.state = State::Generated(Box::new(GenerationResults {
533            item_mod: new_bindings,
534            cpp: conversion.cpp,
535            inc_dirs,
536            cxxgen_header_name: conversion.cxxgen_header_name,
537        }));
538        Ok(())
539    }
540
541    /// Return the include directories used for this include_cpp invocation.
542    #[cfg(any(test, feature = "build"))]
543    fn include_dirs(&self) -> impl Iterator<Item = &PathBuf> {
544        match &self.state {
545            State::Generated(gen_results) => gen_results.inc_dirs.iter(),
546            _ => panic!("Must call generate() before include_dirs()"),
547        }
548    }
549
550    fn dump_header_if_so_configured(
551        &self,
552        header: &str,
553        inc_dirs: &[PathBuf],
554        extra_clang_args: &[&str],
555    ) {
556        if let Ok(output_path) = std::env::var("AUTOCXX_PREPROCESS") {
557            self.make_preprocessed_file(
558                &PathBuf::from(output_path),
559                header,
560                inc_dirs,
561                extra_clang_args,
562            );
563        }
564        #[cfg(feature = "reproduction_case")]
565        if let Ok(output_path) = std::env::var("AUTOCXX_REPRO_CASE") {
566            let tf = NamedTempFile::new().unwrap();
567            self.make_preprocessed_file(
568                &PathBuf::from(tf.path()),
569                header,
570                inc_dirs,
571                extra_clang_args,
572            );
573            let header = std::fs::read(tf.path()).unwrap();
574            let header = String::from_utf8_lossy(&header);
575            let output_path = PathBuf::from(output_path);
576            let config = self.config.to_token_stream().to_string();
577            let json = serde_json::json!({
578                "header": header,
579                "config": config
580            });
581            let f = File::create(output_path).unwrap();
582            serde_json::to_writer(f, &json).unwrap();
583        }
584    }
585
586    fn make_preprocessed_file(
587        &self,
588        output_path: &Path,
589        header: &str,
590        inc_dirs: &[PathBuf],
591        extra_clang_args: &[&str],
592    ) {
593        // Include a load of system headers at the end of the preprocessed output,
594        // because we would like to be able to generate bindings from the
595        // preprocessed header, and then build those bindings. The C++ parts
596        // of those bindings might need things inside these various headers;
597        // we make sure all these definitions and declarations are inside
598        // this one header file so that the reduction process does not have
599        // to refer to local headers on the reduction machine too.
600        let suffix = ALL_KNOWN_SYSTEM_HEADERS
601            .iter()
602            .map(|hdr| format!("#include <{hdr}>\n"))
603            .join("\n");
604        let input = format!("/*\nautocxx config:\n\n{:?}\n\nend autocxx config.\nautocxx preprocessed input:\n*/\n\n{}\n\n/* autocxx: extra headers added below for completeness. */\n\n{}\n{}\n",
605            self.config, header, suffix, cxx_gen::HEADER);
606        let mut tf = NamedTempFile::new().unwrap();
607        write!(tf, "{input}").unwrap();
608        let tp = tf.into_temp_path();
609        preprocess(&tp, &PathBuf::from(output_path), inc_dirs, extra_clang_args).unwrap();
610    }
611}
612
/// This is a list of all the headers known to be included in generated
/// C++ by cxx. We only use this when the `AUTOCXX_PREPROCESS` environment
/// variable is set, in an attempt to make the resulting preprocessed
/// header more hermetic.
/// We clearly should _not_ use this in any other circumstance; obviously
/// we'd then want to add an API to cxx_gen such that we could retrieve
/// that information from source.
///
/// Each header appears exactly once.
static ALL_KNOWN_SYSTEM_HEADERS: &[&str] = &[
    "memory",
    "string",
    "algorithm",
    "array",
    "cassert",
    "cstddef",
    "cstdint",
    "cstring",
    "exception",
    "functional",
    "initializer_list",
    "iterator",
    "new",
    "stdexcept",
    "type_traits",
    "utility",
    "vector",
    "sys/types.h",
];
640
641pub fn do_cxx_cpp_generation(
642    rs: TokenStream2,
643    cpp_codegen_options: &CppCodegenOptions,
644    cxxgen_header_name: String,
645) -> Result<CppFilePair, cxx_gen::Error> {
646    let mut opt = cxx_gen::Opt::default();
647    opt.cxx_impl_annotations
648        .clone_from(&cpp_codegen_options.cxx_impl_annotations);
649    let cxx_generated = cxx_gen::generate_header_and_cc(rs, &opt)?;
650    Ok(CppFilePair {
651        header: strip_system_headers(
652            cxx_generated.header,
653            cpp_codegen_options.suppress_system_headers,
654        ),
655        header_name: cxxgen_header_name,
656        implementation: Some(strip_system_headers(
657            cxx_generated.implementation,
658            cpp_codegen_options.suppress_system_headers,
659        )),
660    })
661}
662
663pub fn get_cxx_header_bytes(suppress_system_headers: bool) -> Vec<u8> {
664    strip_system_headers(cxx_gen::HEADER.as_bytes().to_vec(), suppress_system_headers)
665}
666
/// Removes every line beginning with `#include <` from `input` when
/// `suppress_system_headers` is set; otherwise returns `input` unchanged.
///
/// When stripping, the remaining lines are rejoined with `\n`, so any
/// trailing newline is not retained.
///
/// # Panics
///
/// Panics if stripping is requested and `input` is not valid UTF-8.
fn strip_system_headers(input: Vec<u8>, suppress_system_headers: bool) -> Vec<u8> {
    if !suppress_system_headers {
        return input;
    }
    let text = std::str::from_utf8(&input).unwrap();
    let kept: Vec<&str> = text
        .lines()
        .filter(|line| !line.starts_with("#include <"))
        .collect();
    kept.join("\n").into_bytes()
}
680
681impl CppBuildable for IncludeCppEngine {
682    /// Generate C++-side bindings for these APIs. Call `generate` first.
683    fn generate_h_and_cxx(
684        &self,
685        cpp_codegen_options: &CppCodegenOptions,
686    ) -> Result<GeneratedCpp, cxx_gen::Error> {
687        let mut files = Vec::new();
688        match &self.state {
689            State::ParseOnly => panic!("Cannot generate C++ in parse-only mode"),
690            State::NotGenerated => panic!("Call generate() first"),
691            State::Generated(gen_results) => {
692                let rs = gen_results.item_mod.to_token_stream();
693                files.push(do_cxx_cpp_generation(
694                    rs,
695                    cpp_codegen_options,
696                    gen_results.cxxgen_header_name.clone(),
697                )?);
698                if let Some(cpp_file_pair) = &gen_results.cpp {
699                    files.push(cpp_file_pair.clone());
700                }
701            }
702        };
703        Ok(GeneratedCpp(files))
704    }
705}
706
707/// Get clang args as if we were operating clang the same way as we operate
708/// bindgen.
709pub fn make_clang_args<'a>(
710    incs: &'a [PathBuf],
711    extra_args: &'a [&str],
712) -> impl Iterator<Item = String> + 'a {
713    // AUTOCXX_CLANG_ARGS come first so that any defaults defined there(e.g. for the `-std`
714    // argument) can be overridden by extra_args.
715    AUTOCXX_CLANG_ARGS
716        .iter()
717        .map(|s| s.to_string())
718        .chain(incs.iter().map(|i| format!("-I{}", i.to_str().unwrap())))
719        .chain(extra_args.iter().map(|s| s.to_string()))
720}
721
722/// Preprocess a file using the same options
723/// as is used by autocxx. Input: listing_path, output: preprocess_path.
724pub fn preprocess(
725    listing_path: &Path,
726    preprocess_path: &Path,
727    incs: &[PathBuf],
728    extra_clang_args: &[&str],
729) -> Result<(), std::io::Error> {
730    let mut cmd = Command::new(get_clang_path());
731    cmd.arg("-E");
732    cmd.arg("-C");
733    cmd.args(make_clang_args(incs, extra_clang_args));
734    cmd.arg(listing_path.to_str().unwrap());
735    cmd.stderr(Stdio::inherit());
736    let result = cmd.output().expect("failed to execute clang++");
737    assert!(result.status.success(), "failed to preprocess");
738    let mut file = File::create(preprocess_path)?;
739    file.write_all(&result.stdout)?;
740    Ok(())
741}
742
/// Get the path to clang which is effective for any preprocessing
/// operations done by autocxx.
///
/// The `CLANG_PATH` environment variable takes precedence, then `CXX`;
/// if neither can be read, `clang++` is returned.
pub fn get_clang_path() -> String {
    // `CLANG_PATH` is the environment variable that clang-sys uses to specify
    // the path to Clang, so in most cases where someone is using a compiler
    // that's not on the path, things should just work. We also check `CXX`,
    // since some users may have set that.
    for var in ["CLANG_PATH", "CXX"] {
        if let Ok(path) = std::env::var(var) {
            return path;
        }
    }
    "clang++".to_string()
}
754
/// A function which chooses the name of the header containing autocxx's
/// extra generated C++, given a module name.
/// This is a newtype wrapper so that we can give it a [`Default`].
pub struct AutocxxgenHeaderNamer<'a>(pub Box<dyn 'a + Fn(String) -> String>);

impl Default for AutocxxgenHeaderNamer<'static> {
    /// The default namer produces `autocxxgen_<mod_name>.h`.
    fn default() -> Self {
        fn default_name(mod_name: String) -> String {
            format!("autocxxgen_{mod_name}.h")
        }
        Self(Box::new(default_name))
    }
}

impl AutocxxgenHeaderNamer<'_> {
    /// Invokes the wrapped function to name the header for `mod_name`.
    fn name_header(&self, mod_name: String) -> String {
        let namer = &self.0;
        namer(mod_name)
    }
}
771
/// A function which chooses the name of the header containing cxx's
/// declarations.
/// This is a newtype wrapper so that we can give it a [`Default`].
pub struct CxxgenHeaderNamer<'a>(pub Box<dyn 'a + Fn() -> String>);

impl Default for CxxgenHeaderNamer<'static> {
    /// The default namer counts its own invocations, independently of any
    /// other namer.
    fn default() -> Self {
        // The default implementation here is to name these headers
        // cxxgen.h, cxxgen1.h, cxxgen2.h etc.
        // These names are not especially predictable by callers and this
        // behavior is not tested anywhere - so this is considered semi-
        // supported, at best. This only comes into play in the rare case
        // that you're generating bindings to multiple include_cpp!
        // or a mix of include_cpp! and #[cxx::bridge] bindings.
        //
        // The closure must be `Fn`, so the counter needs interior mutability.
        let calls_so_far = RefCell::new(0);
        Self(Box::new(move || {
            let mut calls = calls_so_far.borrow_mut();
            let count = *calls;
            *calls += 1;
            if count == 0 {
                "cxxgen.h".into()
            } else {
                format!("cxxgen{count}.h")
            }
        }))
    }
}

impl CxxgenHeaderNamer<'_> {
    /// Invokes the wrapped function to obtain the next header name.
    fn name_header(&self) -> String {
        let namer = &self.0;
        namer()
    }
}
808
809/// Options for C++ codegen
810#[derive(Default)]
811pub struct CppCodegenOptions<'a> {
812    /// Whether to avoid generating `#include <some-system-header>`.
813    /// You may wish to do this to make a hermetic test case with no
814    /// external dependencies.
815    pub suppress_system_headers: bool,
816    /// Optionally, a prefix to go at `#include "*here*cxx.h". This is a header file from the `cxx`
817    /// crate.
818    pub path_to_cxx_h: Option<String>,
819    /// Optionally, a prefix to go at `#include "*here*cxxgen.h". This is a header file which we
820    /// generate.
821    pub path_to_cxxgen_h: Option<String>,
822    /// Optionally, a function called to determine the name that will be used
823    /// for the autocxxgen.h file.
824    /// The function is passed the name of the module generated by each `include_cpp`,
825    /// configured via `name`. These will be unique.
826    pub autocxxgen_header_namer: AutocxxgenHeaderNamer<'a>,
827    /// A function to generate the name of the cxxgen.h header that should be output.
828    pub cxxgen_header_namer: CxxgenHeaderNamer<'a>,
829    /// An annotation optionally to include on each C++ function.
830    /// For example to export the symbol from a library.
831    pub cxx_impl_annotations: Option<String>,
832}
833
834fn proc_macro_span_to_miette_span(span: &proc_macro2::Span) -> SourceSpan {
835    // A proc_macro2::Span stores its location as a byte offset. But there are
836    // no APIs to get that offset out.
837    // We could use `.start()` and `.end()` to get the line + column numbers, but it appears
838    // they're a little buggy. Hence we do this, to get the offsets directly across into
839    // miette.
840    struct Err;
841    let r: Result<(usize, usize), Err> = (|| {
842        let span_desc = format!("{span:?}");
843        let re = Regex::new(r"(\d+)..(\d+)").unwrap();
844        let captures = re.captures(&span_desc).ok_or(Err)?;
845        let start = captures.get(1).ok_or(Err)?;
846        let start: usize = start.as_str().parse().map_err(|_| Err)?;
847        let start = start.saturating_sub(1); // proc_macro::Span offsets seem to be off-by-one
848        let end = captures.get(2).ok_or(Err)?;
849        let end: usize = end.as_str().parse().map_err(|_| Err)?;
850        let end = end.saturating_sub(1); // proc_macro::Span offsets seem to be off-by-one
851        Ok((start, end.saturating_sub(start)))
852    })();
853    let (start, end) = r.unwrap_or((0, 0));
854    SourceSpan::new(SourceOffset::from(start), SourceOffset::from(end))
855}