use std::collections::{HashMap, HashSet};
use std::ops::ControlFlow;
use std::path::{Path, PathBuf};
use crate::apidoc::{ApidocCollector, ApidocDict, ApidocResolveError};
use crate::ast::{DerivedDecl, ExternalDecl, TypeSpec};
use crate::c_fn_decl::{CFnDecl, CFnDeclDict, CParam};
use crate::enum_dict::EnumDict;
use crate::error::EnrichedCompileError;
use crate::fields_dict::FieldsDict;
use crate::inline_fn::InlineFnDict;
use crate::intern::InternedStr;
use crate::macro_infer::{ExplicitExpandSymbols, MacroInferContext, NoExpandSymbols};
use crate::parser::Parser;
use crate::perl_config::PerlConfigError;
use crate::preprocessor::{MacroCallWatcher, MacroDefCallback, Preprocessor};
use crate::rust_decl::RustDeclDict;
/// Set of interned names used as the typedef dictionary.
///
/// `run_inference_with_preprocessor` fills it by cloning the parser's
/// typedef set once parsing has finished.
pub type TypedefDict = HashSet<InternedStr>;
/// Error type returned by the inference entry point.
///
/// Each variant wraps one underlying error type; matching `From` impls
/// exist so the `?` operator can convert into `InferError` directly.
#[derive(Debug)]
pub enum InferError {
/// Wraps a [`PerlConfigError`].
PerlConfig(PerlConfigError),
/// Wraps an [`ApidocResolveError`].
ApidocResolve(ApidocResolveError),
/// Wraps an [`EnrichedCompileError`]; used for parser construction and
/// parse failures after file information has been attached.
Compile(EnrichedCompileError),
/// Wraps a plain [`std::io::Error`].
Io(std::io::Error),
}
impl std::fmt::Display for InferError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
InferError::PerlConfig(e) => write!(f, "Perl config error: {}", e),
InferError::ApidocResolve(e) => write!(f, "Apidoc resolve error: {}", e),
InferError::Compile(e) => write!(f, "Compile error: {}", e),
InferError::Io(e) => write!(f, "I/O error: {}", e),
}
}
}
impl std::error::Error for InferError {
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
match self {
InferError::PerlConfig(e) => Some(e),
InferError::ApidocResolve(e) => Some(e),
InferError::Compile(e) => Some(e),
InferError::Io(e) => Some(e),
}
}
}
impl From<PerlConfigError> for InferError {
fn from(e: PerlConfigError) -> Self {
InferError::PerlConfig(e)
}
}
impl From<ApidocResolveError> for InferError {
fn from(e: ApidocResolveError) -> Self {
InferError::ApidocResolve(e)
}
}
impl From<EnrichedCompileError> for InferError {
fn from(e: EnrichedCompileError) -> Self {
InferError::Compile(e)
}
}
impl From<std::io::Error> for InferError {
fn from(e: std::io::Error) -> Self {
InferError::Io(e)
}
}
/// Settings for one inference run, assembled builder-style.
#[derive(Debug, Clone)]
pub struct InferConfig {
    /// Input file path (the only mandatory setting).
    pub input_file: PathBuf,
    /// Optional apidoc file path.
    pub apidoc_path: Option<PathBuf>,
    /// Optional bindings file path.
    pub bindings_path: Option<PathBuf>,
    /// Optional apidoc directory path.
    pub apidoc_dir: Option<PathBuf>,
    /// Debug flag; `false` unless set via [`InferConfig::with_debug`].
    pub debug: bool,
}

impl InferConfig {
    /// Creates a configuration for `input_file` with every optional path
    /// unset and `debug` switched off.
    pub fn new(input_file: PathBuf) -> Self {
        Self {
            debug: false,
            apidoc_dir: None,
            bindings_path: None,
            apidoc_path: None,
            input_file,
        }
    }

    /// Returns the configuration with `apidoc_path` set to `path`.
    pub fn with_apidoc(self, path: PathBuf) -> Self {
        Self { apidoc_path: Some(path), ..self }
    }

    /// Returns the configuration with `bindings_path` set to `path`.
    pub fn with_bindings(self, path: PathBuf) -> Self {
        Self { bindings_path: Some(path), ..self }
    }

    /// Returns the configuration with `apidoc_dir` set to `path`.
    pub fn with_apidoc_dir(self, path: PathBuf) -> Self {
        Self { apidoc_dir: Some(path), ..self }
    }

    /// Returns the configuration with the `debug` flag replaced by `debug`.
    pub fn with_debug(self, debug: bool) -> Self {
        Self { debug, ..self }
    }
}
/// Optional debugging switches consulted by the inference run.
#[derive(Debug, Clone, Default)]
pub struct DebugOptions {
    /// When set, the merged apidoc is dumped through this filter string and
    /// the run stops early.
    pub dump_apidoc_after_merge: Option<String>,
    /// Names handed to the inference context as debug macros when non-empty.
    pub debug_type_inference: Vec<String>,
}

impl DebugOptions {
    /// Creates options with nothing enabled (same as `Default`).
    pub fn new() -> Self {
        Self {
            dump_apidoc_after_merge: None,
            debug_type_inference: Vec::new(),
        }
    }

    /// Returns the options with the apidoc dump enabled using `filter`.
    pub fn dump_apidoc(self, filter: impl Into<String>) -> Self {
        let filter: String = filter.into();
        Self { dump_apidoc_after_merge: Some(filter), ..self }
    }
}
/// Macro-definition callback that records the token bodies of a chosen set
/// of macros as the preprocessor defines them.
struct CommonMacroBodyCollector {
    /// Macro names whose bodies should be captured.
    targets: HashSet<InternedStr>,
    /// Captured bodies, keyed by macro name.
    bodies: HashMap<InternedStr, Vec<crate::token::Token>>,
}

impl CommonMacroBodyCollector {
    /// Creates a collector watching `targets`; nothing is captured yet.
    fn new(targets: HashSet<InternedStr>) -> Self {
        let bodies = HashMap::new();
        Self { targets, bodies }
    }
}

impl MacroDefCallback for CommonMacroBodyCollector {
    /// Stores a copy of the body when `def` is one of the watched macros.
    /// A later definition of the same name replaces the stored body.
    fn on_macro_defined(&mut self, def: &crate::macro_def::MacroDef) {
        if !self.targets.contains(&def.name) {
            return;
        }
        let tokens = def.body.clone();
        self.bodies.insert(def.name, tokens);
    }

    /// Erases the concrete type so the owner can downcast it back later.
    fn into_any(self: Box<Self>) -> Box<dyn std::any::Any> {
        self
    }
}
/// Counters gathered by `run_inference_with_preprocessor`.
#[derive(Debug, Clone, Default)]
pub struct InferStats {
/// Length reported by the comment-driven apidoc collector before it is
/// merged into the apidoc dictionary.
pub apidoc_from_comments: usize,
/// Number of analyzed macros flagged both `is_target` and `is_thx_dependent`.
pub thx_dependent_count: usize,
/// Length of the C function declaration dictionary.
pub c_fn_decl_count: usize,
/// Value of `thx_count()` on the C function declaration dictionary.
pub c_fn_thx_count: usize,
}
/// Everything produced by a successful `run_inference_with_preprocessor` call.
pub struct InferResult {
/// Macro inference context after all macros were analyzed and
/// parameter/return types were resolved.
pub infer_ctx: MacroInferContext,
/// Field information collected from the parsed external declarations.
pub fields_dict: FieldsDict,
/// Enum information collected from the parsed external declarations.
pub enum_dict: EnumDict,
/// Function definitions collected from target declarations.
pub inline_fn_dict: InlineFnDict,
/// Apidoc dictionary after merging comment entries, applying patches and
/// expanding type macros.
pub apidoc: ApidocDict,
/// Parsed bindings; `None` when no bindings path was supplied.
pub rust_decl_dict: Option<RustDeclDict>,
/// Function declarations collected while parsing.
pub c_fn_decl_dict: CFnDeclDict,
/// Copy of the parser's typedef set.
pub typedefs: TypedefDict,
/// Global constants collected from the parsed external declarations.
pub global_const_dict: crate::global_const_dict::GlobalConstDict,
/// Patch set loaded alongside the apidoc file plus any merged skip lists.
pub apidoc_patches: crate::apidoc_patches::ApidocPatchSet,
/// Build mode used for the run (override or detected).
pub perl_build_mode: crate::perl_config::PerlBuildMode,
/// Created empty via `PerlvarDict::new()` by `run_inference_with_preprocessor`.
pub perlvar_dict: crate::perlvar_dict::PerlvarDict,
/// The preprocessor that was passed in, handed back to the caller.
pub preprocessor: Preprocessor,
/// Summary counters for the run.
pub stats: InferStats,
}
pub fn run_inference_with_preprocessor(
mut pp: Preprocessor,
apidoc_path: Option<&Path>,
bindings_path: Option<&Path>,
debug_opts: Option<&DebugOptions>,
skip_codegen_lists: &[PathBuf],
perl_build_mode_override: Option<crate::perl_config::PerlBuildMode>,
) -> Result<Option<InferResult>, InferError> {
let perl_build_mode = match perl_build_mode_override {
Some(m) => m,
None => crate::perl_config::PerlBuildMode::detect_from_perl_config()
.unwrap_or(crate::perl_config::PerlBuildMode::Threaded),
};
eprintln!("[perl-mode] {:?}", perl_build_mode);
let rust_decl_dict = if let Some(path) = bindings_path {
Some(RustDeclDict::parse_file(path)?)
} else {
None
};
if let Some(ref dict) = rust_decl_dict {
for name in dict.consts.keys() {
let interned = pp.interner_mut().intern(name);
pp.add_skip_expand_macro(interned);
}
dict.intern_names(pp.interner_mut());
}
{
let explicit_expand = ExplicitExpandSymbols::new(pp.interner_mut());
pp.add_explicit_expand_macros(explicit_expand.iter());
}
let mut fields_dict = FieldsDict::new();
let mut global_const_dict = crate::global_const_dict::GlobalConstDict::new();
let mut enum_dict = EnumDict::new();
pp.set_comment_callback(Box::new(ApidocCollector::new()));
let sv_head_id = pp.interner_mut().intern("_SV_HEAD");
pp.set_macro_called_callback(sv_head_id, Box::new(MacroCallWatcher::new()));
const COMMON_FIELD_MACROS: &[&str] = &["_XPV_HEAD", "_XPVCV_COMMON"];
let common_field_macro_ids: Vec<InternedStr> = COMMON_FIELD_MACROS
.iter()
.map(|name| {
let id = pp.interner_mut().intern(name);
pp.set_macro_called_callback(id, Box::new(MacroCallWatcher::new()));
id
})
.collect();
pp.set_macro_def_callback(Box::new(CommonMacroBodyCollector::new(
common_field_macro_ids.iter().copied().collect(),
)));
let pthx_id = pp.interner_mut().intern("pTHX_");
let pthx_no_comma_id = pp.interner_mut().intern("pTHX");
if perl_build_mode.is_threaded() {
pp.set_macro_called_callback(pthx_id, Box::new(MacroCallWatcher::new()));
pp.set_macro_called_callback(pthx_no_comma_id, Box::new(MacroCallWatcher::new()));
}
let mut c_fn_decl_dict = CFnDeclDict::new();
let mut parser = match Parser::new(&mut pp) {
Ok(p) => p,
Err(e) => return Err(InferError::Compile(e.with_files(pp.files()))),
};
let mut inline_fn_dict = InlineFnDict::new();
let parse_result = parser.parse_each_with_pp(|decl, loc, path, pp| {
let interner = pp.interner();
fields_dict.collect_from_external_decl(decl, decl.is_target(), interner);
global_const_dict.try_collect(decl, decl.is_target(), interner);
enum_dict.collect_from_external_decl(decl, decl.is_target(), interner);
if decl.is_target() {
if let ExternalDecl::FunctionDef(func_def) = decl {
inline_fn_dict.collect_from_function_def(func_def, interner);
}
}
if let ExternalDecl::Declaration(declaration) = decl {
let is_thx = check_macro_called(pp, pthx_id) || check_macro_called(pp, pthx_no_comma_id);
collect_function_declarations(
declaration,
&mut c_fn_decl_dict,
is_thx,
loc,
path,
interner,
);
reset_macro_called(pp, pthx_id);
reset_macro_called(pp, pthx_no_comma_id);
}
if decl.is_target() {
if let Some(struct_names) = extract_struct_names(decl) {
if let Some(cb) = pp.get_macro_called_callback(sv_head_id) {
if let Some(watcher) = cb.as_any().downcast_ref::<MacroCallWatcher>() {
if watcher.take_called() {
let type_name = watcher.last_args()
.and_then(|args| args.first().cloned())
.unwrap_or_default();
for name in &struct_names {
fields_dict.add_sv_family_member_with_type(*name, &type_name);
}
}
}
}
for ¯o_id in &common_field_macro_ids {
if let Some(cb) = pp.get_macro_called_callback(macro_id) {
if let Some(watcher) = cb.as_any().downcast_ref::<MacroCallWatcher>() {
if watcher.take_called() {
for name in &struct_names {
fields_dict.add_struct_uses_common_macro(*name, macro_id);
}
}
}
}
}
}
}
ControlFlow::Continue(())
});
if let Err(e) = parse_result {
drop(parser);
return Err(InferError::Compile(e.with_files(pp.files())));
}
let typedefs = parser.typedefs().clone();
let callback = pp.take_comment_callback().expect("callback should exist");
let apidoc_collector = callback
.into_any()
.downcast::<ApidocCollector>()
.expect("callback type mismatch");
let token_type_macros: Vec<InternedStr> = apidoc_collector
.token_type_macros()
.iter()
.map(|name| pp.interner_mut().intern(name))
.collect();
fields_dict.build_consistent_type_cache(pp.interner());
{
let collector = pp
.take_macro_def_callback()
.and_then(|cb| cb.into_any().downcast::<CommonMacroBodyCollector>().ok());
let mut macro_bodies: Vec<(InternedStr, Vec<crate::token::Token>)> = collector
.map(|c| c.bodies.into_iter().collect())
.unwrap_or_default();
let pthx_id = pp.interner_mut().intern("pTHX_");
let pthx_no_comma_id = pp.interner_mut().intern("pTHX");
for (_id, body) in macro_bodies.iter_mut() {
body.retain(|t| !matches!(&t.kind,
crate::token::TokenKind::Ident(id)
if *id == pthx_id || *id == pthx_no_comma_id));
}
let interner = pp.interner();
let files = pp.files().clone();
let typedefs_ref = typedefs.clone();
fields_dict.build_common_macro_fields(¯o_bodies, |body| {
crate::parser::parse_struct_members_from_tokens_ref(
body, interner, &files, &typedefs_ref,
).map_err(crate::error::CompileError::from)
});
}
if let Some(ref dict) = rust_decl_dict {
fields_dict.build_common_field_rust_types(dict, pp.interner_mut());
}
fields_dict.build_common_macro_sv_family(pp.interner());
let mut apidoc = if let Some(path) = apidoc_path {
ApidocDict::load_auto(path)?
} else {
ApidocDict::new()
};
let apidoc_from_comments = apidoc_collector.len();
apidoc_collector.merge_into(&mut apidoc);
let mut apidoc_patches = if let Some(path) = apidoc_path {
crate::apidoc_patches::ApidocPatchSet::load_for_apidoc_path(path)?
} else {
crate::apidoc_patches::ApidocPatchSet::empty()
};
for list_path in skip_codegen_lists {
let added = apidoc_patches.merge_skip_list(list_path)?;
eprintln!(
"[apidoc-patches] merged {} skip entry(ies) from {}",
added, list_path.display()
);
}
if !apidoc_patches.is_empty() {
let applied = apidoc_patches.apply_to_apidoc(&mut apidoc);
if !apidoc_patches.source_paths.is_empty() {
let paths_str = apidoc_patches.source_paths.iter()
.map(|p| p.display().to_string())
.collect::<Vec<_>>()
.join(", ");
eprintln!(
"[apidoc-patches] loaded {} patch(es) from [{}] ({} return-type override applied, {} skip-codegen registered)",
apidoc_patches.count(),
paths_str,
applied.len(),
apidoc_patches.skip_codegen.len(),
);
}
}
apidoc.expand_type_macros(pp.macros(), pp.interner());
if let Some(opts) = debug_opts {
if let Some(filter) = &opts.dump_apidoc_after_merge {
apidoc.dump_filtered(filter);
return Ok(None);
}
}
let mut infer_ctx = MacroInferContext::new();
if let Some(opts) = debug_opts {
if !opts.debug_type_inference.is_empty() {
infer_ctx.set_debug_macros(opts.debug_type_inference.iter().cloned());
}
}
let sym_athx = pp.interner_mut().intern("aTHX");
let sym_tthx = pp.interner_mut().intern("tTHX");
let sym_my_perl = pp.interner_mut().intern("my_perl");
let thx_symbols = (sym_athx, sym_tthx, sym_my_perl);
let no_expand = NoExpandSymbols::new(pp.interner_mut());
{
let explicit_expand = ExplicitExpandSymbols::new(pp.interner_mut());
pp.add_explicit_expand_macros(explicit_expand.iter());
}
pp.add_explicit_expand_macros(token_type_macros.iter().copied());
infer_ctx.analyze_all_macros(
&mut pp,
Some(&apidoc),
Some(&apidoc_patches),
Some(&fields_dict),
rust_decl_dict.as_ref(),
Some(&mut inline_fn_dict),
Some(&c_fn_decl_dict),
&typedefs,
thx_symbols,
no_expand,
perl_build_mode,
);
let thx_dependent_count = infer_ctx.macros.values()
.filter(|info| info.is_target && info.is_thx_dependent)
.count();
let c_fn_decl_count = c_fn_decl_dict.len();
let c_fn_thx_count = c_fn_decl_dict.thx_count();
let stats = InferStats {
apidoc_from_comments,
thx_dependent_count,
c_fn_decl_count,
c_fn_thx_count,
};
infer_ctx.resolve_param_and_return_types(
pp.interner_mut(),
rust_decl_dict.as_ref(),
&inline_fn_dict,
);
Ok(Some(InferResult {
infer_ctx,
fields_dict,
enum_dict,
inline_fn_dict,
apidoc,
rust_decl_dict,
c_fn_decl_dict,
typedefs,
global_const_dict,
apidoc_patches,
perl_build_mode,
perlvar_dict: crate::perlvar_dict::PerlvarDict::new(),
preprocessor: pp,
stats,
}))
}
/// Lists the names of struct/union specifiers in `decl` that carry a member
/// list.
///
/// Returns `None` when `decl` is not a `Declaration` or when no named
/// struct/union with members is present; anonymous ones are skipped.
fn extract_struct_names(decl: &ExternalDecl) -> Option<Vec<InternedStr>> {
    let ExternalDecl::Declaration(declaration) = decl else {
        return None;
    };
    let found: Vec<InternedStr> = declaration
        .specs
        .type_specs
        .iter()
        .filter_map(|type_spec| match type_spec {
            TypeSpec::Struct(spec) | TypeSpec::Union(spec) if spec.members.is_some() => spec.name,
            _ => None,
        })
        .collect();
    (!found.is_empty()).then_some(found)
}
/// Reports whether the `MacroCallWatcher` registered for `macro_id` has
/// seen a call. Yields `false` when no callback is registered or when the
/// callback is not a `MacroCallWatcher`.
fn check_macro_called(pp: &Preprocessor, macro_id: InternedStr) -> bool {
    let Some(callback) = pp.get_macro_called_callback(macro_id) else {
        return false;
    };
    match callback.as_any().downcast_ref::<MacroCallWatcher>() {
        Some(watcher) => watcher.was_called(),
        None => false,
    }
}
/// Clears the "called" state of the `MacroCallWatcher` registered for
/// `macro_id` by taking it and discarding the result. Does nothing when no
/// such watcher exists.
fn reset_macro_called(pp: &Preprocessor, macro_id: InternedStr) {
    let watcher = pp
        .get_macro_called_callback(macro_id)
        .and_then(|cb| cb.as_any().downcast_ref::<MacroCallWatcher>());
    if let Some(watcher) = watcher {
        watcher.take_called();
    }
}
/// Records every named function declarator found in `declaration` in `dict`.
///
/// A declarator counts as a function when its derived-declarator list
/// contains a `DerivedDecl::Function`; the first such entry supplies the
/// parameters. Declarators without a name are skipped.
///
/// * `is_thx` - stored verbatim on each recorded entry.
/// * `loc`, `path` - combined into the entry's `location` as `path:line`.
/// * `interner` - resolves interned names while rendering type strings.
///
/// Parameter and return types are rendered from the declaration specifiers
/// only (via `type_specs_to_string`).
fn collect_function_declarations(
    declaration: &crate::ast::Declaration,
    dict: &mut CFnDeclDict,
    is_thx: bool,
    loc: &crate::source::SourceLocation,
    path: &std::path::Path,
    interner: &crate::intern::StringInterner,
) {
    for init_decl in &declaration.declarators {
        let declarator = &init_decl.declarator;
        let param_list = declarator.derived.iter().find_map(|d| match d {
            DerivedDecl::Function(params) => Some(params),
            _ => None,
        });
        // Only named function declarators are recorded.
        let (Some(param_list), Some(name)) = (param_list, declarator.name) else {
            continue;
        };
        let params: Vec<CParam> = param_list
            .params
            .iter()
            .map(|param| {
                let param_name = param.declarator.as_ref().and_then(|d| d.name);
                let ty = type_specs_to_string(&param.specs, interner);
                CParam { name: param_name, ty }
            })
            .collect();
        let ret_ty = type_specs_to_string(&declaration.specs, interner);
        dict.insert(CFnDecl {
            name,
            params,
            ret_ty,
            is_thx,
            is_target: declaration.is_target,
            location: Some(format!("{}:{}", path.display(), loc.line)),
        });
    }
}
/// Renders the type specifiers of `specs` as a space-separated C-like string.
///
/// Each specifier becomes one word (or `struct NAME` / `union NAME` /
/// `enum NAME`; just the keyword when the tag is anonymous). Typedef names
/// are resolved through `interner`. An empty specifier list yields `"int"`.
fn type_specs_to_string(specs: &crate::ast::DeclSpecs, interner: &crate::intern::StringInterner) -> String {
    use crate::ast::TypeSpec;

    let words: Vec<String> = specs
        .type_specs
        .iter()
        .map(|type_spec| match type_spec {
            TypeSpec::Void => String::from("void"),
            TypeSpec::Char => String::from("char"),
            TypeSpec::Short => String::from("short"),
            TypeSpec::Int => String::from("int"),
            TypeSpec::Long => String::from("long"),
            TypeSpec::Float => String::from("float"),
            TypeSpec::Double => String::from("double"),
            TypeSpec::Signed => String::from("signed"),
            TypeSpec::Unsigned => String::from("unsigned"),
            TypeSpec::Bool => String::from("bool"),
            TypeSpec::Complex => String::from("_Complex"),
            TypeSpec::TypedefName(name) => interner.get(*name).to_string(),
            TypeSpec::Struct(spec) => match spec.name {
                Some(tag) => format!("struct {}", interner.get(tag)),
                None => String::from("struct"),
            },
            TypeSpec::Union(spec) => match spec.name {
                Some(tag) => format!("union {}", interner.get(tag)),
                None => String::from("union"),
            },
            TypeSpec::Enum(spec) => match spec.name {
                Some(tag) => format!("enum {}", interner.get(tag)),
                None => String::from("enum"),
            },
            TypeSpec::TypeofExpr(_) => String::from("typeof(...)"),
            TypeSpec::Int128 => String::from("__int128"),
            TypeSpec::Float16 => String::from("_Float16"),
            TypeSpec::Float32 => String::from("_Float32"),
            TypeSpec::Float64 => String::from("_Float64"),
            TypeSpec::Float128 => String::from("_Float128"),
            TypeSpec::Float32x => String::from("_Float32x"),
            TypeSpec::Float64x => String::from("_Float64x"),
        })
        .collect();

    // No specifier at all falls back to "int".
    if words.is_empty() {
        return String::from("int");
    }
    words.join(" ")
}