1use std::collections::{HashMap, HashSet};
6use std::ops::ControlFlow;
7use std::path::{Path, PathBuf};
8
9use crate::apidoc::{ApidocCollector, ApidocDict, ApidocResolveError};
10use crate::ast::{DerivedDecl, ExternalDecl, TypeSpec};
11use crate::c_fn_decl::{CFnDecl, CFnDeclDict, CParam};
12use crate::enum_dict::EnumDict;
13use crate::error::EnrichedCompileError;
14use crate::fields_dict::FieldsDict;
15use crate::inline_fn::InlineFnDict;
16use crate::intern::InternedStr;
17use crate::macro_infer::{ExplicitExpandSymbols, MacroInferContext, NoExpandSymbols};
18use crate::parser::Parser;
19use crate::perl_config::PerlConfigError;
20use crate::preprocessor::{MacroCallWatcher, MacroDefCallback, Preprocessor};
21use crate::rust_decl::RustDeclDict;
22
/// Set of interned names used as the typedef dictionary.
///
/// `run_inference_with_preprocessor` fills it with a clone of the set exposed by
/// `Parser::typedefs` and returns it in [`InferResult::typedefs`].
pub type TypedefDict = HashSet<InternedStr>;
25
/// Error type returned by the inference entry points in this module.
///
/// Each variant wraps the underlying error unchanged; the `From` impls below
/// allow the wrapped error types to be propagated with `?`.
#[derive(Debug)]
pub enum InferError {
    /// Wraps a [`PerlConfigError`]; displayed with the "Perl config error" prefix.
    PerlConfig(PerlConfigError),
    /// Wraps an [`ApidocResolveError`]; displayed with the "Apidoc resolve error" prefix.
    ApidocResolve(ApidocResolveError),
    /// Wraps an [`EnrichedCompileError`] (a compile error with file information
    /// attached via `with_files`); displayed with the "Compile error" prefix.
    Compile(EnrichedCompileError),
    /// Wraps a [`std::io::Error`]; displayed with the "I/O error" prefix.
    Io(std::io::Error),
}
38
39impl std::fmt::Display for InferError {
40 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
41 match self {
42 InferError::PerlConfig(e) => write!(f, "Perl config error: {}", e),
43 InferError::ApidocResolve(e) => write!(f, "Apidoc resolve error: {}", e),
44 InferError::Compile(e) => write!(f, "Compile error: {}", e),
45 InferError::Io(e) => write!(f, "I/O error: {}", e),
46 }
47 }
48}
49
50impl std::error::Error for InferError {
51 fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
52 match self {
53 InferError::PerlConfig(e) => Some(e),
54 InferError::ApidocResolve(e) => Some(e),
55 InferError::Compile(e) => Some(e),
56 InferError::Io(e) => Some(e),
57 }
58 }
59}
60
61impl From<PerlConfigError> for InferError {
62 fn from(e: PerlConfigError) -> Self {
63 InferError::PerlConfig(e)
64 }
65}
66
67impl From<ApidocResolveError> for InferError {
68 fn from(e: ApidocResolveError) -> Self {
69 InferError::ApidocResolve(e)
70 }
71}
72
73impl From<EnrichedCompileError> for InferError {
74 fn from(e: EnrichedCompileError) -> Self {
75 InferError::Compile(e)
76 }
77}
78
79impl From<std::io::Error> for InferError {
80 fn from(e: std::io::Error) -> Self {
81 InferError::Io(e)
82 }
83}
84
/// Builder-style configuration for an inference run.
///
/// Only `input_file` is mandatory; every other setting starts out unset
/// (`None` / `false`) and is filled in through the chained `with_*` methods.
#[derive(Debug, Clone)]
pub struct InferConfig {
    /// Path of the input file.
    pub input_file: PathBuf,
    /// Optional apidoc path.
    pub apidoc_path: Option<PathBuf>,
    /// Optional bindings path.
    pub bindings_path: Option<PathBuf>,
    /// Optional apidoc directory.
    pub apidoc_dir: Option<PathBuf>,
    /// Debug flag; `false` unless enabled with [`InferConfig::with_debug`].
    pub debug: bool,
}

impl InferConfig {
    /// Creates a configuration for `input_file` with all optional settings unset.
    pub fn new(input_file: PathBuf) -> Self {
        Self {
            debug: false,
            apidoc_dir: None,
            bindings_path: None,
            apidoc_path: None,
            input_file,
        }
    }

    /// Returns the configuration with `apidoc_path` set to `path`.
    pub fn with_apidoc(self, path: PathBuf) -> Self {
        Self { apidoc_path: Some(path), ..self }
    }

    /// Returns the configuration with `bindings_path` set to `path`.
    pub fn with_bindings(self, path: PathBuf) -> Self {
        Self { bindings_path: Some(path), ..self }
    }

    /// Returns the configuration with `apidoc_dir` set to `path`.
    pub fn with_apidoc_dir(self, path: PathBuf) -> Self {
        Self { apidoc_dir: Some(path), ..self }
    }

    /// Returns the configuration with the `debug` flag set to `debug`.
    pub fn with_debug(self, debug: bool) -> Self {
        Self { debug, ..self }
    }
}
136
/// Optional debugging switches consulted by `run_inference_with_preprocessor`.
#[derive(Debug, Clone, Default)]
pub struct DebugOptions {
    /// When set, the merged apidoc is dumped through `dump_filtered` with this
    /// filter and the inference run stops early.
    pub dump_apidoc_after_merge: Option<String>,
    /// Names handed to `MacroInferContext::set_debug_macros` when non-empty.
    pub debug_type_inference: Vec<String>,
}

impl DebugOptions {
    /// Creates options with every switch disabled (same as `Default`).
    pub fn new() -> Self {
        Self::default()
    }

    /// Returns the options with the apidoc dump enabled for `filter`.
    pub fn dump_apidoc(self, filter: impl Into<String>) -> Self {
        Self {
            dump_apidoc_after_merge: Some(filter.into()),
            ..self
        }
    }
}
160
/// Macro-definition callback state that captures the token bodies of a fixed
/// set of macros as the preprocessor defines them.
struct CommonMacroBodyCollector {
    /// Names of the macros whose bodies should be captured.
    targets: HashSet<InternedStr>,
    /// Captured bodies keyed by macro name; a later definition of the same
    /// name replaces the earlier entry.
    bodies: HashMap<InternedStr, Vec<crate::token::Token>>,
}
171
172impl CommonMacroBodyCollector {
173 fn new(targets: HashSet<InternedStr>) -> Self {
174 Self { targets, bodies: HashMap::new() }
175 }
176}
177
178impl MacroDefCallback for CommonMacroBodyCollector {
179 fn on_macro_defined(&mut self, def: &crate::macro_def::MacroDef) {
180 if self.targets.contains(&def.name) {
181 self.bodies.insert(def.name, def.body.clone());
182 }
183 }
184 fn into_any(self: Box<Self>) -> Box<dyn std::any::Any> { self }
185}
186
/// Counters gathered by `run_inference_with_preprocessor`.
#[derive(Debug, Clone, Default)]
pub struct InferStats {
    /// Number of entries the `ApidocCollector` comment callback held before
    /// they were merged into the apidoc dictionary.
    pub apidoc_from_comments: usize,
    /// Number of analyzed macros that are both `is_target` and `is_thx_dependent`.
    pub thx_dependent_count: usize,
    /// Number of entries in the C function declaration dictionary (`len()`).
    pub c_fn_decl_count: usize,
    /// Value reported by `CFnDeclDict::thx_count()`.
    pub c_fn_thx_count: usize,
}
199
/// Everything produced by a successful `run_inference_with_preprocessor` call.
pub struct InferResult {
    /// Macro inference context after `analyze_all_macros` and
    /// `resolve_param_and_return_types` have run.
    pub infer_ctx: MacroInferContext,
    /// Field information collected from every parsed external declaration,
    /// plus the caches and common-macro data built after parsing.
    pub fields_dict: FieldsDict,
    /// Enum information collected from every parsed external declaration.
    pub enum_dict: EnumDict,
    /// Function definitions collected from target declarations.
    pub inline_fn_dict: InlineFnDict,
    /// Apidoc dictionary: loaded from the apidoc path (or empty), merged with
    /// the comment-collected entries, patched, and with type macros expanded.
    pub apidoc: ApidocDict,
    /// Parsed bindings, or `None` when no bindings path was supplied.
    pub rust_decl_dict: Option<RustDeclDict>,
    /// C function declarations collected while parsing.
    pub c_fn_decl_dict: CFnDeclDict,
    /// Clone of the parser's typedef set taken after parsing finished.
    pub typedefs: TypedefDict,
    /// Global constants collected via `GlobalConstDict::try_collect`.
    pub global_const_dict: crate::global_const_dict::GlobalConstDict,
    /// Apidoc patch set loaded for the apidoc path (or empty), with any skip
    /// lists merged in.
    pub apidoc_patches: crate::apidoc_patches::ApidocPatchSet,
    /// Build mode used for the run: the caller's override, otherwise the
    /// detected mode, otherwise `Threaded`.
    pub perl_build_mode: crate::perl_config::PerlBuildMode,
    /// Created with `PerlvarDict::new()` by `run_inference_with_preprocessor`;
    /// that function does not populate it.
    pub perlvar_dict: crate::perlvar_dict::PerlvarDict,
    /// The preprocessor that was passed in, handed back to the caller.
    pub preprocessor: Preprocessor,
    /// Counters gathered during the run.
    pub stats: InferStats,
}
232
233pub fn run_inference_with_preprocessor(
244 mut pp: Preprocessor,
245 apidoc_path: Option<&Path>,
246 bindings_path: Option<&Path>,
247 debug_opts: Option<&DebugOptions>,
248 skip_codegen_lists: &[PathBuf],
249 perl_build_mode_override: Option<crate::perl_config::PerlBuildMode>,
250) -> Result<Option<InferResult>, InferError> {
251 let perl_build_mode = match perl_build_mode_override {
253 Some(m) => m,
254 None => crate::perl_config::PerlBuildMode::detect_from_perl_config()
255 .unwrap_or(crate::perl_config::PerlBuildMode::Threaded),
256 };
257 eprintln!("[perl-mode] {:?}", perl_build_mode);
258 let rust_decl_dict = if let Some(path) = bindings_path {
260 Some(RustDeclDict::parse_file(path)?)
261 } else {
262 None
263 };
264
265 if let Some(ref dict) = rust_decl_dict {
267 for name in dict.consts.keys() {
268 let interned = pp.interner_mut().intern(name);
269 pp.add_skip_expand_macro(interned);
270 }
271 dict.intern_names(pp.interner_mut());
275 }
276
277 {
280 let explicit_expand = ExplicitExpandSymbols::new(pp.interner_mut());
281 pp.add_explicit_expand_macros(explicit_expand.iter());
282 }
283
284 let mut fields_dict = FieldsDict::new();
286 let mut global_const_dict = crate::global_const_dict::GlobalConstDict::new();
287
288 let mut enum_dict = EnumDict::new();
290
291 pp.set_comment_callback(Box::new(ApidocCollector::new()));
293
294 let sv_head_id = pp.interner_mut().intern("_SV_HEAD");
296 pp.set_macro_called_callback(sv_head_id, Box::new(MacroCallWatcher::new()));
297
298 const COMMON_FIELD_MACROS: &[&str] = &["_XPV_HEAD", "_XPVCV_COMMON"];
306 let common_field_macro_ids: Vec<InternedStr> = COMMON_FIELD_MACROS
307 .iter()
308 .map(|name| {
309 let id = pp.interner_mut().intern(name);
310 pp.set_macro_called_callback(id, Box::new(MacroCallWatcher::new()));
311 id
312 })
313 .collect();
314 pp.set_macro_def_callback(Box::new(CommonMacroBodyCollector::new(
315 common_field_macro_ids.iter().copied().collect(),
316 )));
317
318 let pthx_id = pp.interner_mut().intern("pTHX_");
327 let pthx_no_comma_id = pp.interner_mut().intern("pTHX");
328 if perl_build_mode.is_threaded() {
329 pp.set_macro_called_callback(pthx_id, Box::new(MacroCallWatcher::new()));
330 pp.set_macro_called_callback(pthx_no_comma_id, Box::new(MacroCallWatcher::new()));
331 }
332
333 let mut c_fn_decl_dict = CFnDeclDict::new();
335
336 let mut parser = match Parser::new(&mut pp) {
338 Ok(p) => p,
339 Err(e) => return Err(InferError::Compile(e.with_files(pp.files()))),
340 };
341
342 let mut inline_fn_dict = InlineFnDict::new();
344
345 let parse_result = parser.parse_each_with_pp(|decl, loc, path, pp| {
349 let interner = pp.interner();
350 fields_dict.collect_from_external_decl(decl, decl.is_target(), interner);
351
352 global_const_dict.try_collect(decl, decl.is_target(), interner);
355
356 enum_dict.collect_from_external_decl(decl, decl.is_target(), interner);
358
359 if decl.is_target() {
361 if let ExternalDecl::FunctionDef(func_def) = decl {
362 inline_fn_dict.collect_from_function_def(func_def, interner);
363 }
364 }
365
366 if let ExternalDecl::Declaration(declaration) = decl {
368 let is_thx = check_macro_called(pp, pthx_id) || check_macro_called(pp, pthx_no_comma_id);
370
371 collect_function_declarations(
373 declaration,
374 &mut c_fn_decl_dict,
375 is_thx,
376 loc,
377 path,
378 interner,
379 );
380
381 reset_macro_called(pp, pthx_id);
383 reset_macro_called(pp, pthx_no_comma_id);
384 }
385
386 if decl.is_target() {
388 if let Some(struct_names) = extract_struct_names(decl) {
389 if let Some(cb) = pp.get_macro_called_callback(sv_head_id) {
391 if let Some(watcher) = cb.as_any().downcast_ref::<MacroCallWatcher>() {
392 if watcher.take_called() {
393 let type_name = watcher.last_args()
395 .and_then(|args| args.first().cloned())
396 .unwrap_or_default();
397
398 for name in &struct_names {
399 fields_dict.add_sv_family_member_with_type(*name, &type_name);
401 }
402 }
403 }
404 }
405
406 for ¯o_id in &common_field_macro_ids {
408 if let Some(cb) = pp.get_macro_called_callback(macro_id) {
409 if let Some(watcher) = cb.as_any().downcast_ref::<MacroCallWatcher>() {
410 if watcher.take_called() {
411 for name in &struct_names {
412 fields_dict.add_struct_uses_common_macro(*name, macro_id);
413 }
414 }
415 }
416 }
417 }
418 }
419 }
420 ControlFlow::Continue(())
421 });
422 if let Err(e) = parse_result {
423 drop(parser);
425 return Err(InferError::Compile(e.with_files(pp.files())));
426 }
427
428 let typedefs = parser.typedefs().clone();
430
431 let callback = pp.take_comment_callback().expect("callback should exist");
433 let apidoc_collector = callback
434 .into_any()
435 .downcast::<ApidocCollector>()
436 .expect("callback type mismatch");
437
438 let token_type_macros: Vec<InternedStr> = apidoc_collector
441 .token_type_macros()
442 .iter()
443 .map(|name| pp.interner_mut().intern(name))
444 .collect();
445
446 fields_dict.build_consistent_type_cache(pp.interner());
448
449 {
454 let collector = pp
455 .take_macro_def_callback()
456 .and_then(|cb| cb.into_any().downcast::<CommonMacroBodyCollector>().ok());
457 let mut macro_bodies: Vec<(InternedStr, Vec<crate::token::Token>)> = collector
458 .map(|c| c.bodies.into_iter().collect())
459 .unwrap_or_default();
460 let pthx_id = pp.interner_mut().intern("pTHX_");
464 let pthx_no_comma_id = pp.interner_mut().intern("pTHX");
465 for (_id, body) in macro_bodies.iter_mut() {
466 body.retain(|t| !matches!(&t.kind,
467 crate::token::TokenKind::Ident(id)
468 if *id == pthx_id || *id == pthx_no_comma_id));
469 }
470 let interner = pp.interner();
471 let files = pp.files().clone();
472 let typedefs_ref = typedefs.clone();
473 fields_dict.build_common_macro_fields(¯o_bodies, |body| {
474 crate::parser::parse_struct_members_from_tokens_ref(
475 body, interner, &files, &typedefs_ref,
476 ).map_err(crate::error::CompileError::from)
477 });
478 }
479
480 if let Some(ref dict) = rust_decl_dict {
484 fields_dict.build_common_field_rust_types(dict, pp.interner_mut());
485 }
486
487 fields_dict.build_common_macro_sv_family(pp.interner());
491
492 let mut apidoc = if let Some(path) = apidoc_path {
497 ApidocDict::load_auto(path)?
498 } else {
499 ApidocDict::new()
500 };
501 let apidoc_from_comments = apidoc_collector.len();
502 apidoc_collector.merge_into(&mut apidoc);
503
504 let mut apidoc_patches = if let Some(path) = apidoc_path {
511 crate::apidoc_patches::ApidocPatchSet::load_for_apidoc_path(path)?
512 } else {
513 crate::apidoc_patches::ApidocPatchSet::empty()
514 };
515 for list_path in skip_codegen_lists {
517 let added = apidoc_patches.merge_skip_list(list_path)?;
518 eprintln!(
519 "[apidoc-patches] merged {} skip entry(ies) from {}",
520 added, list_path.display()
521 );
522 }
523 if !apidoc_patches.is_empty() {
524 let applied = apidoc_patches.apply_to_apidoc(&mut apidoc);
525 if !apidoc_patches.source_paths.is_empty() {
526 let paths_str = apidoc_patches.source_paths.iter()
527 .map(|p| p.display().to_string())
528 .collect::<Vec<_>>()
529 .join(", ");
530 eprintln!(
531 "[apidoc-patches] loaded {} patch(es) from [{}] ({} return-type override applied, {} skip-codegen registered)",
532 apidoc_patches.count(),
533 paths_str,
534 applied.len(),
535 apidoc_patches.skip_codegen.len(),
536 );
537 }
538 }
539
540 apidoc.expand_type_macros(pp.macros(), pp.interner());
542
543 if let Some(opts) = debug_opts {
545 if let Some(filter) = &opts.dump_apidoc_after_merge {
546 apidoc.dump_filtered(filter);
547 return Ok(None);
548 }
549 }
550
551 let mut infer_ctx = MacroInferContext::new();
553
554 if let Some(opts) = debug_opts {
556 if !opts.debug_type_inference.is_empty() {
557 infer_ctx.set_debug_macros(opts.debug_type_inference.iter().cloned());
558 }
559 }
560
561 let sym_athx = pp.interner_mut().intern("aTHX");
563 let sym_tthx = pp.interner_mut().intern("tTHX");
564 let sym_my_perl = pp.interner_mut().intern("my_perl");
565 let thx_symbols = (sym_athx, sym_tthx, sym_my_perl);
566
567 let no_expand = NoExpandSymbols::new(pp.interner_mut());
569
570 {
573 let explicit_expand = ExplicitExpandSymbols::new(pp.interner_mut());
574 pp.add_explicit_expand_macros(explicit_expand.iter());
575 }
576 pp.add_explicit_expand_macros(token_type_macros.iter().copied());
577
578 infer_ctx.analyze_all_macros(
579 &mut pp,
580 Some(&apidoc),
581 Some(&apidoc_patches),
582 Some(&fields_dict),
583 rust_decl_dict.as_ref(),
584 Some(&mut inline_fn_dict),
585 Some(&c_fn_decl_dict),
586 &typedefs,
587 thx_symbols,
588 no_expand,
589 perl_build_mode,
590 );
591
592 let thx_dependent_count = infer_ctx.macros.values()
594 .filter(|info| info.is_target && info.is_thx_dependent)
595 .count();
596
597 let c_fn_decl_count = c_fn_decl_dict.len();
599 let c_fn_thx_count = c_fn_decl_dict.thx_count();
600
601 let stats = InferStats {
602 apidoc_from_comments,
603 thx_dependent_count,
604 c_fn_decl_count,
605 c_fn_thx_count,
606 };
607
608 infer_ctx.resolve_param_and_return_types(
610 pp.interner_mut(),
611 rust_decl_dict.as_ref(),
612 &inline_fn_dict,
613 );
614
615 Ok(Some(InferResult {
616 infer_ctx,
617 fields_dict,
618 enum_dict,
619 inline_fn_dict,
620 apidoc,
621 rust_decl_dict,
622 c_fn_decl_dict,
623 typedefs,
624 global_const_dict,
625 apidoc_patches,
626 perl_build_mode,
627 perlvar_dict: crate::perlvar_dict::PerlvarDict::new(),
630 preprocessor: pp,
631 stats,
632 }))
633}
634
635fn extract_struct_names(decl: &ExternalDecl) -> Option<Vec<InternedStr>> {
637 let declaration = match decl {
638 ExternalDecl::Declaration(d) => d,
639 _ => return None,
640 };
641
642 let mut names = Vec::new();
643
644 for type_spec in &declaration.specs.type_specs {
645 match type_spec {
646 TypeSpec::Struct(spec) | TypeSpec::Union(spec) => {
647 if spec.members.is_some() {
649 if let Some(name) = spec.name {
650 names.push(name);
651 }
652 }
653 }
654 _ => {}
655 }
656 }
657
658 if names.is_empty() {
659 None
660 } else {
661 Some(names)
662 }
663}
664
665fn check_macro_called(pp: &Preprocessor, macro_id: InternedStr) -> bool {
667 pp.get_macro_called_callback(macro_id)
668 .and_then(|cb| cb.as_any().downcast_ref::<MacroCallWatcher>())
669 .is_some_and(|w| w.was_called())
670}
671
672fn reset_macro_called(pp: &Preprocessor, macro_id: InternedStr) {
674 if let Some(cb) = pp.get_macro_called_callback(macro_id) {
676 if let Some(w) = cb.as_any().downcast_ref::<MacroCallWatcher>() {
677 w.take_called(); }
679 }
680}
681
682fn collect_function_declarations(
684 declaration: &crate::ast::Declaration,
685 dict: &mut CFnDeclDict,
686 is_thx: bool,
687 loc: &crate::source::SourceLocation,
688 path: &std::path::Path,
689 interner: &crate::intern::StringInterner,
690) {
691 for init_decl in &declaration.declarators {
693 let declarator = &init_decl.declarator;
694
695 let param_list = declarator.derived.iter().find_map(|d| {
697 if let DerivedDecl::Function(params) = d {
698 Some(params)
699 } else {
700 None
701 }
702 });
703
704 if let Some(param_list) = param_list {
705 if let Some(name) = declarator.name {
706 let params: Vec<CParam> = param_list.params.iter().map(|param| {
708 let param_name = param.declarator.as_ref().and_then(|d| d.name);
709 let ty = type_specs_to_string(¶m.specs, interner);
710 CParam { name: param_name, ty }
711 }).collect();
712
713 let ret_ty = type_specs_to_string(&declaration.specs, interner);
715
716 let c_fn_decl = CFnDecl {
717 name,
718 params,
719 ret_ty,
720 is_thx,
721 is_target: declaration.is_target,
722 location: Some(format!("{}:{}", path.display(), loc.line)),
723 };
724 dict.insert(c_fn_decl);
725 }
726 }
727 }
728}
729
730fn type_specs_to_string(specs: &crate::ast::DeclSpecs, interner: &crate::intern::StringInterner) -> String {
732 use crate::ast::TypeSpec;
733
734 let mut parts = Vec::new();
735
736 for type_spec in &specs.type_specs {
737 match type_spec {
738 TypeSpec::Void => parts.push("void".to_string()),
739 TypeSpec::Char => parts.push("char".to_string()),
740 TypeSpec::Short => parts.push("short".to_string()),
741 TypeSpec::Int => parts.push("int".to_string()),
742 TypeSpec::Long => parts.push("long".to_string()),
743 TypeSpec::Float => parts.push("float".to_string()),
744 TypeSpec::Double => parts.push("double".to_string()),
745 TypeSpec::Signed => parts.push("signed".to_string()),
746 TypeSpec::Unsigned => parts.push("unsigned".to_string()),
747 TypeSpec::Bool => parts.push("bool".to_string()),
748 TypeSpec::Complex => parts.push("_Complex".to_string()),
749 TypeSpec::TypedefName(name) => parts.push(interner.get(*name).to_string()),
750 TypeSpec::Struct(spec) => {
751 if let Some(name) = spec.name {
752 parts.push(format!("struct {}", interner.get(name)));
753 } else {
754 parts.push("struct".to_string());
755 }
756 }
757 TypeSpec::Union(spec) => {
758 if let Some(name) = spec.name {
759 parts.push(format!("union {}", interner.get(name)));
760 } else {
761 parts.push("union".to_string());
762 }
763 }
764 TypeSpec::Enum(spec) => {
765 if let Some(name) = spec.name {
766 parts.push(format!("enum {}", interner.get(name)));
767 } else {
768 parts.push("enum".to_string());
769 }
770 }
771 TypeSpec::TypeofExpr(_) => parts.push("typeof(...)".to_string()),
772 TypeSpec::Int128 => parts.push("__int128".to_string()),
773 TypeSpec::Float16 => parts.push("_Float16".to_string()),
774 TypeSpec::Float32 => parts.push("_Float32".to_string()),
775 TypeSpec::Float64 => parts.push("_Float64".to_string()),
776 TypeSpec::Float128 => parts.push("_Float128".to_string()),
777 TypeSpec::Float32x => parts.push("_Float32x".to_string()),
778 TypeSpec::Float64x => parts.push("_Float64x".to_string()),
779 }
780 }
781
782 if parts.is_empty() {
783 "int".to_string() } else {
785 parts.join(" ")
786 }
787}