Skip to main content

libperl_macrogen/
lib.rs

1//! # libperl-macrogen
2//!
3//! Generate Rust FFI bindings for the things `bindgen` can't see:
4//! C **macro functions** and **`static inline`** definitions in
5//! Perl's header tree.
6//!
7//! `rust-bindgen` is the standard for translating C declarations to
8//! Rust, but it deliberately skips macro-shaped function definitions
9//! (because they have no fixed type signature) and produces no Rust
10//! body for `static inline` functions (because their definitions live
11//! in headers, not the linked library). For wrapping libperl that
12//! gap is huge — much of the public-looking API (`SvIV`, `newRV_inc`,
13//! `PL_stack_base`, hundreds more) is exposed as macros or
14//! `static inline` only.
15//!
16//! `libperl-macrogen` complements `bindgen`: it lexes, parses, and
17//! type-infers the relevant headers and emits Rust wrappers like
18//!
19//! ```text
20//! pub unsafe fn SvIV(my_perl: *mut PerlInterpreter, sv: *mut SV) -> IV {
21//!     unsafe { Perl_SvIV(my_perl, sv) }
22//! }
23//! ```
24//!
25//! plus declarative macros for `PERLVAR`-driven globals (so the same
26//! `PL_stack_base!(my_perl)` source compiles against threaded and
27//! non-threaded Perl).
28//!
29//! ## Library API
30//!
31//! The high-level entry point is the [`Pipeline`] builder, which
32//! drives a header file through the preprocess → infer → codegen
33//! stages and writes a Rust source file:
34//!
35//! ```no_run
36//! use libperl_macrogen::Pipeline;
37//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
38//! let mut output = std::fs::File::create("macro_bindings.rs")?;
39//! Pipeline::builder("xs-wrapper.h")
40//!     .with_auto_perl_config()?
41//!     .with_bindings("bindings.rs")     // bindgen output for type info
42//!     .with_codegen_defaults()
43//!     .build()?
44//!     .generate(&mut output)?;
45//! # Ok(())
46//! # }
47//! ```
48//!
49//! See [`PipelineBuilder`] for the full set of options
50//! (skip-list, extra include paths, codegen knobs, ...).
51//!
52//! ## CLI
53//!
54//! Installing the crate also gives you a `libperl-macrogen` binary
55//! for one-off / inspection use. Run with `--help` for the option
56//! summary.
57//!
58//! ## Apidoc data
59//!
60//! The crate bundles a pre-extracted snapshot of perlapi documentation
61//! (`apidoc.tar.gz`, ~1.9 MiB compressed) that the type inferencer
62//! consults during macro-wrapper generation. This means **no network
63//! access is required at build time** — works under docs.rs's
64//! `--network none` sandbox, in air-gapped CI, etc.
65//!
66//! For advanced use (e.g. testing an unreleased apidoc dataset on an
67//! offline mirror), set the `LIBPERL_APIDOC_URL` environment variable
68//! to override the bundled snapshot and download from that URL instead.
69//!
70//! ## Status
71//!
72//! Pre-1.0 — focused on the libperl-rs use case. Wider header-tree
73//! coverage and stable APIs come after libperl-rs hits 1.0.
74
75pub mod apidoc;
76pub mod apidoc_data;
77pub mod apidoc_patches;
78pub mod ast;
79pub mod c_fn_decl;
80pub mod error;
81pub mod enum_dict;
82pub mod fields_dict;
83pub mod global_const_dict;
84pub mod infer_api;
85pub mod inline_fn;
86pub mod intern;
87pub mod lexer;
88pub mod macro_def;
89pub mod macro_infer;
90pub mod parser;
91pub mod perl_config;
92pub mod perlvar_dict;
93pub mod perlvar_emitter;
94pub mod pipeline;
95pub mod pp_expr;
96pub mod preprocessor;
97pub mod rust_codegen;
98pub mod rust_decl;
99pub mod static_array_emitter;
100pub mod struct_emitter;
101pub mod semantic;
102pub mod sexp;
103pub mod syn_codegen;
104pub mod source;
105pub mod token;
106pub mod token_source;
107pub mod type_env;
108pub mod type_repr;
109pub mod unified_type;
110
111// Re-export the main types
112pub use apidoc::{
113    find_apidoc_dir_from, resolve_apidoc_path,
114    ApidocArg, ApidocCollector, ApidocDict, ApidocEntry, ApidocFlags, ApidocResolveError, ApidocStats, Nullability,
115};
116pub use infer_api::{
117    run_inference_with_preprocessor,
118    DebugOptions, InferConfig, InferError, InferResult, InferStats, TypedefDict,
119};
120pub use ast::*;
121pub use error::{CompileError, DisplayLocation, LexError, PPError, ParseError, Result};
122pub use fields_dict::FieldsDict;
123pub use inline_fn::InlineFnDict;
124pub use intern::{InternedStr, StringInterner};
125pub use rust_decl::RustDeclDict;
126pub use lexer::{IdentResolver, Interning, Lexer, LookupOnly, MutableLexer, ReadOnlyLexer};
127pub use macro_def::{MacroDef, MacroKind, MacroTable};
128pub use macro_infer::{
129    convert_assert_calls_in_compound_stmt, detect_assert_kind, InferStatus, MacroInferContext,
130    MacroInferInfo, MacroInferStats, NoExpandSymbols, ParseResult,
131};
132pub use parser::{parse_expression_from_tokens, parse_expression_from_tokens_ref, parse_type_from_string, Parser};
133pub use perl_config::{
134    build_pp_config_for_perl, get_default_target_dir, get_perl_config, get_perl_version,
135    PerlConfig, PerlConfigError,
136};
137pub use perlvar_dict::{
138    ArrayLength, PerlvarCollector, PerlvarDict, PerlvarEntry, PerlvarKind,
139};
140pub use preprocessor::{
141    CallbackPair, CommentCallback, MacroCalledCallback, MacroCallWatcher, MacroDefCallback,
142    PPConfig, Preprocessor,
143};
144pub use semantic::{SemanticAnalyzer, Symbol, SymbolKind, Type};
145pub use sexp::{SexpPrinter, TypedSexpPrinter};
146pub use source::{FileId, FileRegistry, SourceLocation};
147pub use token::{Comment, CommentKind, Token, TokenKind};
148pub use token_source::{TokenSlice, TokenSliceRef, TokenSource};
149pub use type_env::{ParamLink, TypeConstraint, TypeEnv};
150pub use type_repr::{
151    CDerivedType, CPrimitiveKind, CTypeSource, CTypeSpecs, InferredType,
152    IntSize as TypeReprIntSize, RustPrimitiveKind, RustTypeRepr, RustTypeSource, TypeRepr,
153};
154pub use unified_type::{IntSize, SourcedType, TypeSource, UnifiedType};
155pub use rust_codegen::{CodegenConfig, CodegenDriver, CodegenStats, GeneratedCode, GenerateStatus, RustCodegen};
156pub use pipeline::{
157    Pipeline, PipelineBuilder, PipelineError,
158    PreprocessConfig, InferConfig as PipelineInferConfig, CodegenConfig as PipelineCodegenConfig,
159    PreprocessedPipeline, InferredPipeline, GeneratedPipeline,
160};
161
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Lexes a tiny C function end to end and checks every token the
    /// lexer yields before `Eof`, plus the interned text of the one
    /// identifier.
    #[test]
    fn test_basic_lexer_integration() {
        let input = b"int main(void) { return 0; }";

        let mut registry = FileRegistry::new();
        let file_id = registry.register(PathBuf::from("test.c"));

        let mut interner = StringInterner::new();
        let mut lexer = Lexer::new(input, file_id, &mut interner);

        // Drain the lexer, stopping at (and discarding) the `Eof` token.
        let tokens: Vec<_> = std::iter::from_fn(|| Some(lexer.next_token().unwrap()))
            .take_while(|tok| !matches!(tok.kind, TokenKind::Eof))
            .collect();

        // int main ( void ) { return 0 ; }
        assert_eq!(tokens.len(), 10);

        // Keywords come back as dedicated keyword tokens; `main` is a
        // plain identifier.
        let kinds: Vec<&TokenKind> = tokens.iter().map(|tok| &tok.kind).collect();
        assert!(matches!(kinds[0], TokenKind::KwInt));
        assert!(matches!(kinds[1], TokenKind::Ident(_)));
        assert!(matches!(kinds[2], TokenKind::LParen));
        assert!(matches!(kinds[3], TokenKind::KwVoid));
        assert!(matches!(kinds[4], TokenKind::RParen));
        assert!(matches!(kinds[5], TokenKind::LBrace));
        assert!(matches!(kinds[6], TokenKind::KwReturn));
        assert!(matches!(kinds[7], TokenKind::IntLit(0)));
        assert!(matches!(kinds[8], TokenKind::Semi));
        assert!(matches!(kinds[9], TokenKind::RBrace));

        // Check the text behind the identifier token.
        match tokens[1].kind {
            TokenKind::Ident(id) => assert_eq!(interner.get(id), "main"),
            _ => panic!("Expected identifier for 'main'"),
        }
    }

    /// Checks that a line comment ahead of the first newline is attached
    /// to that newline token as a leading comment, and that lexing then
    /// continues with the `int` keyword.
    #[test]
    fn test_comment_preservation() {
        let input = b"// doc comment\nint x;";

        let mut registry = FileRegistry::new();
        let file_id = registry.register(PathBuf::from("test.c"));

        let mut interner = StringInterner::new();
        let mut lexer = Lexer::new(input, file_id, &mut interner);

        // The newline token arrives first, carrying the comment that
        // precedes it.
        let first = lexer.next_token().unwrap();
        assert!(matches!(first.kind, TokenKind::Newline));
        let comments = &first.leading_comments;
        assert_eq!(comments.len(), 1);
        assert!(comments[0].text.contains("doc comment"));

        // Keywords are returned as keyword tokens.
        let second = lexer.next_token().unwrap();
        assert!(matches!(second.kind, TokenKind::KwInt));
    }
}