libperl_macrogen/perlvar_dict.rs
1//! `PerlvarDict` — collected PERLVAR/PERLVARI/PERLVARA/PERLVARIC entries.
2//!
3//! These entries are observed during Phase 1 (preprocessing) by registering
4//! a [`PerlvarCollector`] as a [`crate::MacroCalledCallback`] for each of the
5//! PERLVAR macro variants. The C preprocessor then routes every PERLVAR
6//! invocation it encounters in the include tree (`wrapper.h` → `perl.h` →
7//! `intrpvar.h` / `perlvars.h`) to the collector, so cpp guards (`#ifdef`,
8//! `#if defined(...)`) are evaluated correctly without our parser having to
9//! reimplement them.
10//!
11//! Phase 3 reads the dict from `InferResult` and emits one `PL_<name>!()`
12//! declarative macro per entry, formatted for the target Perl's threading
13//! mode (no `#[cfg]` in the output — see `docs/plan/README.md` §3.2 in the
14//! consumer project).
15
16use std::any::Any;
17use std::cell::RefCell;
18use std::collections::BTreeMap;
19use std::rc::Rc;
20
21use crate::preprocessor::MacroCalledCallback;
22use crate::token::Token;
23use crate::StringInterner;
24
/// One PERLVAR-family entry, as recorded by [`PerlvarCollector`].
///
/// Sized intentionally small — the C type and initializer are kept as raw
/// text because the emitter passes them through without manipulation. (See
/// CLAUDE.md "Structure-First Type Handling": PERLVAR types are an explicit
/// boundary case where the *consumer* of the emitted macro is the Rust
/// compiler itself, not our type analysis.)
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PerlvarEntry {
    /// Bare variable name, without `PL_` prefix (e.g. `"stack_sp"`), taken
    /// from the macro's second argument. [`PerlvarDict`] uses it as the key.
    pub name: String,

    /// `'I'` for per-interpreter (intrpvar.h) variables, `'G'` for
    /// process-global (perlvars.h) variables. This is the first character
    /// of the macro's first argument; the collector stores `'?'` when that
    /// argument renders as empty text.
    pub prefix: char,

    /// Variant of the PERLVAR macro that introduced this entry.
    pub kind: PerlvarKind,

    /// The C type as a single string (e.g. `"SV **"`, `"HV *"`,
    /// `"perl_mutex"`). Whitespace inside is normalized to single spaces.
    ///
    /// NOTE(review): the collector only inserts a space between two adjacent
    /// word-like tokens, so pointer types may actually be stored as `"SV**"`
    /// rather than `"SV **"` — confirm against how tokens are formatted.
    pub c_type: String,
}
47
/// Variant of the PERLVAR macro that produced an entry.
///
/// Argument text stored in the variants is the whitespace-normalized
/// rendering of the corresponding macro argument, not its original layout.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PerlvarKind {
    /// `PERLVAR(prefix, name, type)` — plain declaration.
    Var,
    /// `PERLVARI(prefix, name, type, init)` — declaration with initializer.
    /// `init_expr` holds the text of the fourth argument.
    Init { init_expr: String },
    /// `PERLVARA(prefix, name, n, type)` — fixed-length array. `length` is
    /// the classified third argument `n`.
    Array { length: ArrayLength },
    /// `PERLVARIC(prefix, name, type, init)` — const declaration with
    /// initializer. (Currently unused by upstream Perl but supported for
    /// completeness.) `init_expr` holds the text of the fourth argument.
    Const { init_expr: String },
}
63
/// Length argument of a PERLVARA. Usually a numeric literal, but the
/// preprocessor will hand us `SVt_LAST`-style `#define`d symbols too.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ArrayLength {
    /// The argument text parsed successfully as a `usize` (e.g. `4`).
    Literal(usize),
    /// Any other argument text, kept as a string (e.g. `"SVt_LAST"`).
    Symbolic(String),
}
71
/// Collection of PERLVAR entries observed during preprocessing.
///
/// Iteration order follows the `BTreeMap` key order, i.e. the byte-wise
/// ordering of the entry names (so uppercase ASCII sorts before lowercase).
/// This keeps the emitted Rust output reproducible across builds.
#[derive(Debug, Default, Clone)]
pub struct PerlvarDict {
    /// Entries keyed by their bare variable name ([`PerlvarEntry::name`]).
    entries: BTreeMap<String, PerlvarEntry>,
}
80
81impl PerlvarDict {
82 pub fn new() -> Self {
83 Self::default()
84 }
85
86 pub fn insert(&mut self, entry: PerlvarEntry) {
87 self.entries.insert(entry.name.clone(), entry);
88 }
89
90 pub fn get(&self, name: &str) -> Option<&PerlvarEntry> {
91 self.entries.get(name)
92 }
93
94 pub fn iter(&self) -> impl Iterator<Item = &PerlvarEntry> {
95 self.entries.values()
96 }
97
98 pub fn len(&self) -> usize {
99 self.entries.len()
100 }
101
102 pub fn is_empty(&self) -> bool {
103 self.entries.is_empty()
104 }
105}
106
107// ─────────────────────────────────────────────────────────────────
108// Collector callback (registered with the preprocessor)
109// ─────────────────────────────────────────────────────────────────
110
/// Which PERLVAR variant a single collector instance is watching.
/// One collector is registered per macro name (PERLVAR/PERLVARI/PERLVARA/
/// PERLVARIC); they all share the same `Rc<RefCell<PerlvarDict>>`.
#[derive(Debug, Clone, Copy)]
enum CollectorKind {
    /// Decodes `PERLVAR(prefix, name, type)`.
    Var,
    /// Decodes `PERLVARI(prefix, name, type, init)`.
    Init,
    /// Decodes `PERLVARA(prefix, name, n, type)`.
    Array,
    /// Decodes `PERLVARIC(prefix, name, type, init)`.
    Const,
}
121
/// Callback that records PERLVAR invocations into a shared dict.
///
/// Construct a set of four collectors (one per PERLVAR variant) sharing a
/// dict via [`PerlvarCollector::new_set`], register each on the preprocessor
/// with [`crate::Preprocessor::set_macro_called_callback`], and read the
/// dict back from the shared `Rc` after preprocessing completes.
pub struct PerlvarCollector {
    /// Destination dict, shared with the other collectors of the same set.
    dict: Rc<RefCell<PerlvarDict>>,
    /// Macro variant whose argument layout this collector decodes.
    kind: CollectorKind,
}
132
133impl PerlvarCollector {
134 /// Returns the shared dict and a 4-tuple of collectors, one per macro
135 /// variant. Register each collector under its corresponding macro name.
136 pub fn new_set() -> (
137 Rc<RefCell<PerlvarDict>>,
138 PerlvarCollector,
139 PerlvarCollector,
140 PerlvarCollector,
141 PerlvarCollector,
142 ) {
143 let dict = Rc::new(RefCell::new(PerlvarDict::new()));
144 (
145 dict.clone(),
146 PerlvarCollector { dict: dict.clone(), kind: CollectorKind::Var },
147 PerlvarCollector { dict: dict.clone(), kind: CollectorKind::Init },
148 PerlvarCollector { dict: dict.clone(), kind: CollectorKind::Array },
149 PerlvarCollector { dict, kind: CollectorKind::Const },
150 )
151 }
152}
153
154impl MacroCalledCallback for PerlvarCollector {
155 fn on_macro_called(&mut self, args: Option<&[Vec<Token>]>, interner: &StringInterner) {
156 let Some(args) = args else { return };
157 // All four PERLVAR variants take at least 3 args.
158 if args.len() < 3 {
159 return;
160 }
161 let prefix = parse_prefix(&args[0], interner);
162 let name = arg_to_string(&args[1], interner);
163 let entry = match self.kind {
164 CollectorKind::Var => {
165 // PERLVAR(prefix, name, type)
166 let c_type = arg_to_string(&args[2], interner);
167 PerlvarEntry {
168 name,
169 prefix,
170 kind: PerlvarKind::Var,
171 c_type,
172 }
173 }
174 CollectorKind::Init => {
175 // PERLVARI(prefix, name, type, init)
176 if args.len() < 4 {
177 return;
178 }
179 let c_type = arg_to_string(&args[2], interner);
180 let init_expr = arg_to_string(&args[3], interner);
181 PerlvarEntry {
182 name,
183 prefix,
184 kind: PerlvarKind::Init { init_expr },
185 c_type,
186 }
187 }
188 CollectorKind::Array => {
189 // PERLVARA(prefix, name, n, type)
190 if args.len() < 4 {
191 return;
192 }
193 let length = parse_array_length(&args[2], interner);
194 let c_type = arg_to_string(&args[3], interner);
195 PerlvarEntry {
196 name,
197 prefix,
198 kind: PerlvarKind::Array { length },
199 c_type,
200 }
201 }
202 CollectorKind::Const => {
203 // PERLVARIC(prefix, name, type, init)
204 if args.len() < 4 {
205 return;
206 }
207 let c_type = arg_to_string(&args[2], interner);
208 let init_expr = arg_to_string(&args[3], interner);
209 PerlvarEntry {
210 name,
211 prefix,
212 kind: PerlvarKind::Const { init_expr },
213 c_type,
214 }
215 }
216 };
217 self.dict.borrow_mut().insert(entry);
218 }
219
220 fn as_any(&self) -> &dyn Any {
221 self
222 }
223 fn as_any_mut(&mut self) -> &mut dyn Any {
224 self
225 }
226}
227
228// ─────────────────────────────────────────────────────────────────
229// Helpers
230// ─────────────────────────────────────────────────────────────────
231
232/// Convert a comma-separated arg's token list back into a single
233/// whitespace-normalized string. We deliberately lose layout because the
234/// emitter doesn't need it; doc comments embed this verbatim.
235fn arg_to_string(tokens: &[Token], interner: &StringInterner) -> String {
236 let mut out = String::new();
237 let mut prev_was_word = false;
238 for t in tokens {
239 let s = t.kind.format(interner);
240 if s.is_empty() {
241 continue;
242 }
243 let starts_word = s
244 .chars()
245 .next()
246 .is_some_and(|c| c.is_alphanumeric() || c == '_');
247 if prev_was_word && starts_word {
248 out.push(' ');
249 }
250 out.push_str(&s);
251 prev_was_word = s
252 .chars()
253 .last()
254 .is_some_and(|c| c.is_alphanumeric() || c == '_');
255 }
256 out.trim().to_string()
257}
258
259fn parse_prefix(tokens: &[Token], interner: &StringInterner) -> char {
260 let s = arg_to_string(tokens, interner);
261 s.chars().next().unwrap_or('?')
262}
263
264fn parse_array_length(tokens: &[Token], interner: &StringInterner) -> ArrayLength {
265 let s = arg_to_string(tokens, interner);
266 if let Ok(n) = s.parse::<usize>() {
267 ArrayLength::Literal(n)
268 } else {
269 ArrayLength::Symbolic(s)
270 }
271}
272
273#[cfg(test)]
274mod tests {
275 use super::*;
276
277 #[test]
278 fn dict_iteration_is_sorted() {
279 let mut d = PerlvarDict::new();
280 for n in ["zebra", "alpha", "mango"] {
281 d.insert(PerlvarEntry {
282 name: n.to_string(),
283 prefix: 'I',
284 kind: PerlvarKind::Var,
285 c_type: "int".to_string(),
286 });
287 }
288 let names: Vec<&str> = d.iter().map(|e| e.name.as_str()).collect();
289 assert_eq!(names, vec!["alpha", "mango", "zebra"]);
290 }
291
292 #[test]
293 fn array_length_classification() {
294 assert_eq!(parse_array_length_str("4"), ArrayLength::Literal(4));
295 assert_eq!(
296 parse_array_length_str("SVt_LAST"),
297 ArrayLength::Symbolic("SVt_LAST".to_string())
298 );
299 }
300
301 fn parse_array_length_str(s: &str) -> ArrayLength {
302 if let Ok(n) = s.parse::<usize>() {
303 ArrayLength::Literal(n)
304 } else {
305 ArrayLength::Symbolic(s.to_string())
306 }
307 }
308}