Skip to main content

libperl_macrogen/
perlvar_dict.rs

1//! `PerlvarDict` — collected PERLVAR/PERLVARI/PERLVARA/PERLVARIC entries.
2//!
3//! These entries are observed during Phase 1 (preprocessing) by registering
4//! a [`PerlvarCollector`] as a [`crate::MacroCalledCallback`] for each of the
5//! PERLVAR macro variants. The C preprocessor then routes every PERLVAR
6//! invocation it encounters in the include tree (`wrapper.h` → `perl.h` →
7//! `intrpvar.h` / `perlvars.h`) to the collector, so cpp guards (`#ifdef`,
8//! `#if defined(...)`) are evaluated correctly without our parser having to
9//! reimplement them.
10//!
11//! Phase 3 reads the dict from `InferResult` and emits one `PL_<name>!()`
12//! declarative macro per entry, formatted for the target Perl's threading
13//! mode (no `#[cfg]` in the output — see `docs/plan/README.md` §3.2 in the
14//! consumer project).
15
16use std::any::Any;
17use std::cell::RefCell;
18use std::collections::BTreeMap;
19use std::rc::Rc;
20
21use crate::preprocessor::MacroCalledCallback;
22use crate::token::Token;
23use crate::StringInterner;
24
/// One PERLVAR-family entry. Sized intentionally small — the C type and
/// initializer are kept as raw text because the emitter passes them through
/// without manipulation. (See CLAUDE.md "Structure-First Type Handling":
/// PERLVAR types are an explicit boundary case where the *consumer* of the
/// emitted macro is the Rust compiler itself, not our type analysis.)
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PerlvarEntry {
    /// Bare variable name, without `PL_` prefix (e.g. `"stack_sp"`). This is
    /// also the key under which [`PerlvarDict`] stores the entry.
    pub name: String,

    /// `'I'` for per-interpreter (intrpvar.h) variables, `'G'` for
    /// process-global (perlvars.h) variables. The collector stores the first
    /// character of the macro's first argument, or `'?'` when that argument
    /// is empty.
    pub prefix: char,

    /// Variant of the PERLVAR macro that introduced this entry.
    pub kind: PerlvarKind,

    /// The C type as a single string (e.g. `"HV *"`, `"perl_mutex"`),
    /// rebuilt from the argument's tokens by `arg_to_string`. Exactly one
    /// space separates two adjacent word-like tokens (`"unsigned int"`); no
    /// space is inserted next to punctuation, so pointer types presumably
    /// come out as `"SV**"` rather than `"SV **"` — TODO confirm against
    /// how `Token` kinds are formatted.
    pub c_type: String,
}
47
/// Variant of the PERLVAR macro that produced an entry.
///
/// Text payloads (`init_expr`) are rebuilt from the macro argument's tokens
/// by `arg_to_string`, so they carry the original tokens but not the
/// original whitespace layout.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PerlvarKind {
    /// `PERLVAR(prefix, name, type)` — plain declaration.
    Var,
    /// `PERLVARI(prefix, name, type, init)` — declaration with initializer.
    /// The init expression is kept as text (fourth macro argument).
    Init { init_expr: String },
    /// `PERLVARA(prefix, name, n, type)` — fixed-length array. `length` is
    /// the classified third argument `n`.
    Array { length: ArrayLength },
    /// `PERLVARIC(prefix, name, type, init)` — const declaration with
    /// initializer. (Currently unused by upstream Perl but supported for
    /// completeness.)
    Const { init_expr: String },
}
63
/// Length argument of a PERLVARA. Usually a numeric literal, but the
/// preprocessor will hand us `SVt_LAST`-style `#define`d symbols too.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ArrayLength {
    /// The argument text was recognised as an integer literal; holds its
    /// numeric value.
    Literal(usize),
    /// Any other argument text (e.g. a macro or enum name), kept as a
    /// string for the emitter to deal with.
    Symbolic(String),
}
71
/// Collection of PERLVAR entries observed during preprocessing.
///
/// Iteration order is alphabetical by name (via `BTreeMap`) so that the
/// emitted Rust output is reproducible across builds.
#[derive(Debug, Default, Clone)]
pub struct PerlvarDict {
    /// Entries keyed by their bare variable name (`PerlvarEntry::name`).
    /// Because the name is the key, at most one entry per name is kept.
    entries: BTreeMap<String, PerlvarEntry>,
}
80
81impl PerlvarDict {
82    pub fn new() -> Self {
83        Self::default()
84    }
85
86    pub fn insert(&mut self, entry: PerlvarEntry) {
87        self.entries.insert(entry.name.clone(), entry);
88    }
89
90    pub fn get(&self, name: &str) -> Option<&PerlvarEntry> {
91        self.entries.get(name)
92    }
93
94    pub fn iter(&self) -> impl Iterator<Item = &PerlvarEntry> {
95        self.entries.values()
96    }
97
98    pub fn len(&self) -> usize {
99        self.entries.len()
100    }
101
102    pub fn is_empty(&self) -> bool {
103        self.entries.is_empty()
104    }
105}
106
107// ─────────────────────────────────────────────────────────────────
108// Collector callback (registered with the preprocessor)
109// ─────────────────────────────────────────────────────────────────
110
/// Which PERLVAR variant a single collector instance is watching.
/// One collector is registered per macro name (PERLVAR/PERLVARI/PERLVARA/
/// PERLVARIC); they all share the same `Rc<RefCell<PerlvarDict>>`.
///
/// The variant decides how the collector interprets the argument list it
/// is handed.
#[derive(Debug, Clone, Copy)]
enum CollectorKind {
    /// Arguments are read as `PERLVAR(prefix, name, type)`.
    Var,
    /// Arguments are read as `PERLVARI(prefix, name, type, init)`.
    Init,
    /// Arguments are read as `PERLVARA(prefix, name, n, type)`.
    Array,
    /// Arguments are read as `PERLVARIC(prefix, name, type, init)`.
    Const,
}
121
122/// Callback that records PERLVAR invocations into a shared dict.
123///
124/// Construct a set of four collectors (one per PERLVAR variant) sharing a
125/// dict via [`PerlvarCollector::new_set`], register each on the preprocessor
126/// with [`crate::Preprocessor::set_macro_called_callback`], and read the
127/// dict back from the shared `Rc` after preprocessing completes.
128pub struct PerlvarCollector {
129    dict: Rc<RefCell<PerlvarDict>>,
130    kind: CollectorKind,
131}
132
133impl PerlvarCollector {
134    /// Returns the shared dict and a 4-tuple of collectors, one per macro
135    /// variant. Register each collector under its corresponding macro name.
136    pub fn new_set() -> (
137        Rc<RefCell<PerlvarDict>>,
138        PerlvarCollector,
139        PerlvarCollector,
140        PerlvarCollector,
141        PerlvarCollector,
142    ) {
143        let dict = Rc::new(RefCell::new(PerlvarDict::new()));
144        (
145            dict.clone(),
146            PerlvarCollector { dict: dict.clone(), kind: CollectorKind::Var },
147            PerlvarCollector { dict: dict.clone(), kind: CollectorKind::Init },
148            PerlvarCollector { dict: dict.clone(), kind: CollectorKind::Array },
149            PerlvarCollector { dict, kind: CollectorKind::Const },
150        )
151    }
152}
153
154impl MacroCalledCallback for PerlvarCollector {
155    fn on_macro_called(&mut self, args: Option<&[Vec<Token>]>, interner: &StringInterner) {
156        let Some(args) = args else { return };
157        // All four PERLVAR variants take at least 3 args.
158        if args.len() < 3 {
159            return;
160        }
161        let prefix = parse_prefix(&args[0], interner);
162        let name = arg_to_string(&args[1], interner);
163        let entry = match self.kind {
164            CollectorKind::Var => {
165                // PERLVAR(prefix, name, type)
166                let c_type = arg_to_string(&args[2], interner);
167                PerlvarEntry {
168                    name,
169                    prefix,
170                    kind: PerlvarKind::Var,
171                    c_type,
172                }
173            }
174            CollectorKind::Init => {
175                // PERLVARI(prefix, name, type, init)
176                if args.len() < 4 {
177                    return;
178                }
179                let c_type = arg_to_string(&args[2], interner);
180                let init_expr = arg_to_string(&args[3], interner);
181                PerlvarEntry {
182                    name,
183                    prefix,
184                    kind: PerlvarKind::Init { init_expr },
185                    c_type,
186                }
187            }
188            CollectorKind::Array => {
189                // PERLVARA(prefix, name, n, type)
190                if args.len() < 4 {
191                    return;
192                }
193                let length = parse_array_length(&args[2], interner);
194                let c_type = arg_to_string(&args[3], interner);
195                PerlvarEntry {
196                    name,
197                    prefix,
198                    kind: PerlvarKind::Array { length },
199                    c_type,
200                }
201            }
202            CollectorKind::Const => {
203                // PERLVARIC(prefix, name, type, init)
204                if args.len() < 4 {
205                    return;
206                }
207                let c_type = arg_to_string(&args[2], interner);
208                let init_expr = arg_to_string(&args[3], interner);
209                PerlvarEntry {
210                    name,
211                    prefix,
212                    kind: PerlvarKind::Const { init_expr },
213                    c_type,
214                }
215            }
216        };
217        self.dict.borrow_mut().insert(entry);
218    }
219
220    fn as_any(&self) -> &dyn Any {
221        self
222    }
223    fn as_any_mut(&mut self) -> &mut dyn Any {
224        self
225    }
226}
227
228// ─────────────────────────────────────────────────────────────────
229// Helpers
230// ─────────────────────────────────────────────────────────────────
231
232/// Convert a comma-separated arg's token list back into a single
233/// whitespace-normalized string. We deliberately lose layout because the
234/// emitter doesn't need it; doc comments embed this verbatim.
235fn arg_to_string(tokens: &[Token], interner: &StringInterner) -> String {
236    let mut out = String::new();
237    let mut prev_was_word = false;
238    for t in tokens {
239        let s = t.kind.format(interner);
240        if s.is_empty() {
241            continue;
242        }
243        let starts_word = s
244            .chars()
245            .next()
246            .is_some_and(|c| c.is_alphanumeric() || c == '_');
247        if prev_was_word && starts_word {
248            out.push(' ');
249        }
250        out.push_str(&s);
251        prev_was_word = s
252            .chars()
253            .last()
254            .is_some_and(|c| c.is_alphanumeric() || c == '_');
255    }
256    out.trim().to_string()
257}
258
259fn parse_prefix(tokens: &[Token], interner: &StringInterner) -> char {
260    let s = arg_to_string(tokens, interner);
261    s.chars().next().unwrap_or('?')
262}
263
264fn parse_array_length(tokens: &[Token], interner: &StringInterner) -> ArrayLength {
265    let s = arg_to_string(tokens, interner);
266    if let Ok(n) = s.parse::<usize>() {
267        ArrayLength::Literal(n)
268    } else {
269        ArrayLength::Symbolic(s)
270    }
271}
272
273#[cfg(test)]
274mod tests {
275    use super::*;
276
277    #[test]
278    fn dict_iteration_is_sorted() {
279        let mut d = PerlvarDict::new();
280        for n in ["zebra", "alpha", "mango"] {
281            d.insert(PerlvarEntry {
282                name: n.to_string(),
283                prefix: 'I',
284                kind: PerlvarKind::Var,
285                c_type: "int".to_string(),
286            });
287        }
288        let names: Vec<&str> = d.iter().map(|e| e.name.as_str()).collect();
289        assert_eq!(names, vec!["alpha", "mango", "zebra"]);
290    }
291
292    #[test]
293    fn array_length_classification() {
294        assert_eq!(parse_array_length_str("4"), ArrayLength::Literal(4));
295        assert_eq!(
296            parse_array_length_str("SVt_LAST"),
297            ArrayLength::Symbolic("SVt_LAST".to_string())
298        );
299    }
300
301    fn parse_array_length_str(s: &str) -> ArrayLength {
302        if let Ok(n) = s.parse::<usize>() {
303            ArrayLength::Literal(n)
304        } else {
305            ArrayLength::Symbolic(s.to_string())
306        }
307    }
308}