Skip to main content

perl_semantic_analyzer/analysis/
value_shape_inferrer.rs

1//! Lightweight value-shape inference from Perl AST patterns.
2//!
3//! Walks the AST to infer [`ValueShape`] approximations for variables based
4//! on common Perl idioms:
5//!
6//! | Perl pattern                             | Inferred shape                                |
7//! |------------------------------------------|-----------------------------------------------|
8//! | `Foo->new(...)`                          | `Object { "Foo", Medium }`                    |
9//! | `bless $ref, 'Pkg'`                      | `Object { "Pkg", Low }`                       |
10//! | `$self` in method body                   | `Object { <enclosing package>, Medium }`       |
11//! | `sub method($self, ...)`                 | `Object { <enclosing package>, High }`         |
12//! | `my ($self) = @_`                        | `Object { <enclosing package>, Medium }`       |
13//! | `DBI->connect(...)`                      | `Object { "DBI::db", Medium }`                |
14//! | `$dbh->prepare(...)` after DBI connect    | `Object { "DBI::st", Medium }`                |
15//! | unknown                                  | `Unknown`                                     |
16//!
17//! The inferrer does **not** perform full type inference — it recognises
18//! syntactic patterns and assigns conservative shapes.
19
20use crate::ast::{Node, NodeKind};
21use perl_semantic_facts::{Confidence, EntityId, FileId, ValueShape};
22use std::collections::HashMap;
23
/// Stateless entry point for value-shape inference.
///
/// The type carries no data; all work is done by the associated function
/// [`ValueShapeInferrer::infer`], which walks an AST and produces
/// `(EntityId, ValueShape)` pairs. Each pair maps a variable's deterministic
/// entity ID to its inferred shape.
pub struct ValueShapeInferrer;
28
29impl ValueShapeInferrer {
30    /// Walk the entire AST and return `(EntityId, ValueShape)` pairs for
31    /// every variable whose shape can be inferred from syntactic patterns.
32    pub fn infer(ast: &Node, _file_id: FileId) -> Vec<(EntityId, ValueShape)> {
33        let mut state = InferrerState {
34            current_package: "main".to_string(),
35            in_method: false,
36            variable_shapes: HashMap::new(),
37            results: Vec::new(),
38        };
39        state.walk(ast);
40        state.results
41    }
42}
43
/// Mutable state threaded through the recursive AST walk.
///
/// Created by `ValueShapeInferrer::infer`, updated by `InferrerState::walk`,
/// and consumed when `infer` returns the accumulated `results`.
struct InferrerState {
    /// Package that code at the current walk position belongs to. Starts as
    /// `main` and is updated when a `package` declaration is encountered.
    current_package: String,
    /// `true` while the walker is inside a subroutine or method body.
    in_method: bool,
    /// Most recently recorded shape per scalar variable name (the name is
    /// stored without its `$` sigil). Swapped for an empty map on entry to a
    /// subroutine body and restored on exit. Consulted to decide whether the
    /// receiver of a `prepare` call is a known `DBI::db` handle.
    variable_shapes: HashMap<String, ValueShape>,
    /// Accumulated `(EntityId, ValueShape)` pairs, in recording order.
    results: Vec<(EntityId, ValueShape)>,
}
55
56impl InferrerState {
57    /// Recursive AST walker.
58    fn walk(&mut self, node: &Node) {
59        match &node.kind {
60            // Statement containers — walk children in order.
61            NodeKind::Program { statements } | NodeKind::Block { statements } => {
62                for stmt in statements {
63                    self.walk(stmt);
64                }
65                return;
66            }
67
68            // `package Foo { ... }` (block form) — scoped package context.
69            NodeKind::Package { name, block: Some(block), .. } => {
70                let prev = self.current_package.clone();
71                self.current_package = name.clone();
72                self.walk(block);
73                self.current_package = prev;
74                return;
75            }
76
77            // `package Foo;` (semicolon form) — updates current package.
78            NodeKind::Package { name, block: None, .. } => {
79                self.current_package = name.clone();
80                return;
81            }
82
83            // Subroutine / method body — track method-like scope, record
84            // signature receivers, and keep receiver shapes local to the body.
85            NodeKind::Subroutine { signature, body, .. }
86            | NodeKind::Method { signature, body, .. } => {
87                let prev_in_method = self.in_method;
88                let prev_shapes = std::mem::take(&mut self.variable_shapes);
89                self.in_method = true;
90                if let Some(signature) = signature {
91                    self.record_signature_receiver(signature);
92                }
93                self.walk(body);
94                self.in_method = prev_in_method;
95                self.variable_shapes = prev_shapes;
96                return;
97            }
98
99            // Variable declaration with initializer:
100            // `my $obj = Foo->new(...)` or `my $obj = bless ...`
101            NodeKind::VariableDeclaration { variable, initializer: Some(init), .. } => {
102                if let Some(shape) = self.infer_from_rhs(init) {
103                    self.record_variable_shape(variable, shape);
104                }
105            }
106
107            // List unpacking convention for invocants:
108            // `my ($self) = @_`.
109            NodeKind::VariableListDeclaration { variables, initializer: Some(init), .. }
110                if self.in_method && is_argument_array(init) =>
111            {
112                if let Some(first) = variables.first() {
113                    self.record_self_like_variable(first, Confidence::Medium);
114                }
115            }
116
117            // Assignment: `$obj = Foo->new(...)` or `$obj = bless ...`
118            NodeKind::Assignment { lhs, rhs, .. } => {
119                if let Some(shape) = self.infer_from_rhs(rhs) {
120                    self.record_variable_shape(lhs, shape);
121                }
122            }
123
124            // `$self` reference inside a method body.
125            NodeKind::Variable { sigil, name } if sigil == "$" && is_self_like_name(name) => {
126                if self.in_method {
127                    self.record_self_like_variable(node, Confidence::Medium);
128                }
129            }
130
131            _ => {}
132        }
133
134        // Recurse into children for all other node types.
135        for child in node.children() {
136            self.walk(child);
137        }
138    }
139
140    /// Try to infer a [`ValueShape`] from the right-hand side of an
141    /// assignment or variable declaration.
142    fn infer_from_rhs(&self, rhs: &Node) -> Option<ValueShape> {
143        match &rhs.kind {
144            // `Foo->new(...)` — constructor call.
145            NodeKind::MethodCall { object, method, .. } if method == "new" => {
146                if let Some(pkg) = package_name_from_node(object) {
147                    return Some(ValueShape::Object {
148                        package: pkg,
149                        confidence: Confidence::Medium,
150                    });
151                }
152                None
153            }
154
155            // `DBI->connect(...)` — common DBI database handle constructor.
156            NodeKind::MethodCall { object, method, .. } if method == "connect" => {
157                if package_name_from_node(object).as_deref() == Some("DBI") {
158                    return Some(ValueShape::Object {
159                        package: "DBI::db".to_string(),
160                        confidence: Confidence::Medium,
161                    });
162                }
163                None
164            }
165
166            // `$dbh->prepare(...)` — common DBI statement handle constructor.
167            NodeKind::MethodCall { object, method, .. } if method == "prepare" => {
168                if self.receiver_is_dbi_database_handle(object) {
169                    return Some(ValueShape::Object {
170                        package: "DBI::st".to_string(),
171                        confidence: Confidence::Medium,
172                    });
173                }
174                None
175            }
176
177            // `bless $ref, 'Pkg'` — bless call.
178            NodeKind::FunctionCall { name, args } if name == "bless" => {
179                // Second argument is the package name.
180                if let Some(pkg_node) = args.get(1) {
181                    if let Some(pkg) = string_value(pkg_node) {
182                        return Some(ValueShape::Object {
183                            package: pkg,
184                            confidence: Confidence::Low,
185                        });
186                    }
187                }
188                // `bless $ref` with no explicit package — uses current package.
189                if args.len() == 1 {
190                    return Some(ValueShape::Object {
191                        package: self.current_package.clone(),
192                        confidence: Confidence::Low,
193                    });
194                }
195                None
196            }
197
198            _ => None,
199        }
200    }
201
202    fn record_signature_receiver(&mut self, signature: &Node) {
203        let NodeKind::Signature { parameters } = &signature.kind else {
204            return;
205        };
206        let Some(first) = parameters.first() else {
207            return;
208        };
209        let Some(variable) = parameter_variable(first) else {
210            return;
211        };
212        self.record_self_like_variable(variable, Confidence::High);
213    }
214
215    fn record_self_like_variable(&mut self, variable: &Node, confidence: Confidence) {
216        let Some(name) = scalar_variable_name(variable) else {
217            return;
218        };
219        if !is_self_like_name(name) {
220            return;
221        }
222
223        self.record_variable_shape(
224            variable,
225            ValueShape::Object { package: self.current_package.clone(), confidence },
226        );
227    }
228
229    fn record_variable_shape(&mut self, variable: &Node, shape: ValueShape) {
230        if let Some(name) = scalar_variable_name(variable) {
231            self.variable_shapes.insert(name.to_string(), shape.clone());
232        }
233        let entity_id = entity_id_from_variable(variable);
234        self.results.push((entity_id, shape));
235    }
236
237    fn receiver_is_dbi_database_handle(&self, receiver: &Node) -> bool {
238        let Some(name) = scalar_variable_name(receiver) else {
239            return false;
240        };
241
242        self.variable_shapes.get(name).is_some_and(
243            |shape| matches!(shape, ValueShape::Object { package, .. } if package == "DBI::db"),
244        )
245    }
246}
247
248// ── Helpers ─────────────────────────────────────────────────────────
249
250/// Extract a package name from a node that represents a class/package
251/// (e.g. the `Foo` in `Foo->new`).
252fn package_name_from_node(node: &Node) -> Option<String> {
253    match &node.kind {
254        NodeKind::Identifier { name } => Some(name.clone()),
255        NodeKind::String { value, .. } => normalize_package_string(value),
256        _ => None,
257    }
258}
259
260/// Extract a string value from a string literal node.
261fn string_value(node: &Node) -> Option<String> {
262    match &node.kind {
263        NodeKind::String { value, .. } => normalize_package_string(value),
264        NodeKind::Identifier { name } => Some(name.clone()),
265        _ => None,
266    }
267}
268
269fn scalar_variable_name(node: &Node) -> Option<&str> {
270    match &node.kind {
271        NodeKind::Variable { sigil, name } if sigil == "$" => Some(name.as_str()),
272        NodeKind::VariableWithAttributes { variable, .. } => scalar_variable_name(variable),
273        _ => None,
274    }
275}
276
277fn parameter_variable(node: &Node) -> Option<&Node> {
278    match &node.kind {
279        NodeKind::MandatoryParameter { variable }
280        | NodeKind::OptionalParameter { variable, .. }
281        | NodeKind::SlurpyParameter { variable }
282        | NodeKind::NamedParameter { variable } => Some(variable),
283        _ => None,
284    }
285}
286
/// Report whether `name` (a variable name without its sigil) is one of the
/// conventional invocant names: `self`, `this` or `class`.
///
/// The comparison is exact and case-sensitive.
fn is_self_like_name(name: &str) -> bool {
    const INVOCANT_NAMES: [&str; 3] = ["self", "this", "class"];
    INVOCANT_NAMES.contains(&name)
}
290
291fn is_argument_array(node: &Node) -> bool {
292    matches!(&node.kind, NodeKind::Variable { sigil, name } if sigil == "@" && name == "_")
293}
294
/// Normalize the text of a string literal into a package name.
///
/// Surrounding whitespace and any leading/trailing `'` or `"` characters are
/// stripped. The remainder is returned only when it looks like a package
/// name: one or more `::`-separated segments, each consisting solely of
/// alphanumeric characters or underscores.
///
/// Returns `None` for empty strings and for text that cannot be a literal
/// package name, such as interpolated strings (`"$class"`,
/// `"Foo::$suffix"`). Reporting those as a package would attribute an object
/// to a package that does not exist; rejecting them keeps the inferred
/// shapes conservative.
///
/// The legacy `'` package separator (`Foo'Bar`) is not recognised.
fn normalize_package_string(value: &str) -> Option<String> {
    let candidate = value.trim().trim_matches('\'').trim_matches('"').trim();

    // An empty candidate splits into a single empty segment, so the
    // non-empty-segment check below also rejects empty input.
    let looks_like_package = candidate.split("::").all(|segment| {
        !segment.is_empty() && segment.chars().all(|c| c.is_alphanumeric() || c == '_')
    });

    looks_like_package.then(|| candidate.to_string())
}
299
/// Derive a deterministic [`EntityId`] for a variable node.
///
/// Thin wrapper over [`entity_id_from_node`]: the ID depends only on the
/// node's `location.start` and `location.end`, so the same span always maps
/// to the same ID.
fn entity_id_from_variable(node: &Node) -> EntityId {
    entity_id_from_node(node)
}
305
306/// Derive a deterministic [`EntityId`] from a node's byte-offset span.
307///
308/// Uses a simple FNV-1a–style hash to produce a stable ID.
309fn entity_id_from_node(node: &Node) -> EntityId {
310    const FNV_OFFSET: u64 = 0xcbf2_9ce4_8422_2325;
311    const FNV_PRIME: u64 = 0x0100_0000_01b3;
312
313    let mut hash = FNV_OFFSET;
314    for byte in (node.location.start as u64).to_le_bytes() {
315        hash ^= u64::from(byte);
316        hash = hash.wrapping_mul(FNV_PRIME);
317    }
318    for byte in (node.location.end as u64).to_le_bytes() {
319        hash ^= u64::from(byte);
320        hash = hash.wrapping_mul(FNV_PRIME);
321    }
322    EntityId(hash)
323}
324
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Parser;

    /// Parse Perl source and infer value shapes.
    ///
    /// A parse failure is reported as `Err` instead of an empty result, so a
    /// test that asserts the *absence* of a shape cannot pass merely because
    /// its fixture failed to parse.
    fn parse_and_infer(code: &str) -> Result<Vec<(EntityId, ValueShape)>, String> {
        let mut parser = Parser::new(code);
        let ast =
            parser.parse().map_err(|err| format!("failed to parse test fixture: {err:?}"))?;
        Ok(ValueShapeInferrer::infer(&ast, FileId(1)))
    }

    /// Helper: find the first Object shape in results.
    fn first_object(results: &[(EntityId, ValueShape)]) -> Option<(&str, Confidence)> {
        results.iter().find_map(|(_, shape)| match shape {
            ValueShape::Object { package, confidence } => Some((package.as_str(), *confidence)),
            _ => None,
        })
    }

    /// Helper: confidence of the first Object shape recorded for
    /// `expected_package`, if any.
    fn object_for_package(
        results: &[(EntityId, ValueShape)],
        expected_package: &str,
    ) -> Option<Confidence> {
        results.iter().find_map(|(_, shape)| {
            if let ValueShape::Object { package, confidence } = shape {
                if package == expected_package {
                    return Some(*confidence);
                }
            }
            None
        })
    }

    // ── Constructor call: Foo->new(...) ─────────────────────────────────

    #[test]
    fn constructor_call_infers_object_medium() -> Result<(), String> {
        let results = parse_and_infer("my $obj = Foo->new();\n")?;
        let (pkg, conf) = first_object(&results).ok_or("expected Object shape from Foo->new()")?;
        assert_eq!(pkg, "Foo");
        assert_eq!(conf, Confidence::Medium);
        Ok(())
    }

    #[test]
    fn qualified_constructor_call_infers_object() -> Result<(), String> {
        let results = parse_and_infer("my $obj = My::App->new();\n")?;
        let (pkg, conf) =
            first_object(&results).ok_or("expected Object shape from My::App->new()")?;
        assert_eq!(pkg, "My::App");
        assert_eq!(conf, Confidence::Medium);
        Ok(())
    }

    // ── bless $ref, 'Pkg' ───────────────────────────────────────────────

    #[test]
    fn bless_with_package_infers_object_low() -> Result<(), String> {
        let code = "package Foo;\nsub new { my $self = bless {}, 'Foo'; }\n";
        let results = parse_and_infer(code)?;
        let (pkg, conf) =
            first_object(&results).ok_or("expected Object shape from bless {}, 'Foo'")?;
        assert_eq!(pkg, "Foo");
        assert_eq!(conf, Confidence::Low);
        Ok(())
    }

    // ── $self in method body ────────────────────────────────────────────

    #[test]
    fn self_in_method_infers_enclosing_package() -> Result<(), String> {
        let code = "package Bar;\nsub greet { my $msg = $self->name(); }\n";
        let results = parse_and_infer(code)?;
        // $self should be inferred as Object { Bar, Medium }
        let has_bar_medium = results.iter().any(|(_, shape)| {
            matches!(shape, ValueShape::Object { package, confidence }
                if package == "Bar" && *confidence == Confidence::Medium)
        });
        assert!(
            has_bar_medium,
            "expected $self to infer Object {{ Bar, Medium }}, got {results:?}"
        );
        Ok(())
    }

    #[test]
    fn signature_self_infers_enclosing_package_high() -> Result<(), String> {
        let code = "package Widget;\nsub render($self, $name) { return $name; }\n";
        let results = parse_and_infer(code)?;
        let confidence =
            object_for_package(&results, "Widget").ok_or("expected signature self shape")?;
        assert_eq!(confidence, Confidence::High);
        Ok(())
    }

    #[test]
    fn argument_unpack_self_infers_enclosing_package() -> Result<(), String> {
        let code = "package Widget;\nsub render { my ($self, $name) = @_; return $name; }\n";
        let results = parse_and_infer(code)?;
        let confidence =
            object_for_package(&results, "Widget").ok_or("expected @_ self unpack shape")?;
        assert_eq!(confidence, Confidence::Medium);
        Ok(())
    }

    // ── DBI receiver-shape idioms ───────────────────────────────────────

    #[test]
    fn dbi_connect_infers_database_handle() -> Result<(), String> {
        let results = parse_and_infer("my $dbh = DBI->connect('dbi:SQLite:dbname=:memory:');\n")?;
        let confidence =
            object_for_package(&results, "DBI::db").ok_or("expected DBI::db handle shape")?;
        assert_eq!(confidence, Confidence::Medium);
        Ok(())
    }

    #[test]
    fn dbh_prepare_infers_statement_handle_after_connect() -> Result<(), String> {
        let code = "my $dbh = DBI->connect('dbi:SQLite:dbname=:memory:');\nmy $sth = $dbh->prepare('select 1');\n";
        let results = parse_and_infer(code)?;
        let confidence =
            object_for_package(&results, "DBI::st").ok_or("expected DBI::st statement shape")?;
        assert_eq!(confidence, Confidence::Medium);
        Ok(())
    }

    #[test]
    fn prepare_on_unknown_receiver_does_not_infer_statement_handle() -> Result<(), String> {
        let results = parse_and_infer("my $sth = $thing->prepare('select 1');\n")?;
        assert!(
            object_for_package(&results, "DBI::st").is_none(),
            "unknown prepare receiver should not infer DBI::st: {results:?}"
        );
        Ok(())
    }

    #[test]
    fn prepare_on_dbh_name_without_known_connect_does_not_infer_statement_handle()
    -> Result<(), String> {
        let results = parse_and_infer("my $sth = $dbh->prepare('select 1');\n")?;
        assert!(
            object_for_package(&results, "DBI::st").is_none(),
            "$dbh naming alone should not infer DBI::st: {results:?}"
        );
        Ok(())
    }

    // ── Unknown fallback ────────────────────────────────────────────────

    #[test]
    fn plain_scalar_produces_no_shape() -> Result<(), String> {
        let results = parse_and_infer("my $x = 42;\n")?;
        // No Object shapes should be inferred for a plain scalar.
        assert!(first_object(&results).is_none(), "plain scalar should not produce Object shape");
        Ok(())
    }

    // ── Multiple packages ───────────────────────────────────────────────

    #[test]
    fn multiple_packages_track_context() -> Result<(), String> {
        let code = r#"
package Alpha;
sub new { my $self = bless {}, 'Alpha'; }

package Beta;
sub new { my $self = bless {}, 'Beta'; }
"#;
        let results = parse_and_infer(code)?;
        let has_alpha = results.iter().any(
            |(_, shape)| matches!(shape, ValueShape::Object { package, .. } if package == "Alpha"),
        );
        let has_beta = results.iter().any(
            |(_, shape)| matches!(shape, ValueShape::Object { package, .. } if package == "Beta"),
        );
        assert!(has_alpha, "expected Alpha object shape");
        assert!(has_beta, "expected Beta object shape");
        Ok(())
    }
}