Skip to main content

perl_semantic_analyzer/analysis/
value_shape_inferrer.rs

//! Lightweight value-shape inference from Perl AST patterns.
//!
//! Walks the AST to infer [`ValueShape`] approximations for variables based
//! on common Perl idioms:
//!
//! | Perl pattern                             | Inferred shape                                |
//! |------------------------------------------|-----------------------------------------------|
//! | `Foo->new(...)`                          | `Object { "Foo", High }`                      |
//! | `bless $ref, 'Pkg'`                      | `Object { "Pkg", Low }`                       |
//! | `bless $ref`                             | `Object { <current package>, Low }`           |
//! | `$self` in method body                   | `Object { <enclosing package>, Medium }`      |
//! | unknown                                  | `Unknown` (no pair is emitted)                |
//!
//! The inferrer does **not** perform full type inference — it recognises
//! syntactic patterns and assigns conservative shapes.
15
16use crate::ast::{Node, NodeKind};
17use perl_semantic_facts::{Confidence, EntityId, FileId, ValueShape};
18
/// Inferrer that walks an AST to produce `(EntityId, ValueShape)` pairs.
///
/// Each pair maps a variable's deterministic entity ID to its inferred shape.
///
/// This is a stateless unit type: all functionality is exposed through
/// associated functions, so the common traits are derived to make it usable
/// in generic and diagnostic contexts at no cost.
#[derive(Debug, Clone, Copy, Default)]
pub struct ValueShapeInferrer;
23
24impl ValueShapeInferrer {
25    /// Walk the entire AST and return `(EntityId, ValueShape)` pairs for
26    /// every variable whose shape can be inferred from syntactic patterns.
27    pub fn infer(ast: &Node, _file_id: FileId) -> Vec<(EntityId, ValueShape)> {
28        let mut state = InferrerState {
29            current_package: "main".to_string(),
30            in_method: false,
31            results: Vec::new(),
32        };
33        state.walk(ast);
34        state.results
35    }
36}
37
/// Internal state for the recursive AST walk.
///
/// One instance is created per [`ValueShapeInferrer::infer`] call; the walk
/// mutates the context fields as it descends and appends to `results`.
struct InferrerState {
    /// Current package context (updated when `package Foo;` is encountered).
    /// Initialised to `"main"` by [`ValueShapeInferrer::infer`].
    current_package: String,
    /// Whether we are currently inside a subroutine/method body.
    /// Gates the `$self` heuristic.
    in_method: bool,
    /// Accumulated (EntityId, ValueShape) pairs.
    results: Vec<(EntityId, ValueShape)>,
}
47
48impl InferrerState {
49    /// Recursive AST walker.
50    fn walk(&mut self, node: &Node) {
51        match &node.kind {
52            // Statement containers — walk children in order.
53            NodeKind::Program { statements } | NodeKind::Block { statements } => {
54                for stmt in statements {
55                    self.walk(stmt);
56                }
57                return;
58            }
59
60            // `package Foo { ... }` (block form) — scoped package context.
61            NodeKind::Package { name, block: Some(block), .. } => {
62                let prev = self.current_package.clone();
63                self.current_package = name.clone();
64                self.walk(block);
65                self.current_package = prev;
66                return;
67            }
68
69            // `package Foo;` (semicolon form) — updates current package.
70            NodeKind::Package { name, block: None, .. } => {
71                self.current_package = name.clone();
72                return;
73            }
74
75            // Subroutine / method body — track that we are inside a method
76            // so `$self` references can be inferred.
77            NodeKind::Subroutine { body, .. } | NodeKind::Method { body, .. } => {
78                let prev_in_method = self.in_method;
79                self.in_method = true;
80                self.walk(body);
81                self.in_method = prev_in_method;
82                return;
83            }
84
85            // Variable declaration with initializer:
86            // `my $obj = Foo->new(...)` or `my $obj = bless ...`
87            NodeKind::VariableDeclaration { variable, initializer: Some(init), .. } => {
88                if let Some(shape) = self.infer_from_rhs(init) {
89                    let entity_id = entity_id_from_variable(variable);
90                    self.results.push((entity_id, shape));
91                }
92            }
93
94            // Assignment: `$obj = Foo->new(...)` or `$obj = bless ...`
95            NodeKind::Assignment { lhs, rhs, .. } => {
96                if let Some(shape) = self.infer_from_rhs(rhs) {
97                    let entity_id = entity_id_from_variable(lhs);
98                    self.results.push((entity_id, shape));
99                }
100            }
101
102            // `$self` reference inside a method body.
103            NodeKind::Variable { sigil, name } if sigil == "$" && name == "self" => {
104                if self.in_method {
105                    let entity_id = entity_id_from_node(node);
106                    self.results.push((
107                        entity_id,
108                        ValueShape::Object {
109                            package: self.current_package.clone(),
110                            confidence: Confidence::Medium,
111                        },
112                    ));
113                }
114            }
115
116            _ => {}
117        }
118
119        // Recurse into children for all other node types.
120        for child in node.children() {
121            self.walk(child);
122        }
123    }
124
125    /// Try to infer a [`ValueShape`] from the right-hand side of an
126    /// assignment or variable declaration.
127    fn infer_from_rhs(&self, rhs: &Node) -> Option<ValueShape> {
128        match &rhs.kind {
129            // `Foo->new(...)` — constructor call.
130            NodeKind::MethodCall { object, method, .. } if method == "new" => {
131                if let Some(pkg) = package_name_from_node(object) {
132                    return Some(ValueShape::Object { package: pkg, confidence: Confidence::High });
133                }
134                None
135            }
136
137            // `bless $ref, 'Pkg'` — bless call.
138            NodeKind::FunctionCall { name, args } if name == "bless" => {
139                // Second argument is the package name.
140                if let Some(pkg_node) = args.get(1) {
141                    if let Some(pkg) = string_value(pkg_node) {
142                        return Some(ValueShape::Object {
143                            package: pkg,
144                            confidence: Confidence::Low,
145                        });
146                    }
147                }
148                // `bless $ref` with no explicit package — uses current package.
149                if args.len() == 1 {
150                    return Some(ValueShape::Object {
151                        package: self.current_package.clone(),
152                        confidence: Confidence::Low,
153                    });
154                }
155                None
156            }
157
158            _ => None,
159        }
160    }
161}
162
163// ── Helpers ─────────────────────────────────────────────────────────
164
165/// Extract a package name from a node that represents a class/package
166/// (e.g. the `Foo` in `Foo->new`).
167fn package_name_from_node(node: &Node) -> Option<String> {
168    match &node.kind {
169        NodeKind::Identifier { name } => Some(name.clone()),
170        NodeKind::String { value, .. } => normalize_package_string(value),
171        _ => None,
172    }
173}
174
175/// Extract a string value from a string literal node.
176fn string_value(node: &Node) -> Option<String> {
177    match &node.kind {
178        NodeKind::String { value, .. } => normalize_package_string(value),
179        NodeKind::Identifier { name } => Some(name.clone()),
180        _ => None,
181    }
182}
183
/// Strip surrounding whitespace and quote characters from a string-literal
/// value, returning `None` when nothing remains.
///
/// The stripping order is: outer whitespace, any run of `'` at either end,
/// any run of `"` at either end, then whitespace again.
fn normalize_package_string(value: &str) -> Option<String> {
    let stripped = value.trim();
    let stripped = stripped.trim_matches('\'');
    let stripped = stripped.trim_matches('"');
    let stripped = stripped.trim();

    if stripped.is_empty() {
        return None;
    }
    Some(stripped.to_owned())
}
188
/// Derive a deterministic [`EntityId`] from a variable node using its
/// byte-offset span.
///
/// Thin wrapper around [`entity_id_from_node`]; it is used for the target of
/// a declaration or assignment. No check is made that `node` is actually a
/// variable node — whatever node is passed has its span hashed.
fn entity_id_from_variable(node: &Node) -> EntityId {
    entity_id_from_node(node)
}
194
195/// Derive a deterministic [`EntityId`] from a node's byte-offset span.
196///
197/// Uses a simple FNV-1a–style hash to produce a stable ID.
198fn entity_id_from_node(node: &Node) -> EntityId {
199    const FNV_OFFSET: u64 = 0xcbf2_9ce4_8422_2325;
200    const FNV_PRIME: u64 = 0x0100_0000_01b3;
201
202    let mut hash = FNV_OFFSET;
203    for byte in (node.location.start as u64).to_le_bytes() {
204        hash ^= u64::from(byte);
205        hash = hash.wrapping_mul(FNV_PRIME);
206    }
207    for byte in (node.location.end as u64).to_le_bytes() {
208        hash ^= u64::from(byte);
209        hash = hash.wrapping_mul(FNV_PRIME);
210    }
211    EntityId(hash)
212}
213
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Parser;

    /// Run the parser over `code` and feed the resulting AST to the inferrer.
    ///
    /// A parse failure yields an empty result list instead of a panic.
    fn parse_and_infer(code: &str) -> Vec<(EntityId, ValueShape)> {
        let mut parser = Parser::new(code);
        let parsed = parser.parse();
        match parsed {
            Ok(ast) => ValueShapeInferrer::infer(&ast, FileId(1)),
            Err(_) => Vec::new(),
        }
    }

    /// Return the package and confidence of the first `Object` shape, if any.
    fn first_object(results: &[(EntityId, ValueShape)]) -> Option<(&str, Confidence)> {
        results.iter().find_map(|(_, shape)| match shape {
            ValueShape::Object { package, confidence } => Some((package.as_str(), *confidence)),
            _ => None,
        })
    }

    /// Whether any inferred shape is an `Object` belonging to `wanted`.
    fn has_object_in(results: &[(EntityId, ValueShape)], wanted: &str) -> bool {
        results.iter().any(|(_, shape)| match shape {
            ValueShape::Object { package, .. } => package.as_str() == wanted,
            _ => false,
        })
    }

    // ── Constructor call: Foo->new(...) ─────────────────────────────────

    #[test]
    fn constructor_call_infers_object_high() -> Result<(), String> {
        let shapes = parse_and_infer("my $obj = Foo->new();\n");
        let (package, confidence) =
            first_object(&shapes).ok_or("expected Object shape from Foo->new()")?;
        assert_eq!(package, "Foo");
        assert_eq!(confidence, Confidence::High);
        Ok(())
    }

    #[test]
    fn qualified_constructor_call_infers_object() -> Result<(), String> {
        let shapes = parse_and_infer("my $obj = My::App->new();\n");
        let (package, confidence) =
            first_object(&shapes).ok_or("expected Object shape from My::App->new()")?;
        assert_eq!(package, "My::App");
        assert_eq!(confidence, Confidence::High);
        Ok(())
    }

    // ── bless $ref, 'Pkg' ───────────────────────────────────────────────

    #[test]
    fn bless_with_package_infers_object_low() -> Result<(), String> {
        let shapes = parse_and_infer("package Foo;\nsub new { my $self = bless {}, 'Foo'; }\n");
        let (package, confidence) =
            first_object(&shapes).ok_or("expected Object shape from bless {}, 'Foo'")?;
        assert_eq!(package, "Foo");
        assert_eq!(confidence, Confidence::Low);
        Ok(())
    }

    // ── $self in method body ────────────────────────────────────────────

    #[test]
    fn self_in_method_infers_enclosing_package() -> Result<(), String> {
        let results = parse_and_infer("package Bar;\nsub greet { my $msg = $self->name(); }\n");
        // The `$self` reference must surface as Object { Bar, Medium }.
        let has_bar_medium = results.iter().any(|(_, shape)| match shape {
            ValueShape::Object { package, confidence } => {
                package == "Bar" && *confidence == Confidence::Medium
            }
            _ => false,
        });
        assert!(
            has_bar_medium,
            "expected $self to infer Object {{ Bar, Medium }}, got {results:?}"
        );
        Ok(())
    }

    // ── Unknown fallback ────────────────────────────────────────────────

    #[test]
    fn plain_scalar_produces_no_shape() -> Result<(), String> {
        let shapes = parse_and_infer("my $x = 42;\n");
        // A numeric literal matches no pattern, so no Object may appear.
        assert!(first_object(&shapes).is_none(), "plain scalar should not produce Object shape");
        Ok(())
    }

    // ── Multiple packages ───────────────────────────────────────────────

    #[test]
    fn multiple_packages_track_context() -> Result<(), String> {
        let code = r#"
package Alpha;
sub new { my $self = bless {}, 'Alpha'; }

package Beta;
sub new { my $self = bless {}, 'Beta'; }
"#;
        let shapes = parse_and_infer(code);
        assert!(has_object_in(&shapes, "Alpha"), "expected Alpha object shape");
        assert!(has_object_in(&shapes, "Beta"), "expected Beta object shape");
        Ok(())
    }
}
323}