perl_semantic_analyzer/analysis/
value_shape_inferrer.rs1use crate::ast::{Node, NodeKind};
21use perl_semantic_facts::{Confidence, EntityId, FileId, ValueShape};
22use std::collections::HashMap;
23
/// Entry point for value-shape inference over a parsed Perl AST.
///
/// The type carries no state of its own; inference is run through the
/// associated function [`ValueShapeInferrer::infer`].
pub struct ValueShapeInferrer;
28
29impl ValueShapeInferrer {
30 pub fn infer(ast: &Node, _file_id: FileId) -> Vec<(EntityId, ValueShape)> {
33 let mut state = InferrerState {
34 current_package: "main".to_string(),
35 in_method: false,
36 variable_shapes: HashMap::new(),
37 results: Vec::new(),
38 };
39 state.walk(ast);
40 state.results
41 }
42}
43
/// Mutable state threaded through a single inference walk.
struct InferrerState {
    /// Name of the package in effect at the current point of the walk.
    current_package: String,
    /// `true` while the walk is inside the body of a subroutine or method node.
    in_method: bool,
    /// Shapes recorded so far for scalar variables, keyed by variable name
    /// (the `name` field of the variable node, i.e. without the sigil).
    variable_shapes: HashMap<String, ValueShape>,
    /// Every shape recorded during the walk, in recording order.
    results: Vec<(EntityId, ValueShape)>,
}
55
56impl InferrerState {
57 fn walk(&mut self, node: &Node) {
59 match &node.kind {
60 NodeKind::Program { statements } | NodeKind::Block { statements } => {
62 for stmt in statements {
63 self.walk(stmt);
64 }
65 return;
66 }
67
68 NodeKind::Package { name, block: Some(block), .. } => {
70 let prev = self.current_package.clone();
71 self.current_package = name.clone();
72 self.walk(block);
73 self.current_package = prev;
74 return;
75 }
76
77 NodeKind::Package { name, block: None, .. } => {
79 self.current_package = name.clone();
80 return;
81 }
82
83 NodeKind::Subroutine { signature, body, .. }
86 | NodeKind::Method { signature, body, .. } => {
87 let prev_in_method = self.in_method;
88 let prev_shapes = std::mem::take(&mut self.variable_shapes);
89 self.in_method = true;
90 if let Some(signature) = signature {
91 self.record_signature_receiver(signature);
92 }
93 self.walk(body);
94 self.in_method = prev_in_method;
95 self.variable_shapes = prev_shapes;
96 return;
97 }
98
99 NodeKind::VariableDeclaration { variable, initializer: Some(init), .. } => {
102 if let Some(shape) = self.infer_from_rhs(init) {
103 self.record_variable_shape(variable, shape);
104 }
105 }
106
107 NodeKind::VariableListDeclaration { variables, initializer: Some(init), .. }
110 if self.in_method && is_argument_array(init) =>
111 {
112 if let Some(first) = variables.first() {
113 self.record_self_like_variable(first, Confidence::Medium);
114 }
115 }
116
117 NodeKind::Assignment { lhs, rhs, .. } => {
119 if let Some(shape) = self.infer_from_rhs(rhs) {
120 self.record_variable_shape(lhs, shape);
121 }
122 }
123
124 NodeKind::Variable { sigil, name } if sigil == "$" && is_self_like_name(name) => {
126 if self.in_method {
127 self.record_self_like_variable(node, Confidence::Medium);
128 }
129 }
130
131 _ => {}
132 }
133
134 for child in node.children() {
136 self.walk(child);
137 }
138 }
139
140 fn infer_from_rhs(&self, rhs: &Node) -> Option<ValueShape> {
143 match &rhs.kind {
144 NodeKind::MethodCall { object, method, .. } if method == "new" => {
146 if let Some(pkg) = package_name_from_node(object) {
147 return Some(ValueShape::Object {
148 package: pkg,
149 confidence: Confidence::Medium,
150 });
151 }
152 None
153 }
154
155 NodeKind::MethodCall { object, method, .. } if method == "connect" => {
157 if package_name_from_node(object).as_deref() == Some("DBI") {
158 return Some(ValueShape::Object {
159 package: "DBI::db".to_string(),
160 confidence: Confidence::Medium,
161 });
162 }
163 None
164 }
165
166 NodeKind::MethodCall { object, method, .. } if method == "prepare" => {
168 if self.receiver_is_dbi_database_handle(object) {
169 return Some(ValueShape::Object {
170 package: "DBI::st".to_string(),
171 confidence: Confidence::Medium,
172 });
173 }
174 None
175 }
176
177 NodeKind::FunctionCall { name, args } if name == "bless" => {
179 if let Some(pkg_node) = args.get(1) {
181 if let Some(pkg) = string_value(pkg_node) {
182 return Some(ValueShape::Object {
183 package: pkg,
184 confidence: Confidence::Low,
185 });
186 }
187 }
188 if args.len() == 1 {
190 return Some(ValueShape::Object {
191 package: self.current_package.clone(),
192 confidence: Confidence::Low,
193 });
194 }
195 None
196 }
197
198 _ => None,
199 }
200 }
201
202 fn record_signature_receiver(&mut self, signature: &Node) {
203 let NodeKind::Signature { parameters } = &signature.kind else {
204 return;
205 };
206 let Some(first) = parameters.first() else {
207 return;
208 };
209 let Some(variable) = parameter_variable(first) else {
210 return;
211 };
212 self.record_self_like_variable(variable, Confidence::High);
213 }
214
215 fn record_self_like_variable(&mut self, variable: &Node, confidence: Confidence) {
216 let Some(name) = scalar_variable_name(variable) else {
217 return;
218 };
219 if !is_self_like_name(name) {
220 return;
221 }
222
223 self.record_variable_shape(
224 variable,
225 ValueShape::Object { package: self.current_package.clone(), confidence },
226 );
227 }
228
229 fn record_variable_shape(&mut self, variable: &Node, shape: ValueShape) {
230 if let Some(name) = scalar_variable_name(variable) {
231 self.variable_shapes.insert(name.to_string(), shape.clone());
232 }
233 let entity_id = entity_id_from_variable(variable);
234 self.results.push((entity_id, shape));
235 }
236
237 fn receiver_is_dbi_database_handle(&self, receiver: &Node) -> bool {
238 let Some(name) = scalar_variable_name(receiver) else {
239 return false;
240 };
241
242 self.variable_shapes.get(name).is_some_and(
243 |shape| matches!(shape, ValueShape::Object { package, .. } if package == "DBI::db"),
244 )
245 }
246}
247
248fn package_name_from_node(node: &Node) -> Option<String> {
253 match &node.kind {
254 NodeKind::Identifier { name } => Some(name.clone()),
255 NodeKind::String { value, .. } => normalize_package_string(value),
256 _ => None,
257 }
258}
259
260fn string_value(node: &Node) -> Option<String> {
262 match &node.kind {
263 NodeKind::String { value, .. } => normalize_package_string(value),
264 NodeKind::Identifier { name } => Some(name.clone()),
265 _ => None,
266 }
267}
268
269fn scalar_variable_name(node: &Node) -> Option<&str> {
270 match &node.kind {
271 NodeKind::Variable { sigil, name } if sigil == "$" => Some(name.as_str()),
272 NodeKind::VariableWithAttributes { variable, .. } => scalar_variable_name(variable),
273 _ => None,
274 }
275}
276
277fn parameter_variable(node: &Node) -> Option<&Node> {
278 match &node.kind {
279 NodeKind::MandatoryParameter { variable }
280 | NodeKind::OptionalParameter { variable, .. }
281 | NodeKind::SlurpyParameter { variable }
282 | NodeKind::NamedParameter { variable } => Some(variable),
283 _ => None,
284 }
285}
286
/// Reports whether `name` (without sigil) is one of the names treated as a
/// method receiver: exactly `self`, `this` or `class` (case-sensitive).
fn is_self_like_name(name: &str) -> bool {
    ["self", "this", "class"].contains(&name)
}
290
291fn is_argument_array(node: &Node) -> bool {
292 matches!(&node.kind, NodeKind::Variable { sigil, name } if sigil == "@" && name == "_")
293}
294
/// Strips surrounding whitespace and quote characters from `value`.
///
/// The steps run in a fixed order: outer whitespace, then any run of `'` at
/// either end, then any run of `"` at either end, then whitespace again.
/// Returns `None` when nothing is left.
fn normalize_package_string(value: &str) -> Option<String> {
    let text = value.trim();
    let text = text.trim_matches('\'');
    let text = text.trim_matches('"');
    let text = text.trim();
    match text {
        "" => None,
        package => Some(package.to_owned()),
    }
}
299
/// Returns the entity id for a variable node.
///
/// Currently identical to `entity_id_from_node`: the id is derived from the
/// node itself, with no variable-specific handling.
fn entity_id_from_variable(node: &Node) -> EntityId {
    entity_id_from_node(node)
}
305
306fn entity_id_from_node(node: &Node) -> EntityId {
310 const FNV_OFFSET: u64 = 0xcbf2_9ce4_8422_2325;
311 const FNV_PRIME: u64 = 0x0100_0000_01b3;
312
313 let mut hash = FNV_OFFSET;
314 for byte in (node.location.start as u64).to_le_bytes() {
315 hash ^= u64::from(byte);
316 hash = hash.wrapping_mul(FNV_PRIME);
317 }
318 for byte in (node.location.end as u64).to_le_bytes() {
319 hash ^= u64::from(byte);
320 hash = hash.wrapping_mul(FNV_PRIME);
321 }
322 EntityId(hash)
323}
324
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Parser;

    /// Parses `code` and runs shape inference over the resulting AST.
    ///
    /// A parse failure is reported as `Err` instead of an empty result, so the
    /// negative tests below cannot pass merely because their input failed to
    /// parse.
    fn parse_and_infer(code: &str) -> Result<Vec<(EntityId, ValueShape)>, String> {
        let mut parser = Parser::new(code);
        let ast = parser
            .parse()
            .map_err(|_| format!("test input failed to parse: {code:?}"))?;
        Ok(ValueShapeInferrer::infer(&ast, FileId(1)))
    }

    /// Returns the package and confidence of the first object shape in `results`.
    fn first_object(results: &[(EntityId, ValueShape)]) -> Option<(&str, Confidence)> {
        results.iter().find_map(|(_, shape)| match shape {
            ValueShape::Object { package, confidence } => Some((package.as_str(), *confidence)),
            #[allow(unreachable_patterns)] // keeps compiling if `Object` is the only variant
            _ => None,
        })
    }

    /// Returns the confidence of the first object shape whose package equals
    /// `expected_package`.
    fn object_for_package(
        results: &[(EntityId, ValueShape)],
        expected_package: &str,
    ) -> Option<Confidence> {
        results.iter().find_map(|(_, shape)| {
            if let ValueShape::Object { package, confidence } = shape {
                if package == expected_package {
                    return Some(*confidence);
                }
            }
            None
        })
    }

    #[test]
    fn constructor_call_infers_object_medium() -> Result<(), String> {
        let results = parse_and_infer("my $obj = Foo->new();\n")?;
        let (pkg, conf) = first_object(&results).ok_or("expected Object shape from Foo->new()")?;
        assert_eq!(pkg, "Foo");
        assert_eq!(conf, Confidence::Medium);
        Ok(())
    }

    #[test]
    fn qualified_constructor_call_infers_object() -> Result<(), String> {
        let results = parse_and_infer("my $obj = My::App->new();\n")?;
        let (pkg, conf) =
            first_object(&results).ok_or("expected Object shape from My::App->new()")?;
        assert_eq!(pkg, "My::App");
        assert_eq!(conf, Confidence::Medium);
        Ok(())
    }

    #[test]
    fn bless_with_package_infers_object_low() -> Result<(), String> {
        let code = "package Foo;\nsub new { my $self = bless {}, 'Foo'; }\n";
        let results = parse_and_infer(code)?;
        let (pkg, conf) =
            first_object(&results).ok_or("expected Object shape from bless {}, 'Foo'")?;
        assert_eq!(pkg, "Foo");
        assert_eq!(conf, Confidence::Low);
        Ok(())
    }

    #[test]
    fn self_in_method_infers_enclosing_package() -> Result<(), String> {
        let code = "package Bar;\nsub greet { my $msg = $self->name(); }\n";
        let results = parse_and_infer(code)?;
        let has_bar_medium = results.iter().any(|(_, shape)| {
            matches!(shape, ValueShape::Object { package, confidence }
                if package == "Bar" && *confidence == Confidence::Medium)
        });
        assert!(
            has_bar_medium,
            "expected $self to infer Object {{ Bar, Medium }}, got {results:?}"
        );
        Ok(())
    }

    #[test]
    fn signature_self_infers_enclosing_package_high() -> Result<(), String> {
        let code = "package Widget;\nsub render($self, $name) { return $name; }\n";
        let results = parse_and_infer(code)?;
        let confidence =
            object_for_package(&results, "Widget").ok_or("expected signature self shape")?;
        assert_eq!(confidence, Confidence::High);
        Ok(())
    }

    #[test]
    fn argument_unpack_self_infers_enclosing_package() -> Result<(), String> {
        let code = "package Widget;\nsub render { my ($self, $name) = @_; return $name; }\n";
        let results = parse_and_infer(code)?;
        let confidence =
            object_for_package(&results, "Widget").ok_or("expected @_ self unpack shape")?;
        assert_eq!(confidence, Confidence::Medium);
        Ok(())
    }

    #[test]
    fn dbi_connect_infers_database_handle() -> Result<(), String> {
        let results = parse_and_infer("my $dbh = DBI->connect('dbi:SQLite:dbname=:memory:');\n")?;
        let confidence =
            object_for_package(&results, "DBI::db").ok_or("expected DBI::db handle shape")?;
        assert_eq!(confidence, Confidence::Medium);
        Ok(())
    }

    #[test]
    fn dbh_prepare_infers_statement_handle_after_connect() -> Result<(), String> {
        let code = "my $dbh = DBI->connect('dbi:SQLite:dbname=:memory:');\nmy $sth = $dbh->prepare('select 1');\n";
        let results = parse_and_infer(code)?;
        let confidence =
            object_for_package(&results, "DBI::st").ok_or("expected DBI::st statement shape")?;
        assert_eq!(confidence, Confidence::Medium);
        Ok(())
    }

    #[test]
    fn prepare_on_unknown_receiver_does_not_infer_statement_handle() -> Result<(), String> {
        let results = parse_and_infer("my $sth = $thing->prepare('select 1');\n")?;
        assert!(
            object_for_package(&results, "DBI::st").is_none(),
            "unknown prepare receiver should not infer DBI::st: {results:?}"
        );
        Ok(())
    }

    #[test]
    fn prepare_on_dbh_name_without_known_connect_does_not_infer_statement_handle()
    -> Result<(), String> {
        let results = parse_and_infer("my $sth = $dbh->prepare('select 1');\n")?;
        assert!(
            object_for_package(&results, "DBI::st").is_none(),
            "$dbh naming alone should not infer DBI::st: {results:?}"
        );
        Ok(())
    }

    #[test]
    fn plain_scalar_produces_no_shape() -> Result<(), String> {
        let results = parse_and_infer("my $x = 42;\n")?;
        assert!(first_object(&results).is_none(), "plain scalar should not produce Object shape");
        Ok(())
    }

    #[test]
    fn multiple_packages_track_context() -> Result<(), String> {
        let code = r#"
package Alpha;
sub new { my $self = bless {}, 'Alpha'; }

package Beta;
sub new { my $self = bless {}, 'Beta'; }
"#;
        let results = parse_and_infer(code)?;
        let has_alpha = results.iter().any(
            |(_, shape)| matches!(shape, ValueShape::Object { package, .. } if package == "Alpha"),
        );
        let has_beta = results.iter().any(
            |(_, shape)| matches!(shape, ValueShape::Object { package, .. } if package == "Beta"),
        );
        assert!(has_alpha, "expected Alpha object shape");
        assert!(has_beta, "expected Beta object shape");
        Ok(())
    }
}