// perl_semantic_analyzer/analysis/package_graph_extractor.rs

//! Package graph edge extraction from Perl inheritance and role-composition patterns.
//!
//! Walks the AST to extract [`PackageEdge`] entries that describe inheritance
//! and role-composition relationships between Perl packages. Plain
//! `use Module` statements are not turned into edges by this module.
//!
//! # Supported Patterns
//!
//! | Perl source                              | `PackageEdgeKind`     |
//! |------------------------------------------|-----------------------|
//! | `use parent 'Base'`                      | `Inherits`            |
//! | `use parent qw(Base1 Base2)`             | `Inherits`            |
//! | `use base 'Base'`                        | `Inherits`            |
//! | `use base qw(Base1 Base2)`               | `Inherits`            |
//! | `@ISA = ('Base')`                        | `Inherits`            |
//! | `our @ISA = qw(Base1 Base2)`             | `Inherits`            |
//! | `push @ISA, 'Base'`                      | `Inherits`            |
//! | `extends 'Base'` (Moo/Moose)             | `Inherits`            |
//! | `with 'Role'` (Moo/Moose)                | `ComposesRole`        |
19
20use crate::ast::{Node, NodeKind};
21use perl_semantic_facts::{AnchorId, Confidence, FileId, PackageEdge, PackageEdgeKind, Provenance};
22
23/// Extractor that walks an AST to produce [`PackageEdge`] entries for each
24/// inheritance, role-composition, or dependency relationship found.
25pub struct PackageGraphExtractor;
26
27impl PackageGraphExtractor {
28    /// Walk the entire AST and return one [`PackageEdge`] per detected
29    /// inheritance or role-composition relationship.
30    ///
31    /// Each edge carries the supplied `file_id` and an `anchor_id` derived
32    /// from the statement's byte-offset span.
33    pub fn extract(ast: &Node, _file_id: FileId) -> Vec<PackageEdge> {
34        let mut state = ExtractorState { current_package: "main".to_string(), edges: Vec::new() };
35        state.walk(ast);
36        state.edges
37    }
38}
39
40/// Internal state for the recursive AST walk.
41struct ExtractorState {
42    /// Current package context (updated when `package Foo;` is encountered).
43    current_package: String,
44    /// Accumulated edges.
45    edges: Vec<PackageEdge>,
46}
47
48impl ExtractorState {
49    /// Recursive AST walker.
50    fn walk(&mut self, node: &Node) {
51        match &node.kind {
52            // For statement containers (Program, Block), walk statements in order
53            // so that `package Foo;` (semicolon form) updates the current package
54            // for subsequent sibling statements.
55            NodeKind::Program { statements } | NodeKind::Block { statements } => {
56                for stmt in statements {
57                    self.walk(stmt);
58                }
59                return;
60            }
61
62            // `package Foo { ... }` (block form) — scoped package context.
63            NodeKind::Package { name, block: Some(block), .. } => {
64                let prev_package = self.current_package.clone();
65                self.current_package = name.clone();
66                self.walk(block);
67                self.current_package = prev_package;
68                return;
69            }
70
71            // `package Foo;` (semicolon form) — updates current package for
72            // subsequent siblings. The actual statements follow as siblings
73            // in the parent Program/Block.
74            NodeKind::Package { name, block: None, .. } => {
75                self.current_package = name.clone();
76                return;
77            }
78
79            // `use parent 'Base'` / `use parent qw(Base1 Base2)`
80            // `use base 'Base'` / `use base qw(Base1 Base2)`
81            NodeKind::Use { module, args, .. } if module == "parent" || module == "base" => {
82                let anchor_id = Self::anchor_from_node(node);
83                let names = Self::extract_parent_names_from_args(args);
84                for name in names {
85                    self.emit_edge(name, PackageEdgeKind::Inherits, anchor_id, Confidence::High);
86                }
87            }
88
89            // `our @ISA = qw(Base1 Base2)` (VariableDeclaration form)
90            NodeKind::VariableDeclaration { variable, initializer: Some(init), .. } => {
91                if Self::is_isa_variable(variable) {
92                    let anchor_id = Self::anchor_from_node(node);
93                    let names = Self::collect_names_from_node(init);
94                    for name in names {
95                        self.emit_edge(
96                            name,
97                            PackageEdgeKind::Inherits,
98                            anchor_id,
99                            Confidence::High,
100                        );
101                    }
102                }
103            }
104
105            // `@ISA = qw(Base1 Base2)` (bare Assignment form)
106            NodeKind::Assignment { lhs, rhs, .. } => {
107                if Self::is_isa_variable(lhs) {
108                    let anchor_id = Self::anchor_from_node(node);
109                    let names = Self::collect_names_from_node(rhs);
110                    for name in names {
111                        self.emit_edge(
112                            name,
113                            PackageEdgeKind::Inherits,
114                            anchor_id,
115                            Confidence::High,
116                        );
117                    }
118                }
119            }
120
121            // `push @ISA, 'Base'` and `extends 'Base'` / `with 'Role'`
122            // Both appear as ExpressionStatement(FunctionCall { ... })
123            NodeKind::ExpressionStatement { expression } => {
124                self.handle_expression_statement(expression, node);
125            }
126
127            _ => {}
128        }
129
130        // Recurse into children for all other node types.
131        for child in node.children() {
132            self.walk(child);
133        }
134    }
135
136    /// Handle expression statements that may contain `push @ISA`, `extends`, or `with`.
137    fn handle_expression_statement(&mut self, expression: &Node, stmt_node: &Node) {
138        if let NodeKind::FunctionCall { name, args } = &expression.kind {
139            match name.as_str() {
140                // `push @ISA, 'Base1', 'Base2'`
141                "push" => {
142                    if let Some(first_arg) = args.first() {
143                        if Self::is_isa_variable(first_arg) {
144                            let anchor_id = Self::anchor_from_node(stmt_node);
145                            for arg in args.iter().skip(1) {
146                                let names = Self::collect_names_from_node(arg);
147                                for name in names {
148                                    self.emit_edge(
149                                        name,
150                                        PackageEdgeKind::Inherits,
151                                        anchor_id,
152                                        Confidence::High,
153                                    );
154                                }
155                            }
156                        }
157                    }
158                }
159                // `extends 'Base'` (Moo/Moose)
160                "extends" => {
161                    let anchor_id = Self::anchor_from_node(stmt_node);
162                    let names = Self::collect_names_from_args(args);
163                    for name in names {
164                        self.emit_edge(
165                            name,
166                            PackageEdgeKind::Inherits,
167                            anchor_id,
168                            Confidence::High,
169                        );
170                    }
171                }
172                // `with 'Role'` (Moo/Moose)
173                "with" => {
174                    let anchor_id = Self::anchor_from_node(stmt_node);
175                    let names = Self::collect_names_from_args(args);
176                    for name in names {
177                        self.emit_edge(
178                            name,
179                            PackageEdgeKind::ComposesRole,
180                            anchor_id,
181                            Confidence::High,
182                        );
183                    }
184                }
185                _ => {}
186            }
187        }
188
189        // Also handle the two-statement form where `extends`/`with` is parsed
190        // as a bare Identifier followed by a String in the next statement.
191        // This is handled by the parent walk since we process siblings.
192    }
193
194    // ── Helpers ─────────────────────────────────────────────────────────
195
196    /// Emit a [`PackageEdge`] from the current package to the given target.
197    fn emit_edge(
198        &mut self,
199        to_package: String,
200        kind: PackageEdgeKind,
201        anchor_id: AnchorId,
202        confidence: Confidence,
203    ) {
204        self.edges.push(PackageEdge::new(
205            self.current_package.clone(),
206            to_package,
207            kind,
208            Some(anchor_id),
209            Provenance::ExactAst,
210            confidence,
211        ));
212    }
213
214    /// Derive an [`AnchorId`] from a node's byte-offset span.
215    fn anchor_from_node(node: &Node) -> AnchorId {
216        AnchorId(node.location.start as u64)
217    }
218
219    /// Check whether a node is the `@ISA` variable.
220    fn is_isa_variable(node: &Node) -> bool {
221        matches!(&node.kind, NodeKind::Variable { sigil, name } if sigil == "@" && name == "ISA")
222    }
223
224    /// Extract parent class names from `use parent`/`use base` args.
225    ///
226    /// The parser stores args as strings. Handles:
227    /// - Quoted strings: `"'Parent'"` → `"Parent"`
228    /// - qw-lists: `"qw(Base1 Base2)"` → `["Base1", "Base2"]`
229    /// - Flags like `-norequire` are skipped.
230    fn extract_parent_names_from_args(args: &[String]) -> Vec<String> {
231        let mut names = Vec::new();
232        for arg in args {
233            let trimmed = arg.trim();
234            // Skip flags like -norequire
235            if trimmed.starts_with('-') || trimmed.is_empty() {
236                continue;
237            }
238            names.extend(Self::expand_arg_to_names(trimmed));
239        }
240        names
241    }
242
/// Expand a single arg string into individual class/role names.
///
/// Recognised forms:
/// - `qw` lists with any non-word delimiter (`qw(...)`, `qw{...}`,
///   `qw[...]`, `qw<...>`, `qw/.../`, ...), optionally with whitespace
///   between `qw` and the opening delimiter. The whitespace-separated words
///   between the opener and the *last* matching closer are returned. An
///   empty list such as `qw{}` and a list with no closer both yield no names.
/// - Anything else is treated as a single, possibly quoted, name:
///   surrounding `'` / `"` characters and whitespace are stripped, and an
///   empty result yields no names.
///
/// A bareword that merely starts with `qw` (for example `qwerty` or
/// `qw::Foo`) is a plain name: a word character or `::` directly after
/// `qw` does not open a list.
fn expand_arg_to_names(arg: &str) -> Vec<String> {
    let arg = arg.trim();

    if let Some(after_qw) = arg.strip_prefix("qw") {
        let list = after_qw.trim_start();
        let opener = if after_qw.starts_with("::") {
            // `qw::Foo` is a package-qualified name, not a `:`-delimited list.
            None
        } else {
            list.chars().next().filter(|c| !c.is_alphanumeric() && *c != '_')
        };

        if let Some(open) = opener {
            let close = match open {
                '(' => ')',
                '{' => '}',
                '[' => ']',
                '<' => '>',
                same => same,
            };
            // Skip exactly the opener (by its UTF-8 length) so that slicing
            // always lands on a char boundary, then search for the closer in
            // what remains so the opener itself can never match.
            let body = &list[open.len_utf8()..];
            return match body.rfind(close) {
                Some(end) => body[..end].split_whitespace().map(str::to_string).collect(),
                // Unterminated list: report nothing rather than a bogus name.
                None => Vec::new(),
            };
        }
    }

    // Quoted string or bareword: strip quotes and surrounding whitespace.
    let unquoted = arg.trim_matches('\'').trim_matches('"').trim();
    if unquoted.is_empty() {
        Vec::new()
    } else {
        vec![unquoted.to_string()]
    }
}
287
288    /// Collect package/class/role names from an AST node (RHS of @ISA assignment
289    /// or argument to push/extends/with).
290    fn collect_names_from_node(node: &Node) -> Vec<String> {
291        match &node.kind {
292            NodeKind::String { value, .. } => {
293                let trimmed = value.trim_matches('\'').trim_matches('"').trim();
294                if trimmed.is_empty() { Vec::new() } else { vec![trimmed.to_string()] }
295            }
296            NodeKind::Identifier { name } => {
297                // Handle qw(...) stored as identifier
298                if name.starts_with("qw") {
299                    Self::expand_arg_to_names(name)
300                } else if name.is_empty() {
301                    Vec::new()
302                } else {
303                    vec![name.clone()]
304                }
305            }
306            NodeKind::ArrayLiteral { elements } => {
307                elements.iter().flat_map(Self::collect_names_from_node).collect()
308            }
309            _ => Vec::new(),
310        }
311    }
312
313    /// Collect names from function call arguments (Vec<Node>).
314    fn collect_names_from_args(args: &[Node]) -> Vec<String> {
315        args.iter().flat_map(Self::collect_names_from_node).collect()
316    }
317}
318
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Parser;

    /// Parse Perl source and extract package graph edges.
    ///
    /// A parse failure is reported as `Err` so that the calling test fails
    /// instead of silently running its assertions against an empty edge
    /// list (which would let "expects no edges" tests pass vacuously).
    fn parse_and_extract(code: &str) -> Result<Vec<PackageEdge>, String> {
        let mut parser = Parser::new(code);
        let ast = parser.parse().map_err(|_| format!("failed to parse Perl source: {code:?}"))?;
        Ok(PackageGraphExtractor::extract(&ast, FileId(1)))
    }

    // ── use parent 'Base' → Inherits ────────────────────────────────────

    #[test]
    fn test_use_parent_single() -> Result<(), String> {
        let edges = parse_and_extract("package Child;\nuse parent 'Base';\n1;")?;
        let edge = edges.first().ok_or("expected at least one PackageEdge")?;

        assert_eq!(edge.from_package, "Child");
        assert_eq!(edge.to_package, "Base");
        assert_eq!(edge.kind, PackageEdgeKind::Inherits);
        assert_eq!(edge.provenance, Provenance::ExactAst);
        assert_eq!(edge.confidence, Confidence::High);
        assert!(edge.anchor_id.is_some());
        Ok(())
    }

    #[test]
    fn test_use_parent_qw_multiple() -> Result<(), String> {
        let edges = parse_and_extract("package Child;\nuse parent qw(Base1 Base2);\n1;")?;
        assert_eq!(edges.len(), 2, "expected two edges, got {}", edges.len());

        assert_eq!(edges[0].from_package, "Child");
        assert_eq!(edges[0].to_package, "Base1");
        assert_eq!(edges[0].kind, PackageEdgeKind::Inherits);

        assert_eq!(edges[1].from_package, "Child");
        assert_eq!(edges[1].to_package, "Base2");
        assert_eq!(edges[1].kind, PackageEdgeKind::Inherits);
        Ok(())
    }

    #[test]
    fn test_use_parent_with_norequire() -> Result<(), String> {
        let edges = parse_and_extract("package Child;\nuse parent -norequire, 'Base';\n1;")?;
        let edge = edges.first().ok_or("expected at least one PackageEdge")?;

        assert_eq!(edge.from_package, "Child");
        assert_eq!(edge.to_package, "Base");
        assert_eq!(edge.kind, PackageEdgeKind::Inherits);
        Ok(())
    }

    // ── use base 'Base' → Inherits ──────────────────────────────────────

    #[test]
    fn test_use_base_single() -> Result<(), String> {
        let edges = parse_and_extract("package Child;\nuse base 'Base';\n1;")?;
        let edge = edges.first().ok_or("expected at least one PackageEdge")?;

        assert_eq!(edge.from_package, "Child");
        assert_eq!(edge.to_package, "Base");
        assert_eq!(edge.kind, PackageEdgeKind::Inherits);
        assert_eq!(edge.confidence, Confidence::High);
        Ok(())
    }

    #[test]
    fn test_use_base_qw_multiple() -> Result<(), String> {
        let edges = parse_and_extract("package Child;\nuse base qw(Base1 Base2);\n1;")?;
        assert_eq!(edges.len(), 2, "expected two edges, got {}", edges.len());

        assert_eq!(edges[0].to_package, "Base1");
        assert_eq!(edges[1].to_package, "Base2");
        Ok(())
    }

    // ── @ISA = ('Base') → Inherits ──────────────────────────────────────

    #[test]
    fn test_isa_assignment_bare() -> Result<(), String> {
        let edges = parse_and_extract("package Child;\n@ISA = ('Base');\n1;")?;
        let edge = edges.first().ok_or("expected at least one PackageEdge")?;

        assert_eq!(edge.from_package, "Child");
        assert_eq!(edge.to_package, "Base");
        assert_eq!(edge.kind, PackageEdgeKind::Inherits);
        Ok(())
    }

    #[test]
    fn test_isa_assignment_our() -> Result<(), String> {
        let edges = parse_and_extract("package Child;\nour @ISA = qw(Base1 Base2);\n1;")?;
        assert_eq!(edges.len(), 2, "expected two edges, got {}", edges.len());

        assert_eq!(edges[0].to_package, "Base1");
        assert_eq!(edges[1].to_package, "Base2");
        Ok(())
    }

    // ── push @ISA, 'Base' → Inherits ────────────────────────────────────

    #[test]
    fn test_push_isa_single() -> Result<(), String> {
        let edges = parse_and_extract("package Child;\npush @ISA, 'Base';\n1;")?;
        let edge = edges.first().ok_or("expected at least one PackageEdge")?;

        assert_eq!(edge.from_package, "Child");
        assert_eq!(edge.to_package, "Base");
        assert_eq!(edge.kind, PackageEdgeKind::Inherits);
        Ok(())
    }

    #[test]
    fn test_push_isa_multiple() -> Result<(), String> {
        let edges = parse_and_extract("package Child;\npush @ISA, 'Base1', 'Base2';\n1;")?;
        assert_eq!(edges.len(), 2, "expected two edges, got {}", edges.len());

        assert_eq!(edges[0].to_package, "Base1");
        assert_eq!(edges[1].to_package, "Base2");
        Ok(())
    }

    // ── extends 'Base' (Moo/Moose) → Inherits ──────────────────────────

    #[test]
    fn test_extends_single() -> Result<(), String> {
        let edges =
            parse_and_extract("package MyApp::Admin;\nuse Moose;\nextends 'MyApp::User';\n1;")?;
        // Filter to Inherits so the assertions do not depend on any other
        // edge kinds that may be present.
        let inherits: Vec<_> =
            edges.iter().filter(|e| e.kind == PackageEdgeKind::Inherits).collect();
        let edge = inherits.first().ok_or("expected at least one Inherits edge")?;

        assert_eq!(edge.from_package, "MyApp::Admin");
        assert_eq!(edge.to_package, "MyApp::User");
        assert_eq!(edge.kind, PackageEdgeKind::Inherits);
        Ok(())
    }

    // ── with 'Role' (Moo/Moose) → ComposesRole ─────────────────────────

    #[test]
    fn test_with_single_role() -> Result<(), String> {
        let edges =
            parse_and_extract("package MyApp::User;\nuse Moose;\nwith 'MyApp::Printable';\n1;")?;
        let roles: Vec<_> =
            edges.iter().filter(|e| e.kind == PackageEdgeKind::ComposesRole).collect();
        let edge = roles.first().ok_or("expected at least one ComposesRole edge")?;

        assert_eq!(edge.from_package, "MyApp::User");
        assert_eq!(edge.to_package, "MyApp::Printable");
        assert_eq!(edge.kind, PackageEdgeKind::ComposesRole);
        Ok(())
    }

    #[test]
    fn test_with_multiple_roles() -> Result<(), String> {
        let edges =
            parse_and_extract("package MyApp::User;\nuse Moose;\nwith 'Role1', 'Role2';\n1;")?;
        let roles: Vec<_> =
            edges.iter().filter(|e| e.kind == PackageEdgeKind::ComposesRole).collect();
        assert_eq!(roles.len(), 2, "expected two ComposesRole edges, got {}", roles.len());

        assert_eq!(roles[0].to_package, "Role1");
        assert_eq!(roles[1].to_package, "Role2");
        Ok(())
    }

    // ── Package context tracking ────────────────────────────────────────

    #[test]
    fn test_multiple_packages() -> Result<(), String> {
        let code = r#"
package Parent;
1;

package Child;
use parent 'Parent';
1;
"#;
        let edges = parse_and_extract(code)?;
        let edge = edges.first().ok_or("expected at least one PackageEdge")?;

        assert_eq!(edge.from_package, "Child");
        assert_eq!(edge.to_package, "Parent");
        Ok(())
    }

    #[test]
    fn test_default_main_package() -> Result<(), String> {
        let edges = parse_and_extract("use parent 'Base';\n1;")?;
        let edge = edges.first().ok_or("expected at least one PackageEdge")?;

        assert_eq!(edge.from_package, "main");
        assert_eq!(edge.to_package, "Base");
        Ok(())
    }

    // ── Combined patterns ───────────────────────────────────────────────

    #[test]
    fn test_extends_and_with_combined() -> Result<(), String> {
        let code = r#"
package MyApp::Admin;
use Moose;
extends 'MyApp::User';
with 'MyApp::Printable', 'MyApp::Serializable';
1;
"#;
        let edges = parse_and_extract(code)?;
        let inherits: Vec<_> =
            edges.iter().filter(|e| e.kind == PackageEdgeKind::Inherits).collect();
        let roles: Vec<_> =
            edges.iter().filter(|e| e.kind == PackageEdgeKind::ComposesRole).collect();

        assert_eq!(inherits.len(), 1, "expected one Inherits edge");
        assert_eq!(inherits[0].to_package, "MyApp::User");

        assert_eq!(roles.len(), 2, "expected two ComposesRole edges");
        assert_eq!(roles[0].to_package, "MyApp::Printable");
        assert_eq!(roles[1].to_package, "MyApp::Serializable");
        Ok(())
    }

    // ── No edges for unrelated code ─────────────────────────────────────

    #[test]
    fn test_no_edges_for_plain_use() -> Result<(), String> {
        // The `?` matters here: without it a parse failure would look
        // exactly like the expected "no edges" outcome.
        let edges = parse_and_extract("package Foo;\nuse strict;\nuse warnings;\n1;")?;
        // No inheritance or role edges expected.
        let inheritance_edges: Vec<_> = edges
            .iter()
            .filter(|e| {
                e.kind == PackageEdgeKind::Inherits || e.kind == PackageEdgeKind::ComposesRole
            })
            .collect();
        assert!(
            inheritance_edges.is_empty(),
            "expected no inheritance/role edges, got {inheritance_edges:?}"
        );
        Ok(())
    }

    // ── Qualified package names ─────────────────────────────────────────

    #[test]
    fn test_qualified_parent_names() -> Result<(), String> {
        let edges = parse_and_extract("package My::Child;\nuse parent 'My::Base::Class';\n1;")?;
        let edge = edges.first().ok_or("expected at least one PackageEdge")?;

        assert_eq!(edge.from_package, "My::Child");
        assert_eq!(edge.to_package, "My::Base::Class");
        Ok(())
    }
}