Skip to main content

perl_semantic_analyzer/analysis/
package_graph_extractor.rs

1//! Package graph edge extraction from Perl inheritance and role-composition patterns.
2//!
//! Walks the AST to extract [`PackageEdge`] entries that describe inheritance
//! and role-composition relationships between Perl packages.
5//!
6//! # Supported Patterns
7//!
8//! | Perl source                              | `PackageEdgeKind`     |
9//! |------------------------------------------|-----------------------|
10//! | `use parent 'Base'`                      | `Inherits`            |
11//! | `use parent qw(Base1 Base2)`             | `Inherits`            |
12//! | `use base 'Base'`                        | `Inherits`            |
13//! | `use base qw(Base1 Base2)`               | `Inherits`            |
14//! | `@ISA = ('Base')`                        | `Inherits`            |
15//! | `our @ISA = qw(Base1 Base2)`             | `Inherits`            |
16//! | `push @ISA, 'Base'`                      | `Inherits`            |
17//! | `extends 'Base'` (Moo/Moose)             | `Inherits`            |
//! | `with 'Role'` (Moo/Moose)                | `ComposesRole`        |
19
20use crate::ast::{Node, NodeKind};
21use perl_semantic_facts::{AnchorId, Confidence, FileId, PackageEdge, PackageEdgeKind, Provenance};
22
23/// Extractor that walks an AST to produce [`PackageEdge`] entries for each
24/// inheritance, role-composition, or dependency relationship found.
25pub struct PackageGraphExtractor;
26
27impl PackageGraphExtractor {
28    /// Walk the entire AST and return one [`PackageEdge`] per detected
29    /// inheritance or role-composition relationship.
30    ///
31    /// Each edge carries the supplied `file_id` and an `anchor_id` derived
32    /// from the statement's byte-offset span.
33    pub fn extract(ast: &Node, _file_id: FileId) -> Vec<PackageEdge> {
34        let mut state = ExtractorState { current_package: "main".to_string(), edges: Vec::new() };
35        state.walk(ast);
36        state.edges
37    }
38}
39
40/// Internal state for the recursive AST walk.
41struct ExtractorState {
42    /// Current package context (updated when `package Foo;` is encountered).
43    current_package: String,
44    /// Accumulated edges.
45    edges: Vec<PackageEdge>,
46}
47
48impl ExtractorState {
49    /// Walk a statement list in source order using the current package context.
50    fn walk_statements(&mut self, statements: &[Node]) {
51        for stmt in statements {
52            self.walk(stmt);
53        }
54    }
55
56    /// Recursive AST walker.
57    fn walk(&mut self, node: &Node) {
58        match &node.kind {
59            // For the top-level program, walk statements in order so that
60            // `package Foo;` (semicolon form) updates the current package for
61            // subsequent sibling statements through the end of the file.
62            NodeKind::Program { statements } => {
63                self.walk_statements(statements);
64                return;
65            }
66
67            // Bare blocks introduce a lexical package scope: `package Foo;` inside
68            // the block applies to later statements in that block, but the outer
69            // package resumes after the block.
70            NodeKind::Block { statements } => {
71                let prev_package = self.current_package.clone();
72                self.walk_statements(statements);
73                self.current_package = prev_package;
74                return;
75            }
76
77            // `package Foo { ... }` (block form) — scoped package context.
78            NodeKind::Package { name, block: Some(block), .. } => {
79                let prev_package = self.current_package.clone();
80                self.current_package = name.clone();
81                self.walk(block);
82                self.current_package = prev_package;
83                return;
84            }
85
86            // `package Foo;` (semicolon form) — updates current package for
87            // subsequent siblings. The actual statements follow as siblings
88            // in the parent Program/Block.
89            NodeKind::Package { name, block: None, .. } => {
90                self.current_package = name.clone();
91                return;
92            }
93
94            // `use parent 'Base'` / `use parent qw(Base1 Base2)`
95            // `use base 'Base'` / `use base qw(Base1 Base2)`
96            NodeKind::Use { module, args, .. } if module == "parent" || module == "base" => {
97                let anchor_id = Self::anchor_from_node(node);
98                let names = Self::extract_parent_names_from_args(args);
99                for name in names {
100                    self.emit_edge(name, PackageEdgeKind::Inherits, anchor_id, Confidence::High);
101                }
102            }
103
104            // `our @ISA = qw(Base1 Base2)` (VariableDeclaration form)
105            NodeKind::VariableDeclaration { variable, initializer: Some(init), .. } => {
106                if Self::is_isa_variable(variable) {
107                    let anchor_id = Self::anchor_from_node(node);
108                    let names = Self::collect_names_from_node(init);
109                    for name in names {
110                        self.emit_edge(
111                            name,
112                            PackageEdgeKind::Inherits,
113                            anchor_id,
114                            Confidence::High,
115                        );
116                    }
117                }
118            }
119
120            // `@ISA = qw(Base1 Base2)` (bare Assignment form)
121            NodeKind::Assignment { lhs, rhs, .. } => {
122                if Self::is_isa_variable(lhs) {
123                    let anchor_id = Self::anchor_from_node(node);
124                    let names = Self::collect_names_from_node(rhs);
125                    for name in names {
126                        self.emit_edge(
127                            name,
128                            PackageEdgeKind::Inherits,
129                            anchor_id,
130                            Confidence::High,
131                        );
132                    }
133                }
134            }
135
136            // `push @ISA, 'Base'` and `extends 'Base'` / `with 'Role'`
137            // Both appear as ExpressionStatement(FunctionCall { ... })
138            NodeKind::ExpressionStatement { expression } => {
139                self.handle_expression_statement(expression, node);
140            }
141
142            _ => {}
143        }
144
145        // Recurse into children for all other node types.
146        for child in node.children() {
147            self.walk(child);
148        }
149    }
150
151    /// Handle expression statements that may contain `push @ISA`, `extends`, or `with`.
152    fn handle_expression_statement(&mut self, expression: &Node, stmt_node: &Node) {
153        if let NodeKind::FunctionCall { name, args } = &expression.kind {
154            match name.as_str() {
155                // `push @ISA, 'Base1', 'Base2'`
156                "push" => {
157                    if let Some(first_arg) = args.first() {
158                        if Self::is_isa_variable(first_arg) {
159                            let anchor_id = Self::anchor_from_node(stmt_node);
160                            for arg in args.iter().skip(1) {
161                                let names = Self::collect_names_from_node(arg);
162                                for name in names {
163                                    self.emit_edge(
164                                        name,
165                                        PackageEdgeKind::Inherits,
166                                        anchor_id,
167                                        Confidence::High,
168                                    );
169                                }
170                            }
171                        }
172                    }
173                }
174                // `extends 'Base'` (Moo/Moose)
175                "extends" => {
176                    let anchor_id = Self::anchor_from_node(stmt_node);
177                    let names = Self::collect_names_from_args(args);
178                    for name in names {
179                        self.emit_edge(
180                            name,
181                            PackageEdgeKind::Inherits,
182                            anchor_id,
183                            Confidence::High,
184                        );
185                    }
186                }
187                // `with 'Role'` (Moo/Moose)
188                "with" => {
189                    let anchor_id = Self::anchor_from_node(stmt_node);
190                    let names = Self::collect_names_from_args(args);
191                    for name in names {
192                        self.emit_edge(
193                            name,
194                            PackageEdgeKind::ComposesRole,
195                            anchor_id,
196                            Confidence::High,
197                        );
198                    }
199                }
200                _ => {}
201            }
202        }
203
204        // Also handle the two-statement form where `extends`/`with` is parsed
205        // as a bare Identifier followed by a String in the next statement.
206        // This is handled by the parent walk since we process siblings.
207    }
208
209    // ── Helpers ─────────────────────────────────────────────────────────
210
211    /// Emit a [`PackageEdge`] from the current package to the given target.
212    fn emit_edge(
213        &mut self,
214        to_package: String,
215        kind: PackageEdgeKind,
216        anchor_id: AnchorId,
217        confidence: Confidence,
218    ) {
219        self.edges.push(PackageEdge::new(
220            self.current_package.clone(),
221            to_package,
222            kind,
223            Some(anchor_id),
224            Provenance::ExactAst,
225            confidence,
226        ));
227    }
228
229    /// Derive an [`AnchorId`] from a node's byte-offset span.
230    fn anchor_from_node(node: &Node) -> AnchorId {
231        AnchorId(node.location.start as u64)
232    }
233
234    /// Check whether a node is the `@ISA` variable.
235    fn is_isa_variable(node: &Node) -> bool {
236        matches!(&node.kind, NodeKind::Variable { sigil, name } if sigil == "@" && name == "ISA")
237    }
238
239    /// Extract parent class names from `use parent`/`use base` args.
240    ///
241    /// The parser stores args as strings. Handles:
242    /// - Quoted strings: `"'Parent'"` → `"Parent"`
243    /// - qw-lists: `"qw(Base1 Base2)"` → `["Base1", "Base2"]`
244    /// - Flags like `-norequire` are skipped.
245    fn extract_parent_names_from_args(args: &[String]) -> Vec<String> {
246        let mut names = Vec::new();
247        for arg in args {
248            let trimmed = arg.trim();
249            // Skip flags like -norequire
250            if trimmed.starts_with('-') || trimmed.is_empty() {
251                continue;
252            }
253            names.extend(Self::expand_arg_to_names(trimmed));
254        }
255        names
256    }
257
258    /// Expand a single arg string into individual class/role names.
259    ///
260    /// Handles qw(...) lists and quoted strings.
261    fn expand_arg_to_names(arg: &str) -> Vec<String> {
262        let arg = arg.trim();
263        // qw(...) form
264        if arg.starts_with("qw(") {
265            if let Some(content) = arg.strip_prefix("qw(").and_then(|s| s.strip_suffix(')')) {
266                return content
267                    .split_whitespace()
268                    .filter(|s| !s.is_empty())
269                    .map(|s| s.to_string())
270                    .collect();
271            }
272        }
273        // Other qw variants: qw{...}, qw[...], qw/.../ etc.
274        if arg.starts_with("qw") && arg.len() > 3 {
275            let bytes = arg.as_bytes();
276            let open = bytes[2] as char;
277            let close = match open {
278                '(' => ')',
279                '{' => '}',
280                '[' => ']',
281                '<' => '>',
282                c => c,
283            };
284            if let Some(end) = arg.rfind(close) {
285                if end > 3 {
286                    let content = &arg[3..end];
287                    return content
288                        .split_whitespace()
289                        .filter(|s| !s.is_empty())
290                        .map(|s| s.to_string())
291                        .collect();
292                }
293            }
294        }
295        // Quoted string: strip quotes
296        let unquoted = arg.trim_matches('\'').trim_matches('"').trim();
297        if unquoted.is_empty() {
298            return Vec::new();
299        }
300        vec![unquoted.to_string()]
301    }
302
303    /// Collect package/class/role names from an AST node (RHS of @ISA assignment
304    /// or argument to push/extends/with).
305    fn collect_names_from_node(node: &Node) -> Vec<String> {
306        match &node.kind {
307            NodeKind::String { value, .. } => {
308                let trimmed = value.trim_matches('\'').trim_matches('"').trim();
309                if trimmed.is_empty() { Vec::new() } else { vec![trimmed.to_string()] }
310            }
311            NodeKind::Identifier { name } => {
312                // Handle qw(...) stored as identifier
313                if name.starts_with("qw") {
314                    Self::expand_arg_to_names(name)
315                } else if name.is_empty() {
316                    Vec::new()
317                } else {
318                    vec![name.clone()]
319                }
320            }
321            NodeKind::ArrayLiteral { elements } => {
322                elements.iter().flat_map(Self::collect_names_from_node).collect()
323            }
324            _ => Vec::new(),
325        }
326    }
327
328    /// Collect names from function call arguments (Vec<Node>).
329    fn collect_names_from_args(args: &[Node]) -> Vec<String> {
330        args.iter().flat_map(Self::collect_names_from_node).collect()
331    }
332}
333
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Parser;

    /// Parse Perl source and extract package graph edges.
    ///
    /// A parse failure is reported as `Err` rather than as an empty edge list,
    /// so a test that expects *no* edges cannot pass merely because its input
    /// failed to parse, and tests that expect edges fail with the real cause.
    fn parse_and_extract(code: &str) -> Result<Vec<PackageEdge>, String> {
        let mut parser = Parser::new(code);
        let ast =
            parser.parse().map_err(|_| format!("failed to parse test input: {code:?}"))?;
        Ok(PackageGraphExtractor::extract(&ast, FileId(1)))
    }

    // ── use parent 'Base' → Inherits ────────────────────────────────────

    #[test]
    fn test_use_parent_single() -> Result<(), String> {
        let edges = parse_and_extract("package Child;\nuse parent 'Base';\n1;")?;
        let edge = edges.first().ok_or("expected at least one PackageEdge")?;

        assert_eq!(edge.from_package, "Child");
        assert_eq!(edge.to_package, "Base");
        assert_eq!(edge.kind, PackageEdgeKind::Inherits);
        assert_eq!(edge.provenance, Provenance::ExactAst);
        assert_eq!(edge.confidence, Confidence::High);
        assert!(edge.anchor_id.is_some());
        Ok(())
    }

    #[test]
    fn test_use_parent_qw_multiple() -> Result<(), String> {
        let edges = parse_and_extract("package Child;\nuse parent qw(Base1 Base2);\n1;")?;
        assert_eq!(edges.len(), 2, "expected two edges, got {}", edges.len());

        assert_eq!(edges[0].from_package, "Child");
        assert_eq!(edges[0].to_package, "Base1");
        assert_eq!(edges[0].kind, PackageEdgeKind::Inherits);

        assert_eq!(edges[1].from_package, "Child");
        assert_eq!(edges[1].to_package, "Base2");
        assert_eq!(edges[1].kind, PackageEdgeKind::Inherits);
        Ok(())
    }

    #[test]
    fn test_use_parent_with_norequire() -> Result<(), String> {
        let edges = parse_and_extract("package Child;\nuse parent -norequire, 'Base';\n1;")?;
        let edge = edges.first().ok_or("expected at least one PackageEdge")?;

        assert_eq!(edge.from_package, "Child");
        assert_eq!(edge.to_package, "Base");
        assert_eq!(edge.kind, PackageEdgeKind::Inherits);
        Ok(())
    }

    // ── use base 'Base' → Inherits ──────────────────────────────────────

    #[test]
    fn test_use_base_single() -> Result<(), String> {
        let edges = parse_and_extract("package Child;\nuse base 'Base';\n1;")?;
        let edge = edges.first().ok_or("expected at least one PackageEdge")?;

        assert_eq!(edge.from_package, "Child");
        assert_eq!(edge.to_package, "Base");
        assert_eq!(edge.kind, PackageEdgeKind::Inherits);
        assert_eq!(edge.confidence, Confidence::High);
        Ok(())
    }

    #[test]
    fn test_use_base_qw_multiple() -> Result<(), String> {
        let edges = parse_and_extract("package Child;\nuse base qw(Base1 Base2);\n1;")?;
        assert_eq!(edges.len(), 2, "expected two edges, got {}", edges.len());

        assert_eq!(edges[0].to_package, "Base1");
        assert_eq!(edges[1].to_package, "Base2");
        Ok(())
    }

    // ── @ISA = ('Base') → Inherits ──────────────────────────────────────

    #[test]
    fn test_isa_assignment_bare() -> Result<(), String> {
        let edges = parse_and_extract("package Child;\n@ISA = ('Base');\n1;")?;
        let edge = edges.first().ok_or("expected at least one PackageEdge")?;

        assert_eq!(edge.from_package, "Child");
        assert_eq!(edge.to_package, "Base");
        assert_eq!(edge.kind, PackageEdgeKind::Inherits);
        Ok(())
    }

    #[test]
    fn test_isa_assignment_our() -> Result<(), String> {
        let edges = parse_and_extract("package Child;\nour @ISA = qw(Base1 Base2);\n1;")?;
        assert_eq!(edges.len(), 2, "expected two edges, got {}", edges.len());

        assert_eq!(edges[0].to_package, "Base1");
        assert_eq!(edges[1].to_package, "Base2");
        Ok(())
    }

    // ── push @ISA, 'Base' → Inherits ────────────────────────────────────

    #[test]
    fn test_push_isa_single() -> Result<(), String> {
        let edges = parse_and_extract("package Child;\npush @ISA, 'Base';\n1;")?;
        let edge = edges.first().ok_or("expected at least one PackageEdge")?;

        assert_eq!(edge.from_package, "Child");
        assert_eq!(edge.to_package, "Base");
        assert_eq!(edge.kind, PackageEdgeKind::Inherits);
        Ok(())
    }

    #[test]
    fn test_push_isa_multiple() -> Result<(), String> {
        let edges = parse_and_extract("package Child;\npush @ISA, 'Base1', 'Base2';\n1;")?;
        assert_eq!(edges.len(), 2, "expected two edges, got {}", edges.len());

        assert_eq!(edges[0].to_package, "Base1");
        assert_eq!(edges[1].to_package, "Base2");
        Ok(())
    }

    // ── extends 'Base' (Moo/Moose) → Inherits ──────────────────────────

    #[test]
    fn test_extends_single() -> Result<(), String> {
        let edges =
            parse_and_extract("package MyApp::Admin;\nuse Moose;\nextends 'MyApp::User';\n1;")?;
        // May also get a DependsOn for `use Moose` — filter to Inherits.
        let inherits: Vec<_> =
            edges.iter().filter(|e| e.kind == PackageEdgeKind::Inherits).collect();
        let edge = inherits.first().ok_or("expected at least one Inherits edge")?;

        assert_eq!(edge.from_package, "MyApp::Admin");
        assert_eq!(edge.to_package, "MyApp::User");
        assert_eq!(edge.kind, PackageEdgeKind::Inherits);
        Ok(())
    }

    // ── with 'Role' (Moo/Moose) → ComposesRole ─────────────────────────

    #[test]
    fn test_with_single_role() -> Result<(), String> {
        let edges =
            parse_and_extract("package MyApp::User;\nuse Moose;\nwith 'MyApp::Printable';\n1;")?;
        let roles: Vec<_> =
            edges.iter().filter(|e| e.kind == PackageEdgeKind::ComposesRole).collect();
        let edge = roles.first().ok_or("expected at least one ComposesRole edge")?;

        assert_eq!(edge.from_package, "MyApp::User");
        assert_eq!(edge.to_package, "MyApp::Printable");
        assert_eq!(edge.kind, PackageEdgeKind::ComposesRole);
        Ok(())
    }

    #[test]
    fn test_with_multiple_roles() -> Result<(), String> {
        let edges =
            parse_and_extract("package MyApp::User;\nuse Moose;\nwith 'Role1', 'Role2';\n1;")?;
        let roles: Vec<_> =
            edges.iter().filter(|e| e.kind == PackageEdgeKind::ComposesRole).collect();
        assert_eq!(roles.len(), 2, "expected two ComposesRole edges, got {}", roles.len());

        assert_eq!(roles[0].to_package, "Role1");
        assert_eq!(roles[1].to_package, "Role2");
        Ok(())
    }

    // ── Package context tracking ────────────────────────────────────────

    #[test]
    fn test_multiple_packages() -> Result<(), String> {
        let code = r#"
package Parent;
1;

package Child;
use parent 'Parent';
1;
"#;
        let edges = parse_and_extract(code)?;
        let edge = edges.first().ok_or("expected at least one PackageEdge")?;

        assert_eq!(edge.from_package, "Child");
        assert_eq!(edge.to_package, "Parent");
        Ok(())
    }

    #[test]
    fn test_default_main_package() -> Result<(), String> {
        let edges = parse_and_extract("use parent 'Base';\n1;")?;
        let edge = edges.first().ok_or("expected at least one PackageEdge")?;

        assert_eq!(edge.from_package, "main");
        assert_eq!(edge.to_package, "Base");
        Ok(())
    }

    #[test]
    fn test_package_declaration_inside_block_restores_outer_package() -> Result<(), String> {
        let code = r#"
package Outer;
{
    package Inner;
    use parent 'InnerBase';
}
use parent 'OuterBase';
1;
"#;
        let edges = parse_and_extract(code)?;
        assert_eq!(edges.len(), 2, "expected two inheritance edges, got {}", edges.len());

        assert_eq!(edges[0].from_package, "Inner");
        assert_eq!(edges[0].to_package, "InnerBase");
        assert_eq!(edges[1].from_package, "Outer");
        assert_eq!(edges[1].to_package, "OuterBase");
        Ok(())
    }

    // ── Combined patterns ───────────────────────────────────────────────

    #[test]
    fn test_extends_and_with_combined() -> Result<(), String> {
        let code = r#"
package MyApp::Admin;
use Moose;
extends 'MyApp::User';
with 'MyApp::Printable', 'MyApp::Serializable';
1;
"#;
        let edges = parse_and_extract(code)?;
        let inherits: Vec<_> =
            edges.iter().filter(|e| e.kind == PackageEdgeKind::Inherits).collect();
        let roles: Vec<_> =
            edges.iter().filter(|e| e.kind == PackageEdgeKind::ComposesRole).collect();

        assert_eq!(inherits.len(), 1, "expected one Inherits edge");
        assert_eq!(inherits[0].to_package, "MyApp::User");

        assert_eq!(roles.len(), 2, "expected two ComposesRole edges");
        assert_eq!(roles[0].to_package, "MyApp::Printable");
        assert_eq!(roles[1].to_package, "MyApp::Serializable");
        Ok(())
    }

    // ── No edges for unrelated code ─────────────────────────────────────

    #[test]
    fn test_no_edges_for_plain_use() -> Result<(), String> {
        // The `?` matters here: without it a parse failure would look exactly
        // like the "no edges" outcome this test is checking for.
        let edges = parse_and_extract("package Foo;\nuse strict;\nuse warnings;\n1;")?;
        // No inheritance or role edges expected.
        let inheritance_edges: Vec<_> = edges
            .iter()
            .filter(|e| {
                e.kind == PackageEdgeKind::Inherits || e.kind == PackageEdgeKind::ComposesRole
            })
            .collect();
        assert!(
            inheritance_edges.is_empty(),
            "expected no inheritance/role edges, got {inheritance_edges:?}"
        );
        Ok(())
    }

    // ── Qualified package names ─────────────────────────────────────────

    #[test]
    fn test_qualified_parent_names() -> Result<(), String> {
        let edges = parse_and_extract("package My::Child;\nuse parent 'My::Base::Class';\n1;")?;
        let edge = edges.first().ok_or("expected at least one PackageEdge")?;

        assert_eq!(edge.from_package, "My::Child");
        assert_eq!(edge.to_package, "My::Base::Class");
        Ok(())
    }
}