Skip to main content

perl_semantic_analyzer/analysis/
import_extractor.rs

1//! Import specification extractor for `use` and `require` statements.
2//!
3//! Walks the AST to extract [`ImportSpec`] entries from Perl `use` and `require`
4//! statements, classifying each import site by its syntactic shape
5//! ([`ImportKind`]) and symbol selection policy ([`ImportSymbols`]).
6//!
7//! # Supported Patterns
8//!
9//! | Perl source                              | `ImportKind`          | `ImportSymbols`              |
10//! |------------------------------------------|-----------------------|------------------------------|
11//! | `use Module qw(a b)`                     | `UseExplicitList`     | `Explicit(["a", "b"])`       |
12//! | `use Module ()`                          | `UseEmpty`            | `None`                       |
13//! | `use Module ':tag'`                      | `UseTag`              | `Tags(["tag"])`              |
14//! | `use Module` (bare)                      | `Use`                 | `Default`                    |
15//! | `use constant { FOO => 1 }`              | `UseConstant`         | `Explicit(["FOO"])`          |
16//! | `use constant PI => 3.14`                | `UseConstant`         | `Explicit(["PI"])`           |
17//! | `require Module`                         | `Require`             | `Default`                    |
18//! | `require Module; Module->import(...)`    | `RequireThenImport`   | `Explicit([...])` / `Default`|
19//! | `require $var`                           | `DynamicRequire`      | `Dynamic`                    |
20
21use crate::ast::{Node, NodeKind};
22use perl_semantic_facts::{
23    AnchorId, Confidence, FileId, ImportKind, ImportSpec, ImportSymbols, Provenance,
24};
25
26/// Extractor that walks an AST to produce [`ImportSpec`] entries for each
27/// `use` and `require` statement found.
28pub struct ImportExtractor;
29
30impl ImportExtractor {
31    /// Walk the entire AST and return one [`ImportSpec`] per `use` or
32    /// `require` statement.
33    ///
34    /// Each spec carries the supplied `file_id` and an `anchor_id` derived from
35    /// the statement's byte-offset span.
36    pub fn extract(ast: &Node, file_id: FileId) -> Vec<ImportSpec> {
37        let mut specs = Vec::new();
38        Self::walk(ast, file_id, &mut specs);
39        specs
40    }
41
42    // ── AST walker ──────────────────────────────────────────────────────
43
44    fn walk(node: &Node, file_id: FileId, out: &mut Vec<ImportSpec>) {
45        // Handle `use` statements directly.
46        if let NodeKind::Use { module, args, .. } = &node.kind {
47            if let Some(spec) = Self::classify_use(module, args, file_id, node) {
48                out.push(spec);
49            }
50        }
51
52        // Detect standalone `ClassName->import(...)` method calls where
53        // `ClassName` is a static identifier (not a variable).
54        //
55        // These are NOT preceded by a `require` statement. The exported
56        // symbol list is often dynamic (e.g. `Foo->import(@names)`), so
57        // we emit `ImportSymbols::Dynamic` conservatively.
58        //
59        // This covers Case 3 in the PR-B spec: a static class name with
60        // dynamic arguments signals that some set of symbols is imported
61        // from `Foo`, but the exact names are not statically known.
62        if let Some(spec) = Self::try_classify_standalone_class_import(node, file_id) {
63            out.push(spec);
64        }
65
66        // For statement-list containers (Program, Block, Package), scan
67        // consecutive statements to detect `require Module; Module->import(...)`
68        // pairs and standalone `require` statements.
69        match &node.kind {
70            NodeKind::Program { statements } | NodeKind::Block { statements } => {
71                Self::walk_statements(statements, file_id, out);
72            }
73            NodeKind::Package { block: Some(block), .. } => {
74                if let NodeKind::Block { statements } = &block.kind {
75                    Self::walk_statements(statements, file_id, out);
76                }
77            }
78            _ => {}
79        }
80
81        for child in node.children() {
82            Self::walk(child, file_id, out);
83        }
84    }
85
86    /// Detect a standalone `ClassName->import(...)` call where `ClassName`
87    /// is a static identifier (not a variable).
88    ///
89    /// Returns `None` when:
90    /// - The node is not a `MethodCall`.
91    /// - The method name is not `"import"`.
92    /// - The object is a variable (those are covered by `walk_statements`).
93    /// - The argument list is entirely static (fully explicit symbols) — those
94    ///   are already captured by `walk_statements` when preceded by `require`.
95    ///
96    /// Returns an `ImportSpec` with `ImportSymbols::Dynamic` when the
97    /// argument list contains any dynamic argument (e.g. `@names`, `$names`).
98    /// Returns `None` when all arguments are static strings or `qw(...)` lists
99    /// (those produce `Explicit` specs through `walk_statements`).
100    fn try_classify_standalone_class_import(node: &Node, file_id: FileId) -> Option<ImportSpec> {
101        let (object, method, args) = match &node.kind {
102            NodeKind::MethodCall { object, method, args } => (object, method, args),
103            _ => return None,
104        };
105
106        if method != "import" {
107            return None;
108        }
109
110        // Only static class names (Identifier nodes), not variables.
111        let class_name = match &object.kind {
112            NodeKind::Identifier { name } => name.as_str(),
113            _ => return None,
114        };
115
116        // Classify the argument list.
117        let symbols = Self::extract_import_call_symbols(args);
118
119        // Only emit evidence when the arguments are Dynamic (unknown at
120        // compile time). Explicit/tag lists are precise and do not need
121        // the conservative "any bareword might be imported" treatment.
122        if !matches!(symbols, ImportSymbols::Dynamic) {
123            return None;
124        }
125
126        let anchor_id = Self::anchor_from_node(node);
127        Some(ImportSpec {
128            module: class_name.to_string(),
129            // ManualImport distinguishes this from a `use Foo` statement —
130            // it is a `Class->import(...)` method call, not a `use` declaration.
131            kind: ImportKind::ManualImport,
132            symbols,
133            provenance: Provenance::DynamicBoundary,
134            confidence: Confidence::Low,
135            file_id: Some(file_id),
136            anchor_id: Some(anchor_id),
137            scope_id: None,
138            span_start_byte: Some(node.location.start as u32),
139        })
140    }
141
142    /// Scan a list of sibling statements for `require` patterns.
143    ///
144    /// Detects:
145    /// - `require Module; Module->import(...)` → `RequireThenImport`
146    /// - `require Module` (standalone) → `Require`
147    /// - `require $var` → `DynamicRequire`
148    ///
149    /// Statements that are part of a `require + import` pair are recorded
150    /// once (not duplicated by the per-node walk).
151    fn walk_statements(statements: &[Node], file_id: FileId, out: &mut Vec<ImportSpec>) {
152        // Track which statement indices have been consumed as part of a
153        // require-then-import pair so the per-node walk does not re-emit them.
154        let mut consumed: std::collections::HashSet<usize> = std::collections::HashSet::new();
155
156        for (i, stmt) in statements.iter().enumerate() {
157            if consumed.contains(&i) {
158                continue;
159            }
160
161            // Unwrap ExpressionStatement to get the inner expression.
162            let expr = Self::unwrap_expression_statement(stmt);
163
164            // Check for `require <something>`.
165            let (require_node, require_args) = match &expr.kind {
166                NodeKind::FunctionCall { name, args } if name == "require" => (stmt, args),
167                _ => continue,
168            };
169
170            // Dynamic require: `require $var`
171            if Self::is_dynamic_require(require_args) {
172                out.push(Self::make_dynamic_require(file_id, require_node));
173                consumed.insert(i);
174                continue;
175            }
176
177            // Static require: extract module name.
178            let module_name = match Self::extract_require_module_name(require_args) {
179                Some(name) => name,
180                None => continue,
181            };
182
183            // Look ahead for `Module->import(...)` in the next statement.
184            let import_spec = if let Some(next_stmt) = statements.get(i + 1) {
185                let next_expr = Self::unwrap_expression_statement(next_stmt);
186                Self::try_match_import_call(next_expr, &module_name)
187            } else {
188                None
189            };
190
191            if let Some((symbols, import_node)) = import_spec {
192                // `require Module; Module->import(...)` → RequireThenImport
193                //
194                // Use the require statement's anchor for the spec.
195                // Choose provenance based on whether the import argument list
196                // is entirely composed of literal strings/qw() words:
197                // - All static (Explicit/Tags/Mixed/Default/None) → LiteralRequireImport
198                //   (guarantees the full symbol set is statically known)
199                // - Dynamic → ExactAst (conservative; symbol set not fully known)
200                let provenance = if matches!(symbols, ImportSymbols::Dynamic) {
201                    Provenance::ExactAst
202                } else {
203                    Provenance::LiteralRequireImport
204                };
205                let anchor_id = Self::anchor_from_node(require_node);
206                let confidence = Self::confidence_for_symbols(&symbols);
207                out.push(ImportSpec {
208                    module: module_name,
209                    kind: ImportKind::RequireThenImport,
210                    symbols,
211                    provenance,
212                    confidence,
213                    file_id: Some(file_id),
214                    anchor_id: Some(anchor_id),
215                    scope_id: None,
216                    span_start_byte: Some(require_node.location.start as u32),
217                });
218                consumed.insert(i);
219                consumed.insert(i + 1);
220                // Also record the import call node index so the per-node
221                // walk does not process it.
222                let _ = import_node;
223            } else {
224                // Standalone `require Module` → Require
225                let anchor_id = Self::anchor_from_node(require_node);
226                out.push(ImportSpec {
227                    module: module_name,
228                    kind: ImportKind::Require,
229                    symbols: ImportSymbols::Default,
230                    provenance: Provenance::ExactAst,
231                    confidence: Confidence::High,
232                    file_id: Some(file_id),
233                    anchor_id: Some(anchor_id),
234                    scope_id: None,
235                    span_start_byte: Some(require_node.location.start as u32),
236                });
237                consumed.insert(i);
238            }
239        }
240    }
241
242    // ── Require helpers ────────────────────────────────────────────────
243
244    /// Unwrap an `ExpressionStatement` to get the inner expression node.
245    /// Returns the node itself if it is not an `ExpressionStatement`.
246    fn unwrap_expression_statement(node: &Node) -> &Node {
247        match &node.kind {
248            NodeKind::ExpressionStatement { expression } => expression,
249            _ => node,
250        }
251    }
252
253    /// Check whether a `require` call's arguments indicate a dynamic require
254    /// (i.e. `require $var`).
255    fn is_dynamic_require(args: &[Node]) -> bool {
256        match args.first() {
257            Some(arg) => matches!(&arg.kind, NodeKind::Variable { .. }),
258            None => false,
259        }
260    }
261
262    /// Extract the module name from a `require` call's arguments.
263    ///
264    /// Handles:
265    /// - `require Foo::Bar` → `"Foo::Bar"` (Identifier)
266    /// - `require "Foo/Bar.pm"` → `"Foo::Bar"` (String, path-to-module conversion)
267    fn extract_require_module_name(args: &[Node]) -> Option<String> {
268        let arg = args.first()?;
269        match &arg.kind {
270            NodeKind::Identifier { name } => Some(name.clone()),
271            NodeKind::String { value, .. } => {
272                // "Foo/Bar.pm" → "Foo::Bar"
273                let cleaned = value.trim_matches('\'').trim_matches('"').trim();
274                let module = cleaned.trim_end_matches(".pm").replace('/', "::");
275                Some(module)
276            }
277            _ => None,
278        }
279    }
280
281    /// Build an [`ImportSpec`] for `require $var` (dynamic require).
282    ///
283    /// Uses `Provenance::DynamicBoundary + Confidence::Low` because the module
284    /// identity is not statically known — only the pattern is known. This
285    /// provenance marks the import site for the diagnostics suppressor so that
286    /// symbols "plausibly imported" via dynamic require are not flagged as
287    /// undefined.
288    fn make_dynamic_require(file_id: FileId, node: &Node) -> ImportSpec {
289        let anchor_id = Self::anchor_from_node(node);
290        ImportSpec {
291            module: String::new(),
292            kind: ImportKind::DynamicRequire,
293            symbols: ImportSymbols::Dynamic,
294            provenance: Provenance::DynamicBoundary,
295            confidence: Confidence::Low,
296            file_id: Some(file_id),
297            anchor_id: Some(anchor_id),
298            scope_id: None,
299            span_start_byte: Some(node.location.start as u32),
300        }
301    }
302
303    /// Try to match a `Module->import(...)` method call node.
304    ///
305    /// Returns `Some((symbols, node))` if the node is a `MethodCall` with
306    /// method `"import"` and the object matches `expected_module`.
307    fn try_match_import_call<'a>(
308        node: &'a Node,
309        expected_module: &str,
310    ) -> Option<(ImportSymbols, &'a Node)> {
311        let (object, method, args) = match &node.kind {
312            NodeKind::MethodCall { object, method, args } => (object, method, args),
313            _ => return None,
314        };
315
316        if method != "import" {
317            return None;
318        }
319
320        // The object must be an Identifier matching the module name.
321        let obj_name = match &object.kind {
322            NodeKind::Identifier { name } => name.as_str(),
323            _ => return None,
324        };
325
326        if obj_name != expected_module {
327            return None;
328        }
329
330        // Extract imported symbols from the argument list.
331        let symbols = Self::extract_import_call_symbols(args);
332        Some((symbols, node))
333    }
334
335    /// Extract [`ImportSymbols`] from the argument list of a `Module->import(...)` call.
336    fn extract_import_call_symbols(args: &[Node]) -> ImportSymbols {
337        if args.is_empty() {
338            return ImportSymbols::Default;
339        }
340
341        let mut names: Vec<String> = Vec::new();
342        let mut tags: Vec<String> = Vec::new();
343        let mut has_dynamic_arg = false;
344
345        for arg in args {
346            has_dynamic_arg |= Self::collect_import_arg_symbols(arg, &mut names, &mut tags);
347        }
348
349        if has_dynamic_arg {
350            return ImportSymbols::Dynamic;
351        }
352
353        if names.is_empty() && tags.is_empty() {
354            return ImportSymbols::Default;
355        }
356
357        if !tags.is_empty() && names.is_empty() {
358            return ImportSymbols::Tags(tags);
359        }
360
361        if !tags.is_empty() && !names.is_empty() {
362            return ImportSymbols::Mixed { tags, names };
363        }
364
365        ImportSymbols::Explicit(names)
366    }
367
368    /// Collect symbol names and tags from a single argument node of an
369    /// `import(...)` call.
370    ///
371    /// Returns `true` when the argument is dynamic or unsupported and should
372    /// prevent the import site from claiming exact symbol names.
373    fn collect_import_arg_symbols(
374        arg: &Node,
375        names: &mut Vec<String>,
376        tags: &mut Vec<String>,
377    ) -> bool {
378        match &arg.kind {
379            NodeKind::String { value, .. } => {
380                let bare = value.trim_matches('\'').trim_matches('"');
381                if let Some(tag) = bare.strip_prefix(':') {
382                    tags.push(tag.to_string());
383                } else if !bare.is_empty() {
384                    names.push(bare.to_string());
385                }
386                false
387            }
388            NodeKind::Identifier { name } => {
389                // Handle qw(...) stored as raw identifier string.
390                if let Some(inner) = Self::parse_qw_content(name) {
391                    for word in inner.split_whitespace() {
392                        if let Some(tag) = word.strip_prefix(':') {
393                            tags.push(tag.to_string());
394                        } else {
395                            names.push(word.to_string());
396                        }
397                    }
398                } else if let Some(tag) = name.strip_prefix(':') {
399                    tags.push(tag.to_string());
400                } else if !name.is_empty() {
401                    names.push(name.clone());
402                }
403                false
404            }
405            NodeKind::Variable { .. } => {
406                // `Foo->import(@names)` / `Foo->import($name)` is dynamic:
407                // do not guess exact imported symbols.
408                true
409            }
410            NodeKind::ArrayLiteral { elements } => {
411                // qw(...) in expression context → ArrayLiteral of String nodes
412                let mut has_dynamic_arg = false;
413                for el in elements {
414                    has_dynamic_arg |= Self::collect_import_arg_symbols(el, names, tags);
415                }
416                has_dynamic_arg
417            }
418            _ => true,
419        }
420    }
421
422    fn confidence_for_symbols(symbols: &ImportSymbols) -> Confidence {
423        if matches!(symbols, ImportSymbols::Dynamic) { Confidence::Low } else { Confidence::High }
424    }
425
426    // ── Classification ──────────────────────────────────────────────────
427
428    /// Classify a single `use` statement into an [`ImportSpec`].
429    ///
430    /// Returns `None` for version pragmas (`use 5.036;`, `use v5.38;`) and
431    /// other non-module-import statements that should not produce import facts.
432    fn classify_use(
433        module: &str,
434        args: &[String],
435        file_id: FileId,
436        node: &Node,
437    ) -> Option<ImportSpec> {
438        // Skip version pragmas — they are not module imports.
439        if Self::is_version_pragma(module) {
440            return None;
441        }
442
443        let anchor_id = Self::anchor_from_node(node);
444
445        // `use constant` is a special pragma that defines constants.
446        if module == "constant" {
447            return Some(Self::classify_use_constant(args, file_id, anchor_id));
448        }
449
450        // Classify by argument shape.
451        let (kind, symbols) = Self::classify_args(args, module, node);
452
453        Some(ImportSpec {
454            module: module.to_string(),
455            kind,
456            symbols,
457            provenance: Provenance::ExactAst,
458            confidence: Confidence::High,
459            file_id: Some(file_id),
460            anchor_id: Some(anchor_id),
461            scope_id: None,
462            span_start_byte: Some(node.location.start as u32),
463        })
464    }
465
466    /// Classify the argument list of a non-constant `use` statement.
467    fn classify_args(args: &[String], module: &str, node: &Node) -> (ImportKind, ImportSymbols) {
468        if args.is_empty() {
469            // Distinguish `use Module;` from `use Module ()`.
470            //
471            // The parser produces empty args for both forms. We use a span-length
472            // heuristic: `use Module;` occupies `"use " + module + ";"` bytes,
473            // while `use Module ()` is longer due to the parentheses.
474            let bare_len = "use ".len() + module.len() + 1; // +1 for ';'
475            let span_len = node.location.end.saturating_sub(node.location.start);
476            if span_len > bare_len {
477                // The source text is longer than a bare `use Module;`, so there
478                // were likely empty parentheses.
479                return (ImportKind::UseEmpty, ImportSymbols::None);
480            }
481            // `use Module;` — bare import, triggers default @EXPORT.
482            return (ImportKind::Use, ImportSymbols::Default);
483        }
484
485        // Collect explicit names, tags, and detect qw() forms.
486        let mut explicit_names: Vec<String> = Vec::new();
487        let mut tags: Vec<String> = Vec::new();
488
489        for arg in args {
490            let trimmed = arg.trim();
491
492            // qw(...) form: "qw(a b c)"
493            if let Some(inner) = Self::parse_qw_content(trimmed) {
494                let words: Vec<String> = inner.split_whitespace().map(|w| w.to_string()).collect();
495                for word in words {
496                    if let Some(tag) = word.strip_prefix(':') {
497                        tags.push(tag.to_string());
498                    } else {
499                        explicit_names.push(word);
500                    }
501                }
502                continue;
503            }
504
505            // Tag argument: ':tag' or ":tag" (with or without quotes)
506            let unquoted = Self::unquote(trimmed);
507            if let Some(tag) = unquoted.strip_prefix(':') {
508                tags.push(tag.to_string());
509                continue;
510            }
511
512            // Skip fat-arrow values and punctuation that are part of overload-style
513            // key-value pairs (e.g. `use overload '""' => \&stringify`).
514            if trimmed == "=>" || trimmed == "," || trimmed == "\\" {
515                continue;
516            }
517
518            // Regular symbol name.
519            if Self::looks_like_symbol_name(trimmed) {
520                explicit_names.push(Self::unquote(trimmed).to_string());
521            }
522        }
523
524        // Empty parens: `use Module ()`
525        if explicit_names.is_empty() && tags.is_empty() && !args.is_empty() {
526            // The parser consumed `()` but produced no meaningful args.
527            // However, args may contain punctuation tokens from complex use statements.
528            // If all args are non-symbol tokens, treat as empty import.
529            let has_any_symbol = args.iter().any(|a| {
530                let t = a.trim();
531                Self::looks_like_symbol_name(t) || Self::parse_qw_content(t).is_some()
532            });
533            if !has_any_symbol {
534                return (ImportKind::UseEmpty, ImportSymbols::None);
535            }
536        }
537
538        // Tags only.
539        if !tags.is_empty() && explicit_names.is_empty() {
540            return (ImportKind::UseTag, ImportSymbols::Tags(tags));
541        }
542
543        // Mixed tags and names.
544        if !tags.is_empty() && !explicit_names.is_empty() {
545            return (
546                ImportKind::UseExplicitList,
547                ImportSymbols::Mixed { tags, names: explicit_names },
548            );
549        }
550
551        // Explicit symbol list.
552        if !explicit_names.is_empty() {
553            return (ImportKind::UseExplicitList, ImportSymbols::Explicit(explicit_names));
554        }
555
556        // Fallback: bare use with unrecognised args.
557        (ImportKind::Use, ImportSymbols::Default)
558    }
559
560    /// Classify `use constant` statements.
561    fn classify_use_constant(args: &[String], file_id: FileId, anchor_id: AnchorId) -> ImportSpec {
562        let mut constant_names: Vec<String> = Vec::new();
563
564        if args.is_empty() {
565            // `use constant;` — degenerate, no constants defined.
566            return ImportSpec {
567                module: "constant".to_string(),
568                kind: ImportKind::UseConstant,
569                symbols: ImportSymbols::None,
570                provenance: Provenance::ExactAst,
571                confidence: Confidence::High,
572                file_id: Some(file_id),
573                anchor_id: Some(anchor_id),
574                scope_id: None,
575                span_start_byte: None, // position not needed for UseConstant
576            };
577        }
578
579        // Hash-ref form: `use constant { FOO => 1, BAR => 2 }`
580        // Args look like: ["{", "FOO", "=>", "1", "BAR", "=>", "2", "}"]
581        if args.first().map(|a| a.as_str()) == Some("{") {
582            let mut i = 1; // skip opening brace
583            while i < args.len() {
584                let token = args[i].trim();
585                if token == "}" || token == "=>" || token == "," {
586                    i += 1;
587                    continue;
588                }
589                // After a name, skip the => and value
590                if i + 1 < args.len() && args[i + 1].trim() == "=>" {
591                    constant_names.push(token.to_string());
592                    // Skip => and value
593                    i += 3;
594                } else {
595                    i += 1;
596                }
597            }
598        }
599        // qw() form: `use constant qw(ONE TWO THREE)`
600        else if let Some(inner) = args.first().and_then(|a| Self::parse_qw_content(a.trim())) {
601            let words: Vec<String> = inner.split_whitespace().map(|w| w.to_string()).collect();
602            constant_names.extend(words);
603        }
604        // Scalar form: `use constant PI => 3.14`
605        // Args look like: ["PI", "3.14"] or ["PI", "=>", "3.14"]
606        else if let Some(name) = args.first() {
607            let trimmed = name.trim();
608            if Self::looks_like_constant_name(trimmed) {
609                constant_names.push(trimmed.to_string());
610            }
611        }
612
613        // Deduplicate while preserving order.
614        let mut seen = std::collections::HashSet::new();
615        constant_names.retain(|n| seen.insert(n.clone()));
616
617        let symbols = if constant_names.is_empty() {
618            ImportSymbols::None
619        } else {
620            ImportSymbols::Explicit(constant_names)
621        };
622
623        ImportSpec {
624            module: "constant".to_string(),
625            kind: ImportKind::UseConstant,
626            symbols,
627            provenance: Provenance::ExactAst,
628            confidence: Confidence::High,
629            file_id: Some(file_id),
630            anchor_id: Some(anchor_id),
631            scope_id: None,
632            span_start_byte: None, // position not needed for UseConstant
633        }
634    }
635
636    // ── Helpers ─────────────────────────────────────────────────────────
637
638    /// Derive an [`AnchorId`] from a node's byte-offset span.
639    fn anchor_from_node(node: &Node) -> AnchorId {
640        // Use the start byte offset as a deterministic anchor ID.
641        // This is unique per use-statement within a file.
642        AnchorId(node.location.start as u64)
643    }
644
/// Decide whether the "module" of a `use` statement is really a version
/// requirement (e.g. `5.036`, `v5.38`).
///
/// True when the string starts with an ASCII digit, or when it is `v`
/// followed by one or more characters that are all ASCII digits or dots.
fn is_version_pragma(module: &str) -> bool {
    let mut chars = module.chars();
    match chars.next() {
        // Numeric version: 5.036, 5.10
        Some(first) if first.is_ascii_digit() => true,
        // v-string: v5.38, v5.12.0
        Some('v') => {
            let rest = chars.as_str();
            !rest.is_empty() && rest.chars().all(|c| c.is_ascii_digit() || c == '.')
        }
        _ => false,
    }
}
660
/// Return the text between the parentheses of a `qw(...)` string.
///
/// `"qw(a b c)"` gives `Some("a b c")`; anything that does not start with
/// `qw(` and end with `)` gives `None`. Only the parenthesised form is
/// recognised; the parser is expected to have normalised other delimiters.
fn parse_qw_content(s: &str) -> Option<&str> {
    s.strip_prefix("qw(")?.strip_suffix(')')
}
670
/// Strip one pair of matching surrounding quotes (`'...'` or `"..."`).
///
/// Input that is not wrapped in a matching pair — including a lone quote
/// character — is returned untouched.
fn unquote(s: &str) -> &str {
    let single = s.strip_prefix('\'').and_then(|rest| rest.strip_suffix('\''));
    let double = s.strip_prefix('"').and_then(|rest| rest.strip_suffix('"'));
    single.or(double).unwrap_or(s)
}
680
681    /// Heuristic: does this string look like a Perl symbol name?
682    fn looks_like_symbol_name(s: &str) -> bool {
683        let s = Self::unquote(s);
684        if s.is_empty() {
685            return false;
686        }
687        // Tags start with ':'
688        if s.starts_with(':') {
689            return true;
690        }
691        // Sigiled variables: $foo, @bar, %baz, &sub, *glob
692        if s.starts_with('$')
693            || s.starts_with('@')
694            || s.starts_with('%')
695            || s.starts_with('&')
696            || s.starts_with('*')
697        {
698            return true;
699        }
700        // Bare word: starts with letter or underscore
701        s.chars().next().is_some_and(|c| c.is_ascii_alphabetic() || c == '_')
702    }
703
/// Heuristic check for "this token could be a constant name".
///
/// Only the first character is examined: it must be an ASCII letter or an
/// underscore. Case is not checked, and an empty string is rejected.
fn looks_like_constant_name(s: &str) -> bool {
    matches!(s.chars().next(), Some(first) if first.is_ascii_alphabetic() || first == '_')
}
713}
714
715#[cfg(test)]
716mod tests {
717    use super::*;
718    use crate::Parser;
719
720    /// Parse Perl source and extract import specs.
721    fn parse_and_extract(code: &str) -> Vec<ImportSpec> {
722        let mut parser = Parser::new(code);
723        let ast = match parser.parse() {
724            Ok(ast) => ast,
725            Err(_) => return Vec::new(),
726        };
727        ImportExtractor::extract(&ast, FileId(1))
728    }
729
730    // ── use Module qw(a b) → UseExplicitList ────────────────────────────
731
#[test]
fn test_use_explicit_list_qw() -> Result<(), String> {
    let specs = parse_and_extract("use List::Util qw(first reduce any);");
    let Some(spec) = specs.first() else {
        return Err("expected at least one ImportSpec".into());
    };

    assert_eq!(spec.module, "List::Util");
    assert_eq!(spec.kind, ImportKind::UseExplicitList);

    // Every word inside qw(...) must be reported as an explicit name.
    let ImportSymbols::Explicit(names) = &spec.symbols else {
        return Err(format!("expected Explicit, got {:?}", spec.symbols));
    };
    for expected in ["first", "reduce", "any"] {
        assert!(names.iter().any(|n| n == expected), "missing '{expected}' in {names:?}");
    }

    assert_eq!(spec.file_id, Some(FileId(1)));
    assert!(spec.anchor_id.is_some());
    Ok(())
}
750
751    #[test]
752    fn test_use_explicit_list_quoted_strings() -> Result<(), String> {
753        let specs = parse_and_extract("use Exporter 'import';");
754        let spec = specs.first().ok_or("expected at least one ImportSpec")?;
755
756        assert_eq!(spec.module, "Exporter");
757        assert_eq!(spec.kind, ImportKind::UseExplicitList);
758        if let ImportSymbols::Explicit(names) = &spec.symbols {
759            assert!(names.contains(&"import".to_string()), "missing 'import' in {names:?}");
760        } else {
761            return Err(format!("expected Explicit, got {:?}", spec.symbols));
762        }
763        Ok(())
764    }
765
766    // ── use Module () → UseEmpty ────────────────────────────────────────
767    //
768    // NOTE: The current parser represents both `use Module;` and `use Module ()`
769    // with empty args. We detect empty-parens by checking for an AST node whose
770    // source text contains `()`. When the parser cannot distinguish the two
771    // forms, both are classified as bare `Use`/`Default`.
772
773    #[test]
774    fn test_use_empty_parens() -> Result<(), String> {
775        let specs = parse_and_extract("use POSIX ();");
776        let spec = specs.first().ok_or("expected at least one ImportSpec")?;
777
778        assert_eq!(spec.module, "POSIX");
779        // The parser produces empty args for both `use POSIX;` and `use POSIX ()`.
780        // We detect the empty-parens form by inspecting the source span length
781        // relative to the module name length.
782        assert_eq!(spec.kind, ImportKind::UseEmpty);
783        assert_eq!(spec.symbols, ImportSymbols::None);
784        Ok(())
785    }
786
787    // ── use Module ':tag' → UseTag ──────────────────────────────────────
788
789    #[test]
790    fn test_use_tag_single() -> Result<(), String> {
791        let specs = parse_and_extract("use POSIX ':sys_wait_h';");
792        let spec = specs.first().ok_or("expected at least one ImportSpec")?;
793
794        assert_eq!(spec.module, "POSIX");
795        assert_eq!(spec.kind, ImportKind::UseTag);
796        if let ImportSymbols::Tags(tags) = &spec.symbols {
797            assert!(tags.contains(&"sys_wait_h".to_string()), "missing tag in {tags:?}");
798        } else {
799            return Err(format!("expected Tags, got {:?}", spec.symbols));
800        }
801        Ok(())
802    }
803
804    #[test]
805    fn test_use_tag_in_qw() -> Result<(), String> {
806        let specs = parse_and_extract("use Fcntl qw(:flock);");
807        let spec = specs.first().ok_or("expected at least one ImportSpec")?;
808
809        assert_eq!(spec.module, "Fcntl");
810        assert_eq!(spec.kind, ImportKind::UseTag);
811        if let ImportSymbols::Tags(tags) = &spec.symbols {
812            assert!(tags.contains(&"flock".to_string()), "missing tag in {tags:?}");
813        } else {
814            return Err(format!("expected Tags, got {:?}", spec.symbols));
815        }
816        Ok(())
817    }
818
819    // ── use Module (bare) → Use/Default ─────────────────────────────────
820
821    #[test]
822    fn test_use_bare() -> Result<(), String> {
823        let specs = parse_and_extract("use strict;");
824        let spec = specs.first().ok_or("expected at least one ImportSpec")?;
825
826        assert_eq!(spec.module, "strict");
827        assert_eq!(spec.kind, ImportKind::Use);
828        assert_eq!(spec.symbols, ImportSymbols::Default);
829        Ok(())
830    }
831
832    #[test]
833    fn test_use_bare_qualified() -> Result<(), String> {
834        let specs = parse_and_extract("use Data::Dumper;");
835        let spec = specs.first().ok_or("expected at least one ImportSpec")?;
836
837        assert_eq!(spec.module, "Data::Dumper");
838        assert_eq!(spec.kind, ImportKind::Use);
839        assert_eq!(spec.symbols, ImportSymbols::Default);
840        Ok(())
841    }
842
843    // ── use constant → UseConstant ──────────────────────────────────────
844
845    #[test]
846    fn test_use_constant_scalar() -> Result<(), String> {
847        let specs = parse_and_extract("use constant PI => 3.14;");
848        let spec = specs.first().ok_or("expected at least one ImportSpec")?;
849
850        assert_eq!(spec.module, "constant");
851        assert_eq!(spec.kind, ImportKind::UseConstant);
852        if let ImportSymbols::Explicit(names) = &spec.symbols {
853            assert!(names.contains(&"PI".to_string()), "missing 'PI' in {names:?}");
854        } else {
855            return Err(format!("expected Explicit, got {:?}", spec.symbols));
856        }
857        Ok(())
858    }
859
860    #[test]
861    fn test_use_constant_hash_ref() -> Result<(), String> {
862        let specs = parse_and_extract("use constant { FOO => 1, BAR => 2 };");
863        let spec = specs.first().ok_or("expected at least one ImportSpec")?;
864
865        assert_eq!(spec.module, "constant");
866        assert_eq!(spec.kind, ImportKind::UseConstant);
867        if let ImportSymbols::Explicit(names) = &spec.symbols {
868            assert!(names.contains(&"FOO".to_string()), "missing 'FOO' in {names:?}");
869            assert!(names.contains(&"BAR".to_string()), "missing 'BAR' in {names:?}");
870        } else {
871            return Err(format!("expected Explicit, got {:?}", spec.symbols));
872        }
873        Ok(())
874    }
875
876    #[test]
877    fn test_use_constant_empty() -> Result<(), String> {
878        let specs = parse_and_extract("use constant;");
879        let spec = specs.first().ok_or("expected at least one ImportSpec")?;
880
881        assert_eq!(spec.module, "constant");
882        assert_eq!(spec.kind, ImportKind::UseConstant);
883        assert_eq!(spec.symbols, ImportSymbols::None);
884        Ok(())
885    }
886
887    // ── Version pragmas are skipped ─────────────────────────────────────
888
889    #[test]
890    fn test_version_pragma_skipped() -> Result<(), String> {
891        let specs = parse_and_extract("use 5.036;");
892        assert!(specs.is_empty(), "version pragma should not produce ImportSpec");
893        Ok(())
894    }
895
896    #[test]
897    fn test_vstring_pragma_skipped() -> Result<(), String> {
898        let specs = parse_and_extract("use v5.38;");
899        assert!(specs.is_empty(), "v-string pragma should not produce ImportSpec");
900        Ok(())
901    }
902
903    // ── Multiple use statements ─────────────────────────────────────────
904
905    #[test]
906    fn test_multiple_use_statements() -> Result<(), String> {
907        let code = r#"
908use strict;
909use warnings;
910use List::Util qw(first any);
911use POSIX ();
912use constant MAX => 100;
913"#;
914        let specs = parse_and_extract(code);
915        assert_eq!(specs.len(), 5, "expected 5 ImportSpecs, got {}", specs.len());
916
917        // strict — bare
918        assert_eq!(specs[0].module, "strict");
919        assert_eq!(specs[0].kind, ImportKind::Use);
920
921        // warnings — bare
922        assert_eq!(specs[1].module, "warnings");
923        assert_eq!(specs[1].kind, ImportKind::Use);
924
925        // List::Util — explicit list
926        assert_eq!(specs[2].module, "List::Util");
927        assert_eq!(specs[2].kind, ImportKind::UseExplicitList);
928
929        // POSIX — empty
930        assert_eq!(specs[3].module, "POSIX");
931        assert_eq!(specs[3].kind, ImportKind::UseEmpty);
932
933        // constant — use constant
934        assert_eq!(specs[4].module, "constant");
935        assert_eq!(specs[4].kind, ImportKind::UseConstant);
936
937        Ok(())
938    }
939
940    // ── Anchor and file_id are populated ────────────────────────────────
941
942    #[test]
943    fn test_anchor_and_file_id_populated() -> Result<(), String> {
944        let specs = parse_and_extract("use Foo::Bar qw(baz);");
945        let spec = specs.first().ok_or("expected at least one ImportSpec")?;
946
947        assert_eq!(spec.file_id, Some(FileId(1)));
948        assert!(spec.anchor_id.is_some(), "anchor_id should be populated");
949        assert_eq!(spec.provenance, Provenance::ExactAst);
950        assert_eq!(spec.confidence, Confidence::High);
951        Ok(())
952    }
953
954    // ── Nested use in package block ─────────────────────────────────────
955
956    #[test]
957    fn test_use_inside_package_block() -> Result<(), String> {
958        let code = r#"
959package MyModule;
960use Exporter 'import';
961our @EXPORT = qw(foo);
9621;
963"#;
964        let specs = parse_and_extract(code);
965        let exporter_spec =
966            specs.iter().find(|s| s.module == "Exporter").ok_or("expected Exporter ImportSpec")?;
967
968        assert_eq!(exporter_spec.kind, ImportKind::UseExplicitList);
969        if let ImportSymbols::Explicit(names) = &exporter_spec.symbols {
970            assert!(names.contains(&"import".to_string()));
971        } else {
972            return Err(format!("expected Explicit, got {:?}", exporter_spec.symbols));
973        }
974        Ok(())
975    }
976
977    // ── Mixed tags and names ────────────────────────────────────────────
978
979    #[test]
980    fn test_use_mixed_tags_and_names() -> Result<(), String> {
981        let specs = parse_and_extract("use Fcntl qw(:flock LOCK_EX LOCK_NB);");
982        let spec = specs.first().ok_or("expected at least one ImportSpec")?;
983
984        assert_eq!(spec.module, "Fcntl");
985        assert_eq!(spec.kind, ImportKind::UseExplicitList);
986        if let ImportSymbols::Mixed { tags, names } = &spec.symbols {
987            assert!(tags.contains(&"flock".to_string()), "missing tag 'flock' in {tags:?}");
988            assert!(names.contains(&"LOCK_EX".to_string()), "missing 'LOCK_EX' in {names:?}");
989            assert!(names.contains(&"LOCK_NB".to_string()), "missing 'LOCK_NB' in {names:?}");
990        } else {
991            return Err(format!("expected Mixed, got {:?}", spec.symbols));
992        }
993        Ok(())
994    }
995
996    // ── require Module → Require ────────────────────────────────────────
997
998    #[test]
999    fn test_require_bare_module() -> Result<(), String> {
1000        let specs = parse_and_extract("require Foo::Bar;");
1001        let spec = specs
1002            .iter()
1003            .find(|s| s.module == "Foo::Bar")
1004            .ok_or("expected ImportSpec for Foo::Bar")?;
1005
1006        assert_eq!(spec.kind, ImportKind::Require);
1007        assert_eq!(spec.symbols, ImportSymbols::Default);
1008        assert_eq!(spec.provenance, Provenance::ExactAst);
1009        assert_eq!(spec.confidence, Confidence::High);
1010        assert_eq!(spec.file_id, Some(FileId(1)));
1011        assert!(spec.anchor_id.is_some(), "anchor_id should be populated");
1012        Ok(())
1013    }
1014
1015    // ── require Module; Module->import(...) → RequireThenImport ─────────
1016
1017    #[test]
1018    fn test_require_then_import_with_qw() -> Result<(), String> {
1019        let code = r#"
1020require Foo::Bar;
1021Foo::Bar->import(qw(alpha beta));
1022"#;
1023        let specs = parse_and_extract(code);
1024        let spec = specs
1025            .iter()
1026            .find(|s| s.module == "Foo::Bar")
1027            .ok_or("expected ImportSpec for Foo::Bar")?;
1028
1029        assert_eq!(spec.kind, ImportKind::RequireThenImport);
1030        if let ImportSymbols::Explicit(names) = &spec.symbols {
1031            assert!(names.contains(&"alpha".to_string()), "missing 'alpha' in {names:?}");
1032            assert!(names.contains(&"beta".to_string()), "missing 'beta' in {names:?}");
1033        } else {
1034            return Err(format!("expected Explicit, got {:?}", spec.symbols));
1035        }
1036        // Fully literal import list → LiteralRequireImport provenance.
1037        assert_eq!(spec.provenance, Provenance::LiteralRequireImport);
1038        assert_eq!(spec.confidence, Confidence::High);
1039        Ok(())
1040    }
1041
1042    #[test]
1043    fn test_require_then_import_bare() -> Result<(), String> {
1044        let code = r#"
1045require Some::Module;
1046Some::Module->import();
1047"#;
1048        let specs = parse_and_extract(code);
1049        let spec = specs
1050            .iter()
1051            .find(|s| s.module == "Some::Module")
1052            .ok_or("expected ImportSpec for Some::Module")?;
1053
1054        assert_eq!(spec.kind, ImportKind::RequireThenImport);
1055        assert_eq!(spec.symbols, ImportSymbols::Default);
1056        Ok(())
1057    }
1058
1059    #[test]
1060    fn test_require_then_import_quoted_strings() -> Result<(), String> {
1061        let code = r#"
1062require Foo::Bar;
1063Foo::Bar->import('alpha', 'beta');
1064"#;
1065        let specs = parse_and_extract(code);
1066        let spec = specs
1067            .iter()
1068            .find(|s| s.module == "Foo::Bar")
1069            .ok_or("expected ImportSpec for Foo::Bar")?;
1070
1071        assert_eq!(spec.kind, ImportKind::RequireThenImport);
1072        if let ImportSymbols::Explicit(names) = &spec.symbols {
1073            assert!(names.contains(&"alpha".to_string()), "missing 'alpha' in {names:?}");
1074            assert!(names.contains(&"beta".to_string()), "missing 'beta' in {names:?}");
1075        } else {
1076            return Err(format!("expected Explicit, got {:?}", spec.symbols));
1077        }
1078        // Fully literal quoted-string import → LiteralRequireImport provenance.
1079        assert_eq!(spec.provenance, Provenance::LiteralRequireImport);
1080        assert_eq!(spec.confidence, Confidence::High);
1081        Ok(())
1082    }
1083
1084    #[test]
1085    fn test_require_then_import_dynamic_symbol_list() -> Result<(), String> {
1086        let code = r#"
1087require Foo::Bar;
1088Foo::Bar->import(@names);
1089"#;
1090        let specs = parse_and_extract(code);
1091        let spec = specs
1092            .iter()
1093            .find(|s| s.module == "Foo::Bar")
1094            .ok_or("expected ImportSpec for Foo::Bar")?;
1095
1096        assert_eq!(spec.kind, ImportKind::RequireThenImport);
1097        assert_eq!(spec.symbols, ImportSymbols::Dynamic);
1098        assert_eq!(spec.confidence, Confidence::Low);
1099        Ok(())
1100    }
1101
1102    // ── require $var → DynamicRequire ───────────────────────────────────
1103
1104    #[test]
1105    fn test_require_dynamic_variable() -> Result<(), String> {
1106        let specs = parse_and_extract("require $module;");
1107        let spec = specs
1108            .iter()
1109            .find(|s| s.kind == ImportKind::DynamicRequire)
1110            .ok_or("expected DynamicRequire ImportSpec")?;
1111
1112        assert_eq!(spec.module, "");
1113        assert_eq!(spec.symbols, ImportSymbols::Dynamic);
1114        // DynamicRequire must use DynamicBoundary provenance (Q5 architectural decision):
1115        // the module identity is not statically known, so we cannot claim ExactAst.
1116        assert_eq!(spec.provenance, Provenance::DynamicBoundary);
1117        assert_eq!(spec.confidence, Confidence::Low);
1118        assert_eq!(spec.file_id, Some(FileId(1)));
1119        assert!(spec.anchor_id.is_some(), "anchor_id should be populated");
1120        Ok(())
1121    }
1122
1123    // ── Mixed use and require statements ────────────────────────────────
1124
1125    #[test]
1126    fn test_mixed_use_and_require() -> Result<(), String> {
1127        let code = r#"
1128use strict;
1129use warnings;
1130require Foo::Bar;
1131Foo::Bar->import(qw(baz));
1132require $dynamic;
1133"#;
1134        let specs = parse_and_extract(code);
1135
1136        // strict — bare use
1137        let strict_spec =
1138            specs.iter().find(|s| s.module == "strict").ok_or("expected strict ImportSpec")?;
1139        assert_eq!(strict_spec.kind, ImportKind::Use);
1140
1141        // warnings — bare use
1142        let warnings_spec =
1143            specs.iter().find(|s| s.module == "warnings").ok_or("expected warnings ImportSpec")?;
1144        assert_eq!(warnings_spec.kind, ImportKind::Use);
1145
1146        // Foo::Bar — require then import
1147        let foo_spec =
1148            specs.iter().find(|s| s.module == "Foo::Bar").ok_or("expected Foo::Bar ImportSpec")?;
1149        assert_eq!(foo_spec.kind, ImportKind::RequireThenImport);
1150        if let ImportSymbols::Explicit(names) = &foo_spec.symbols {
1151            assert!(names.contains(&"baz".to_string()), "missing 'baz' in {names:?}");
1152        } else {
1153            return Err(format!("expected Explicit, got {:?}", foo_spec.symbols));
1154        }
1155
1156        // dynamic require
1157        let dyn_spec = specs
1158            .iter()
1159            .find(|s| s.kind == ImportKind::DynamicRequire)
1160            .ok_or("expected DynamicRequire ImportSpec")?;
1161        assert_eq!(dyn_spec.symbols, ImportSymbols::Dynamic);
1162
1163        Ok(())
1164    }
1165
1166    // ── require with string path → Require ──────────────────────────────
1167
1168    #[test]
1169    fn test_require_string_path() -> Result<(), String> {
1170        let specs = parse_and_extract(r#"require "Foo/Bar.pm";"#);
1171        let spec = specs
1172            .iter()
1173            .find(|s| s.module == "Foo::Bar")
1174            .ok_or("expected ImportSpec for Foo::Bar")?;
1175
1176        assert_eq!(spec.kind, ImportKind::Require);
1177        assert_eq!(spec.symbols, ImportSymbols::Default);
1178        Ok(())
1179    }
1180
1181    // ── standalone ClassName->import(@names) — Case 3 (PR-B) ────────────
1182
1183    #[test]
1184    fn standalone_class_dynamic_import_produces_dynamic_spec() -> Result<(), String> {
1185        // `Foo->import(@names)` — static class, dynamic arg list.
1186        // Should produce one ImportSpec with ImportSymbols::Dynamic and
1187        // ImportKind::ManualImport (not Use — it's a method call, not a `use` statement).
1188        let specs = parse_and_extract(r#"Foo->import(@names);"#);
1189        let spec = specs
1190            .iter()
1191            .find(|s| s.module == "Foo" && matches!(s.symbols, ImportSymbols::Dynamic))
1192            .ok_or("expected Dynamic ImportSpec for Foo")?;
1193
1194        assert_eq!(spec.provenance, Provenance::DynamicBoundary);
1195        assert_eq!(spec.confidence, Confidence::Low);
1196        assert_eq!(
1197            spec.kind,
1198            ImportKind::ManualImport,
1199            "Class->import(@names) must use ManualImport, not Use"
1200        );
1201        Ok(())
1202    }
1203
1204    #[test]
1205    fn standalone_class_explicit_import_produces_no_dynamic_spec() -> Result<(), String> {
1206        // `Foo->import('bar')` — static class, static arg list.
1207        // Should NOT produce a Dynamic ImportSpec (explicit symbols only).
1208        let specs = parse_and_extract(r#"Foo->import('bar');"#);
1209        let dynamic_specs: Vec<_> =
1210            specs.iter().filter(|s| matches!(s.symbols, ImportSymbols::Dynamic)).collect();
1211
1212        assert!(dynamic_specs.is_empty(), "explicit import args must not produce a Dynamic spec");
1213        Ok(())
1214    }
1215
1216    #[test]
1217    fn variable_class_import_does_not_produce_standalone_spec() -> Result<(), String> {
1218        // `$var->import(@names)` — variable object, not a static class name.
1219        // The standalone extractor should not match variable-object calls.
1220        let specs = parse_and_extract(r#"$var->import(@names);"#);
1221        // Variable-object calls are handled by require+import pair logic, not
1222        // the standalone path. Without a require, this should produce no spec.
1223        let standalone_dynamic: Vec<_> = specs
1224            .iter()
1225            .filter(|s| matches!(s.symbols, ImportSymbols::Dynamic) && s.module.is_empty())
1226            .collect();
1227
1228        // The standalone extractor only handles Identifier objects, so this
1229        // should produce nothing via the standalone path.
1230        assert!(
1231            standalone_dynamic.is_empty(),
1232            "variable-class import without require must not produce standalone Dynamic spec"
1233        );
1234        Ok(())
1235    }
1236}