Skip to main content

perl_module/import/
mod.rs

1//! Single-line Perl import head parsing and literal require/import extraction.
2//!
3//! Parse a single source line that starts with `use` or `require` and return
4//! the first import token with stable byte offsets.
5//!
6//! Also provides [`extract_require_import_symbols`], a text-level extractor
7//! that recognises the literal `require Module; Module->import(...)` adjacency
8//! pattern in multi-line source without requiring AST construction.
9
/// The phase of program execution in which a module gets loaded.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum LoadTiming {
    /// Loading happens while the program is compiled (the `use` form).
    CompileTime,
    /// Loading happens while the program is running (the `require` form).
    Runtime,
}
18
/// Tells whether loading a module is followed by a call to its `import` method.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ImportBehavior {
    /// `import` is invoked on the module after loading (the `use` form).
    CallsImport,
    /// Nothing is imported after loading (the `require` form).
    NoImport,
}
27
28/// Semantic description of a `use`/`require` dispatch form.
29#[derive(Debug, Clone, Copy, PartialEq, Eq)]
30pub struct DispatchSemantics {
31    /// When the module load happens.
32    pub load_timing: LoadTiming,
33    /// Whether `import` is called on the loaded module.
34    pub import_behavior: ImportBehavior,
35}
36
37impl DispatchSemantics {
38    /// A short human-readable description suitable for hover text.
39    #[must_use]
40    pub fn hover_description(&self) -> &'static str {
41        match (self.load_timing, self.import_behavior) {
42            (LoadTiming::CompileTime, ImportBehavior::CallsImport) => {
43                "compile-time load; calls import()"
44            }
45            (LoadTiming::Runtime, ImportBehavior::NoImport) => "runtime load; no import() call",
46            (LoadTiming::CompileTime, ImportBehavior::NoImport) => {
47                "compile-time load; no import() call"
48            }
49            (LoadTiming::Runtime, ImportBehavior::CallsImport) => "runtime load; calls import()",
50        }
51    }
52}
53
/// The way a `use` statement writes (or omits) its import list.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ImportListForm {
    /// No list at all: `use Module;`
    Default,
    /// An explicitly empty list: `use Module ();`
    Empty,
    /// A non-empty list: `use Module (...)`
    Explicit,
}
64
/// The syntactic shape of the argument given to `require`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum RequireForm {
    /// A bare module name, as in `require Module::Name`.
    ModuleName,
    /// A quoted file path, as in `require "path/to/file.pm"` or
    /// `require 'path/to/file.pm'`.
    FilePath,
}
73
/// The statement form recognised on a parsed import line.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ModuleImportKind {
    /// A plain `use Module::Name;`
    Use,
    /// A `require Module::Name;` or `require "file.pm";`
    Require,
    /// A `use parent ...` statement.
    UseParent,
    /// A `use base ...` statement.
    UseBase,
}
86
87impl ModuleImportKind {
88    /// Returns the dispatch semantics for this import kind.
89    #[must_use]
90    pub fn dispatch_semantics(self) -> DispatchSemantics {
91        match self {
92            ModuleImportKind::Use | ModuleImportKind::UseParent | ModuleImportKind::UseBase => {
93                DispatchSemantics {
94                    load_timing: LoadTiming::CompileTime,
95                    import_behavior: ImportBehavior::CallsImport,
96                }
97            }
98            ModuleImportKind::Require => DispatchSemantics {
99                load_timing: LoadTiming::Runtime,
100                import_behavior: ImportBehavior::NoImport,
101            },
102        }
103    }
104}
105
106/// Parsed leading import token from a `use`/`require` line.
107#[derive(Debug, Clone, Copy, PartialEq, Eq)]
108pub struct ModuleImportHead<'a> {
109    /// Parsed statement kind.
110    pub kind: ModuleImportKind,
111    /// First token after `use` or `require` (quotes stripped for file-path forms).
112    pub token: &'a str,
113    /// Inclusive byte start offset of `token` in the full line.
114    pub token_start: usize,
115    /// Exclusive byte end offset of `token` in the full line.
116    pub token_end: usize,
117    /// For `require`, whether the argument was a quoted file path or a bare module name.
118    /// Always `None` for `use` forms.
119    require_form: Option<RequireForm>,
120    /// For `use` statements, how the import list is spelled.
121    pub import_list: Option<ImportListForm>,
122}
123
/// Resolve a known export tag to its symbol list for a specific module.
///
/// The `tag` argument can be passed with or without a leading `:`.
/// Returns `None` when the module/tag pair is not in the built-in catalog.
///
/// The returned slices are the symbols recorded in this catalog for the tag.
///
/// Fcntl and Encode publish their tags as `:flock` and `:fallbacks`. The
/// shorter `lock` and `fallback` spellings are still accepted so that existing
/// callers keep resolving.
#[must_use]
pub fn resolve_known_export_tag(module: &str, tag: &str) -> Option<&'static [&'static str]> {
    let normalized_tag = tag.strip_prefix(':').unwrap_or(tag);
    match (module, normalized_tag) {
        ("POSIX", "sys_wait_h") => Some(&["WIFEXITED", "WEXITSTATUS", "WIFSIGNALED", "WTERMSIG"]),
        ("POSIX", "fcntl_h") => Some(&["F_GETFL", "F_SETFL", "F_SETFD", "F_GETFD"]),
        ("POSIX", "termios_h") => Some(&["TCSANOW", "TCSADRAIN", "TCSAFLUSH", "B9600"]),
        ("File::Find", "find") => Some(&["find", "finddepth"]),
        ("Fcntl", "seek") => Some(&["SEEK_SET", "SEEK_CUR", "SEEK_END"]),
        // `flock` is the tag Fcntl exports; `lock` is the legacy catalog key.
        ("Fcntl", "flock" | "lock") => Some(&["LOCK_SH", "LOCK_EX", "LOCK_NB", "LOCK_UN"]),
        // `fallbacks` is the tag Encode exports; `fallback` is the legacy catalog key.
        ("Encode", "fallbacks" | "fallback") => {
            Some(&["FB_DEFAULT", "FB_CROAK", "FB_QUIET", "FB_WARN"])
        }
        _ => None,
    }
}
142
143impl<'a> ModuleImportHead<'a> {
144    /// Returns the [`RequireForm`] for `require` statements, or `None` for `use` forms.
145    #[must_use]
146    pub fn require_form(&self) -> Option<RequireForm> {
147        self.require_form
148    }
149
150    /// Returns the module name for resolution purposes.
151    ///
152    /// For `require "Foo/Bar.pm"` (FilePath form with `.pm` extension), converts
153    /// the file-path token to canonical module-name format (`Foo::Bar`).
154    ///
155    /// All other forms (`.pl`, extensionless, bare `ModuleName`, `use` statements)
156    /// return the token unchanged. Does NOT mutate `token`, `token_start`, or
157    /// `token_end` — the original offsets and raw token remain valid.
158    #[must_use]
159    pub fn token_as_module_name(&self) -> String {
160        if self.require_form == Some(RequireForm::FilePath) && self.token.ends_with(".pm") {
161            crate::path::module_path_to_name(self.token)
162        } else {
163            self.token.to_owned()
164        }
165    }
166}
167
168/// Parse the leading import token of a single Perl source line.
169///
170/// Returns [`None`] when the line does not start with `use` or `require`
171/// (after leading whitespace) or when no token is present after the keyword.
172#[must_use]
173pub fn parse_module_import_head(line: &str) -> Option<ModuleImportHead<'_>> {
174    if let Some((token, token_start, token_end)) = parse_statement_head(line, "use") {
175        let kind = match token {
176            "parent" => ModuleImportKind::UseParent,
177            "base" => ModuleImportKind::UseBase,
178            _ => ModuleImportKind::Use,
179        };
180
181        let import_list = match kind {
182            ModuleImportKind::Use => Some(classify_use_import_list(&line[token_end..])),
183            ModuleImportKind::UseParent | ModuleImportKind::UseBase => None,
184            ModuleImportKind::Require => None,
185        };
186
187        return Some(ModuleImportHead {
188            kind,
189            token,
190            token_start,
191            token_end,
192            require_form: None,
193            import_list,
194        });
195    }
196
197    if let Some(result) = parse_require_head(line) {
198        return Some(result);
199    }
200
201    None
202}
203
204/// Parse a `require` statement, handling both bare module names and quoted file paths.
205fn parse_require_head(line: &str) -> Option<ModuleImportHead<'_>> {
206    let trimmed = line.trim_start();
207    let leading = line.len().saturating_sub(trimmed.len());
208
209    let rest = trimmed.strip_prefix("require")?;
210    if !rest.chars().next().is_some_and(char::is_whitespace) {
211        return None;
212    }
213
214    let after_keyword = leading + "require".len();
215
216    let rest_trimmed = rest.trim_start();
217    let quote_offset = rest.len() - rest_trimmed.len();
218
219    if let Some(quote_char) = rest_trimmed.chars().next().filter(|ch| *ch == '"' || *ch == '\'') {
220        let quoted = &rest_trimmed[quote_char.len_utf8()..];
221        let close_idx = quoted.find(quote_char)?;
222        let inner = &quoted[..close_idx];
223
224        let token_start = after_keyword + quote_offset + quote_char.len_utf8();
225        let token_end = token_start + inner.len();
226        return Some(ModuleImportHead {
227            kind: ModuleImportKind::Require,
228            token: inner,
229            token_start,
230            token_end,
231            require_form: Some(RequireForm::FilePath),
232            import_list: None,
233        });
234    }
235
236    let (token, token_rel_start, token_rel_end) = first_token_with_range(rest)?;
237    let token_start = after_keyword + token_rel_start;
238    let token_end = after_keyword + token_rel_end;
239
240    Some(ModuleImportHead {
241        kind: ModuleImportKind::Require,
242        token,
243        token_start,
244        token_end,
245        require_form: Some(RequireForm::ModuleName),
246        import_list: None,
247    })
248}
249
250fn classify_use_import_list(rest: &str) -> ImportListForm {
251    let trimmed = rest.trim_start();
252
253    if trimmed.is_empty() || trimmed.starts_with(';') {
254        return ImportListForm::Default;
255    }
256
257    if let Some(after_open) = trimmed.strip_prefix('(')
258        && let Some(close_idx) = after_open.find(')')
259        && after_open[..close_idx].trim().is_empty()
260    {
261        let after_close = after_open[close_idx + 1..].trim_start();
262        if after_close.is_empty() || after_close.starts_with(';') || after_close.starts_with('#') {
263            return ImportListForm::Empty;
264        }
265    }
266
267    ImportListForm::Explicit
268}
269
270fn parse_statement_head<'a>(line: &'a str, keyword: &str) -> Option<(&'a str, usize, usize)> {
271    let trimmed = line.trim_start();
272    let leading = line.len().saturating_sub(trimmed.len());
273
274    let rest = trimmed.strip_prefix(keyword)?;
275    if !rest.chars().next().is_some_and(char::is_whitespace) {
276        return None;
277    }
278
279    let (token, token_rel_start, token_rel_end) = first_token_with_range(rest)?;
280    let token_start = leading + keyword.len() + token_rel_start;
281    let token_end = leading + keyword.len() + token_rel_end;
282
283    Some((token, token_start, token_end))
284}
285
286fn first_token_with_range(input: &str) -> Option<(&str, usize, usize)> {
287    let mut token_start = None;
288
289    for (idx, ch) in input.char_indices() {
290        match token_start {
291            None => {
292                if is_token_delimiter(ch) {
293                    continue;
294                }
295                token_start = Some(idx);
296            }
297            Some(start) => {
298                if is_token_delimiter(ch) {
299                    if start == idx {
300                        return None;
301                    }
302                    return Some((&input[start..idx], start, idx));
303                }
304            }
305        }
306    }
307
308    if let Some(start) = token_start {
309        if start < input.len() { Some((&input[start..], start, input.len())) } else { None }
310    } else {
311        None
312    }
313}
314
/// True for characters that end an import token: `;`, `(`, `)`, and any
/// whitespace.
fn is_token_delimiter(ch: char) -> bool {
    match ch {
        ';' | '(' | ')' => true,
        other => other.is_whitespace(),
    }
}
318
319// ── Literal require/import extractor ────────────────────────────────────────
320
/// One imported symbol taken from a literal `require Module; Module->import(...)` pair.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct RequireImportEntry {
    /// Name of the required module (e.g. `Foo::Bar`).
    pub module: String,
    /// Name of the symbol passed to `import`.
    pub symbol: String,
    /// Byte offset in the source string where the `require` statement starts.
    pub require_byte_offset: usize,
    /// Byte offset in the source string where the `Module->import(...)` statement starts.
    pub import_byte_offset: usize,
}
333
334/// Extract symbols from literal `require Module; Module->import(...)` patterns
335/// found anywhere in `source`.
336///
337/// # Recognised patterns
338///
339/// - `require Module::Path;` followed on the same or a later nearby line by
340///   `Module::Path->import(qw(a b c));` or another Perl `qw` delimiter
341/// - `require Module::Path;` followed by
342///   `Module::Path->import('a', 'b');`
343/// - `require Module::Path;` followed by
344///   `Module::Path->import("a", "b");`
345///
346/// Whitespace around `->`, `import`, and the call parentheses is tolerated
347/// for literal receiver calls.
348///
349/// # Non-goals (not matched)
350///
351/// - `require $var;` (dynamic module name — variable)
352/// - `Module->import(@list);` (dynamic argument list — array variable)
353/// - `map { Module->import($_) } @syms;` (computed expressions)
354/// - `$class->import('x');` (variable receiver)
355///
356/// The extractor is **text-level only** — it does not parse a full AST.
357/// It works on whitespace-normalised lines and a small lookahead window.
358#[must_use]
359pub fn extract_require_import_symbols(source: &str) -> Vec<RequireImportEntry> {
360    let mut entries = Vec::new();
361
362    // Build a list of (trimmed_byte_offset, trimmed_line) pairs.
363    let lines: Vec<(usize, &str)> = {
364        let mut v = Vec::new();
365        let mut offset = 0usize;
366        for line in source.split('\n') {
367            let trimmed = line.trim();
368            if !trimmed.is_empty() {
369                let leading = line.len().saturating_sub(line.trim_start().len());
370                v.push((offset + leading, trimmed));
371            }
372            offset += line.len() + 1; // +1 for the '\n' we split on
373        }
374        v
375    };
376
377    for (i, &(req_offset, req_line)) in lines.iter().enumerate() {
378        // Match `require BarewordModule;`
379        let parsed_require = match parse_literal_require_line(req_line) {
380            Some(parsed_require) => parsed_require,
381            None => continue,
382        };
383        let module = parsed_require.module;
384
385        if collect_literal_import_entries(
386            &mut entries,
387            module,
388            req_offset,
389            req_offset + parsed_require.tail_start,
390            parsed_require.tail,
391        ) {
392            continue;
393        }
394
395        // Scan the remaining lines within a reasonable window (same scope, adjacent).
396        // We allow up to 5 blank-skipped lines between require and import to handle
397        // common real-world spacing without false positives across unrelated statements.
398        let window_end = (i + 1 + 5).min(lines.len());
399        for &(imp_offset, imp_line) in &lines[i + 1..window_end] {
400            if collect_literal_import_entries(
401                &mut entries,
402                module,
403                req_offset,
404                imp_offset,
405                imp_line,
406            ) {
407                // Consumed this import statement — move to next require.
408                break;
409            }
410            // If the line is a different require or a use, stop looking for a matching import.
411            if is_statement_terminator(imp_line) {
412                break;
413            }
414        }
415    }
416
417    entries
418}
419
420/// Parse a line of the form `require BarewordModule::Name;`.
421///
422/// Returns the module name string slice from `line`, or `None` if the line
423/// does not match this exact pattern.
424///
425/// Rejects:
426/// - `require $var;` (variable)
427/// - `require "file.pm";` (quoted file path)
428/// - `require 'file.pm';` (quoted file path)
429struct ParsedLiteralRequire<'a> {
430    module: &'a str,
431    tail_start: usize,
432    tail: &'a str,
433}
434
435fn parse_literal_require_line(line: &str) -> Option<ParsedLiteralRequire<'_>> {
436    let rest = line.strip_prefix("require")?;
437    // Must have whitespace after `require`.
438    if !rest.starts_with(|c: char| c.is_whitespace()) {
439        return None;
440    }
441    let leading_after_keyword = rest.len().saturating_sub(rest.trim_start().len());
442    let rest = rest.trim_start();
443    // Reject variables and quoted paths.
444    if rest.starts_with('$') || rest.starts_with('"') || rest.starts_with('\'') {
445        return None;
446    }
447
448    let module_end = rest.find(|c: char| c == ';' || c.is_whitespace()).unwrap_or(rest.len());
449    let module = &rest[..module_end];
450    if !is_valid_bareword_module_name(module) {
451        return None;
452    }
453
454    let after_module = &rest[module_end..];
455    let semicolon_offset = after_module.find(';')?;
456    let tail_start = "require".len() + leading_after_keyword + module_end + semicolon_offset + 1;
457    Some(ParsedLiteralRequire { module, tail_start, tail: &line[tail_start..] })
458}
459
/// True when `module` is a non-empty sequence of `::`-separated segments,
/// each starting with an ASCII letter or `_` and continuing with ASCII
/// letters, digits, or `_`.
fn is_valid_bareword_module_name(module: &str) -> bool {
    fn is_identifier(segment: &str) -> bool {
        let mut chars = segment.chars();
        match chars.next() {
            Some(first) if first.is_ascii_alphabetic() || first == '_' => {
                chars.all(|c| c.is_ascii_alphanumeric() || c == '_')
            }
            // Empty segment, or a first character that cannot start a name.
            _ => false,
        }
    }

    !module.is_empty() && module.split("::").all(is_identifier)
}
471
472fn collect_literal_import_entries(
473    entries: &mut Vec<RequireImportEntry>,
474    module: &str,
475    require_byte_offset: usize,
476    import_byte_offset: usize,
477    candidate: &str,
478) -> bool {
479    let leading = candidate.len().saturating_sub(candidate.trim_start().len());
480    let candidate = candidate.trim_start();
481
482    if let Some(symbols) = parse_literal_import_call(candidate, module) {
483        for symbol in symbols {
484            entries.push(RequireImportEntry {
485                module: module.to_string(),
486                symbol,
487                require_byte_offset,
488                import_byte_offset: import_byte_offset + leading,
489            });
490        }
491        return true;
492    }
493
494    false
495}
496
#[cfg(test)]
mod tests {
    use super::*;

    /// Bare module names and quoted `.pl` paths must come back from
    /// `token_as_module_name` exactly as written.
    #[test]
    fn token_as_module_name_keeps_non_pm_require_tokens() -> Result<(), String> {
        let Some(bare) = parse_module_import_head("require Local::Util;") else {
            return Err("expected bare require head".to_string());
        };
        assert_eq!(bare.token_as_module_name(), "Local::Util");

        let Some(script) = parse_module_import_head(r#"require "script.pl";"#) else {
            return Err("expected quoted script require head".to_string());
        };
        assert_eq!(script.token_as_module_name(), "script.pl");

        Ok(())
    }
}
514
/// Parse a line of the form `Module::Name->import(literal list);`.
///
/// `line` must begin with `expected_module`; whitespace is tolerated around
/// `->`, `import`, and the opening parenthesis. The argument list ends at the
/// parenthesis that closes the call, so trailing text on the same line (a
/// second statement, a comment) never leaks into the extracted symbols.
///
/// Returns `Some(Vec<String>)` of symbol names when the line matches the
/// expected module name with only literal arguments (`qw(...)`, `'x'`, `"x"`).
/// Returns `None` when the line does not match or contains dynamic arguments.
fn parse_literal_import_call(line: &str, expected_module: &str) -> Option<Vec<String>> {
    let after_module = line.strip_prefix(expected_module)?.trim_start();
    let after_arrow = after_module.strip_prefix("->")?.trim_start();
    let after_method = after_arrow.strip_prefix("import")?.trim_start();
    let after_open = after_method.strip_prefix('(')?;

    let close_idx = find_call_close_paren(after_open)?;
    let args_src = &after_open[..close_idx];

    // A sigil anywhere in the arguments means a variable is involved, so the
    // list is not purely literal.
    if args_src.contains('@') || args_src.contains('$') {
        return None;
    }

    parse_literal_arg_list(args_src)
}

/// Finds the byte index of the `)` that closes a call whose `(` has already
/// been consumed from `after_open`.
///
/// Nested parentheses are tracked, and parentheses inside `'...'` or `"..."`
/// (with backslash escapes) are ignored. Returns `None` when the call is not
/// closed on this line.
fn find_call_close_paren(after_open: &str) -> Option<usize> {
    let mut depth = 1usize;
    let mut open_quote: Option<char> = None;
    let mut escaped = false;

    for (idx, ch) in after_open.char_indices() {
        if let Some(quote) = open_quote {
            if escaped {
                escaped = false;
            } else if ch == '\\' {
                escaped = true;
            } else if ch == quote {
                open_quote = None;
            }
            continue;
        }

        match ch {
            '\'' | '"' => open_quote = Some(ch),
            '(' => depth += 1,
            ')' => {
                depth -= 1;
                if depth == 0 {
                    return Some(idx);
                }
            }
            _ => {}
        }
    }

    None
}

/// Parse the interior of an `import(...)` argument list that is either a
/// single `qw` list or a comma-separated series of quoted string literals.
///
/// Empty items and empty strings are skipped. Returns `None` when any
/// argument looks dynamic or unparseable.
fn parse_literal_arg_list(args: &str) -> Option<Vec<String>> {
    let trimmed = args.trim();

    if trimmed.is_empty() {
        return Some(Vec::new());
    }

    if let Some(words) = parse_qw_arg_list(trimmed) {
        return Some(words);
    }

    // Comma-separated literal strings: 'a', "b", 'c'
    let mut symbols = Vec::new();
    for part in trimmed.split(',').map(str::trim).filter(|part| !part.is_empty()) {
        // Anything that is not one quoted literal makes the whole list
        // non-literal, so bail out.
        let inner = unquote_literal(part)?;
        if !inner.is_empty() {
            symbols.push(inner.to_string());
        }
    }

    Some(symbols)
}

/// Strips matching single or double quotes from `part`.
///
/// Returns `None` when `part` is not quoted, or when the quote character also
/// occurs inside it. The latter means `part` is an expression such as
/// `'a' . 'b'` rather than one literal.
fn unquote_literal(part: &str) -> Option<&str> {
    let quote = part.chars().next().filter(|ch| matches!(*ch, '\'' | '"'))?;
    let inner = part.strip_prefix(quote)?.strip_suffix(quote)?;
    if inner.contains(quote) { None } else { Some(inner) }
}

/// Parse `trimmed` as exactly one `qw` list and return its words.
///
/// The delimiter directly follows `qw`; `(`, `[`, `{`, and `<` close with
/// their counterpart and any other punctuation closes with itself. Returns
/// `None` when `trimmed` is not a `qw` list, or when the final character is
/// not the delimiter that closes the list opened at the start (for example
/// `qw(a b), qw(c d)`).
fn parse_qw_arg_list(trimmed: &str) -> Option<Vec<String>> {
    let after_operator = trimmed.strip_prefix("qw")?;
    let mut chars = after_operator.chars();
    let open = chars.next()?;
    // An identifier character means this is some other word starting with
    // `qw`; whitespace before the delimiter is not supported here.
    if open.is_ascii_alphanumeric() || open == '_' || open.is_whitespace() {
        return None;
    }

    let close = match open {
        '(' => ')',
        '[' => ']',
        '{' => '}',
        '<' => '>',
        other => other,
    };

    let inner = chars.as_str().strip_suffix(close)?;
    if !qw_body_is_balanced(inner, open, close) {
        return None;
    }

    Some(inner.split_whitespace().map(str::to_string).collect())
}

/// Checks that `inner` cannot end the `qw` list early.
///
/// For a self-closing delimiter the body must not contain it at all. For a
/// bracket pair, nested brackets must balance and never close more than they
/// opened.
fn qw_body_is_balanced(inner: &str, open: char, close: char) -> bool {
    if open == close {
        return !inner.contains(close);
    }

    let mut depth = 0usize;
    for ch in inner.chars() {
        if ch == open {
            depth += 1;
        } else if ch == close {
            if depth == 0 {
                return false;
            }
            depth -= 1;
        }
    }
    depth == 0
}
608
/// Return true when `line` indicates a new statement boundary that should stop
/// the lookahead window for require-then-import matching.
///
/// A boundary is a line that starts with one of the keywords `use`,
/// `require`, `sub`, `package`, `my`, `our`, or `local` followed by any
/// whitespace character. Accepting any whitespace (not just a space) keeps
/// this check in step with the `require` parsers, which also accept tabs
/// after the keyword. `line` is expected to be trimmed already; leading
/// whitespace is not skipped here.
fn is_statement_terminator(line: &str) -> bool {
    const BOUNDARY_KEYWORDS: [&str; 7] =
        ["use", "require", "sub", "package", "my", "our", "local"];

    BOUNDARY_KEYWORDS.iter().any(|keyword| {
        line.strip_prefix(keyword).is_some_and(|rest| rest.starts_with(char::is_whitespace))
    })
}