Skip to main content

perl_module/import/
mod.rs

1//! Single-line Perl import head parsing and literal require/import extraction.
2//!
3//! Parse a single source line that starts with `use` or `require` and return
4//! the first import token with stable byte offsets.
5//!
6//! Also provides [`extract_require_import_symbols`], a text-level extractor
7//! that recognises the literal `require Module; Module->import(...)` adjacency
8//! pattern in multi-line source without requiring AST construction.
9
/// The phase of program execution in which a module gets loaded.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum LoadTiming {
    /// Loading happens while the program is being compiled (e.g. `use`).
    CompileTime,
    /// Loading happens while the program is running (e.g. `require`).
    Runtime,
}
18
/// Tells whether loading a module is followed by a call to its `import` method.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ImportBehavior {
    /// `import` is invoked on the module (as with `use`).
    CallsImport,
    /// `import` is not invoked (as with `require`).
    NoImport,
}
27
28/// Semantic description of a `use`/`require` dispatch form.
29#[derive(Debug, Clone, Copy, PartialEq, Eq)]
30pub struct DispatchSemantics {
31    /// When the module load happens.
32    pub load_timing: LoadTiming,
33    /// Whether `import` is called on the loaded module.
34    pub import_behavior: ImportBehavior,
35}
36
37impl DispatchSemantics {
38    /// A short human-readable description suitable for hover text.
39    #[must_use]
40    pub fn hover_description(&self) -> &'static str {
41        match (self.load_timing, self.import_behavior) {
42            (LoadTiming::CompileTime, ImportBehavior::CallsImport) => {
43                "compile-time load; calls import()"
44            }
45            (LoadTiming::Runtime, ImportBehavior::NoImport) => "runtime load; no import() call",
46            (LoadTiming::CompileTime, ImportBehavior::NoImport) => {
47                "compile-time load; no import() call"
48            }
49            (LoadTiming::Runtime, ImportBehavior::CallsImport) => "runtime load; calls import()",
50        }
51    }
52}
53
/// The way a `use` statement writes (or omits) its import list.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ImportListForm {
    /// No list at all: `use Module;`
    Default,
    /// An explicitly empty list: `use Module ();`
    Empty,
    /// Any other list: `use Module (...)`
    Explicit,
}
64
/// The two syntactic shapes a `require` argument can take.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum RequireForm {
    /// A bare module name: `require Module::Name`.
    ModuleName,
    /// A quoted file path: `require "path/to/file.pm"` or `require 'path/to/file.pm'`.
    FilePath,
}
73
/// The statement form recognised on a parsed import line.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ModuleImportKind {
    /// A plain `use Module::Name;`
    Use,
    /// A `require Module::Name;` or `require "file.pm";`
    Require,
    /// A `use parent ...` statement.
    UseParent,
    /// A `use base ...` statement.
    UseBase,
}
86
87impl ModuleImportKind {
88    /// Returns the dispatch semantics for this import kind.
89    #[must_use]
90    pub fn dispatch_semantics(self) -> DispatchSemantics {
91        match self {
92            ModuleImportKind::Use | ModuleImportKind::UseParent | ModuleImportKind::UseBase => {
93                DispatchSemantics {
94                    load_timing: LoadTiming::CompileTime,
95                    import_behavior: ImportBehavior::CallsImport,
96                }
97            }
98            ModuleImportKind::Require => DispatchSemantics {
99                load_timing: LoadTiming::Runtime,
100                import_behavior: ImportBehavior::NoImport,
101            },
102        }
103    }
104}
105
106/// Parsed leading import token from a `use`/`require` line.
107#[derive(Debug, Clone, Copy, PartialEq, Eq)]
108pub struct ModuleImportHead<'a> {
109    /// Parsed statement kind.
110    pub kind: ModuleImportKind,
111    /// First token after `use` or `require` (quotes stripped for file-path forms).
112    pub token: &'a str,
113    /// Inclusive byte start offset of `token` in the full line.
114    pub token_start: usize,
115    /// Exclusive byte end offset of `token` in the full line.
116    pub token_end: usize,
117    /// For `require`, whether the argument was a quoted file path or a bare module name.
118    /// Always `None` for `use` forms.
119    require_form: Option<RequireForm>,
120    /// For `use` statements, how the import list is spelled.
121    pub import_list: Option<ImportListForm>,
122}
123
/// Look up the symbols behind a known export tag of a specific module.
///
/// `tag` may be written with or without its leading `:`; at most one leading
/// colon is removed before the lookup. Module and tag names are compared
/// exactly (case-sensitive).
///
/// Returns `None` when the module/tag pair is not part of the built-in catalog.
#[must_use]
pub fn resolve_known_export_tag(module: &str, tag: &str) -> Option<&'static [&'static str]> {
    // Built-in catalog: (module, tag without the colon, exported symbols).
    const CATALOG: &[(&str, &str, &[&str])] = &[
        ("POSIX", "sys_wait_h", &["WIFEXITED", "WEXITSTATUS", "WIFSIGNALED", "WTERMSIG"]),
        ("POSIX", "fcntl_h", &["F_GETFL", "F_SETFL", "F_SETFD", "F_GETFD"]),
        ("POSIX", "termios_h", &["TCSANOW", "TCSADRAIN", "TCSAFLUSH", "B9600"]),
        ("File::Find", "find", &["find", "finddepth"]),
        ("Fcntl", "seek", &["SEEK_SET", "SEEK_CUR", "SEEK_END"]),
        ("Fcntl", "lock", &["LOCK_SH", "LOCK_EX", "LOCK_NB", "LOCK_UN"]),
        ("Encode", "fallback", &["FB_DEFAULT", "FB_CROAK", "FB_QUIET", "FB_WARN"]),
    ];

    let bare_tag = match tag.strip_prefix(':') {
        Some(stripped) => stripped,
        None => tag,
    };

    CATALOG
        .iter()
        .find(|entry| entry.0 == module && entry.1 == bare_tag)
        .map(|entry| entry.2)
}
142
143impl<'a> ModuleImportHead<'a> {
144    /// Returns the [`RequireForm`] for `require` statements, or `None` for `use` forms.
145    #[must_use]
146    pub fn require_form(&self) -> Option<RequireForm> {
147        self.require_form
148    }
149}
150
151/// Parse the leading import token of a single Perl source line.
152///
153/// Returns [`None`] when the line does not start with `use` or `require`
154/// (after leading whitespace) or when no token is present after the keyword.
155#[must_use]
156pub fn parse_module_import_head(line: &str) -> Option<ModuleImportHead<'_>> {
157    if let Some((token, token_start, token_end)) = parse_statement_head(line, "use") {
158        let kind = match token {
159            "parent" => ModuleImportKind::UseParent,
160            "base" => ModuleImportKind::UseBase,
161            _ => ModuleImportKind::Use,
162        };
163
164        let import_list = match kind {
165            ModuleImportKind::Use => Some(classify_use_import_list(&line[token_end..])),
166            ModuleImportKind::UseParent | ModuleImportKind::UseBase => None,
167            ModuleImportKind::Require => None,
168        };
169
170        return Some(ModuleImportHead {
171            kind,
172            token,
173            token_start,
174            token_end,
175            require_form: None,
176            import_list,
177        });
178    }
179
180    if let Some(result) = parse_require_head(line) {
181        return Some(result);
182    }
183
184    None
185}
186
187/// Parse a `require` statement, handling both bare module names and quoted file paths.
188fn parse_require_head(line: &str) -> Option<ModuleImportHead<'_>> {
189    let trimmed = line.trim_start();
190    let leading = line.len().saturating_sub(trimmed.len());
191
192    let rest = trimmed.strip_prefix("require")?;
193    if !rest.chars().next().is_some_and(char::is_whitespace) {
194        return None;
195    }
196
197    let after_keyword = leading + "require".len();
198
199    let rest_trimmed = rest.trim_start();
200    let quote_offset = rest.len() - rest_trimmed.len();
201
202    if let Some(quote_char) = rest_trimmed.chars().next().filter(|ch| *ch == '"' || *ch == '\'') {
203        let quoted = &rest_trimmed[quote_char.len_utf8()..];
204        let close_idx = quoted.find(quote_char)?;
205        let inner = &quoted[..close_idx];
206
207        let token_start = after_keyword + quote_offset + quote_char.len_utf8();
208        let token_end = token_start + inner.len();
209        return Some(ModuleImportHead {
210            kind: ModuleImportKind::Require,
211            token: inner,
212            token_start,
213            token_end,
214            require_form: Some(RequireForm::FilePath),
215            import_list: None,
216        });
217    }
218
219    let (token, token_rel_start, token_rel_end) = first_token_with_range(rest)?;
220    let token_start = after_keyword + token_rel_start;
221    let token_end = after_keyword + token_rel_end;
222
223    Some(ModuleImportHead {
224        kind: ModuleImportKind::Require,
225        token,
226        token_start,
227        token_end,
228        require_form: Some(RequireForm::ModuleName),
229        import_list: None,
230    })
231}
232
233fn classify_use_import_list(rest: &str) -> ImportListForm {
234    let trimmed = rest.trim_start();
235
236    if trimmed.is_empty() || trimmed.starts_with(';') {
237        return ImportListForm::Default;
238    }
239
240    if let Some(after_open) = trimmed.strip_prefix('(')
241        && let Some(close_idx) = after_open.find(')')
242        && after_open[..close_idx].trim().is_empty()
243    {
244        let after_close = after_open[close_idx + 1..].trim_start();
245        if after_close.is_empty() || after_close.starts_with(';') || after_close.starts_with('#') {
246            return ImportListForm::Empty;
247        }
248    }
249
250    ImportListForm::Explicit
251}
252
253fn parse_statement_head<'a>(line: &'a str, keyword: &str) -> Option<(&'a str, usize, usize)> {
254    let trimmed = line.trim_start();
255    let leading = line.len().saturating_sub(trimmed.len());
256
257    let rest = trimmed.strip_prefix(keyword)?;
258    if !rest.chars().next().is_some_and(char::is_whitespace) {
259        return None;
260    }
261
262    let (token, token_rel_start, token_rel_end) = first_token_with_range(rest)?;
263    let token_start = leading + keyword.len() + token_rel_start;
264    let token_end = leading + keyword.len() + token_rel_end;
265
266    Some((token, token_start, token_end))
267}
268
269fn first_token_with_range(input: &str) -> Option<(&str, usize, usize)> {
270    let mut token_start = None;
271
272    for (idx, ch) in input.char_indices() {
273        match token_start {
274            None => {
275                if is_token_delimiter(ch) {
276                    continue;
277                }
278                token_start = Some(idx);
279            }
280            Some(start) => {
281                if is_token_delimiter(ch) {
282                    if start == idx {
283                        return None;
284                    }
285                    return Some((&input[start..idx], start, idx));
286                }
287            }
288        }
289    }
290
291    if let Some(start) = token_start {
292        if start < input.len() { Some((&input[start..], start, input.len())) } else { None }
293    } else {
294        None
295    }
296}
297
/// Returns `true` for characters that separate import tokens: any whitespace
/// character, `;`, `(` and `)`.
fn is_token_delimiter(ch: char) -> bool {
    match ch {
        ';' | '(' | ')' => true,
        other => other.is_whitespace(),
    }
}
301
302// ── Literal require/import extractor ────────────────────────────────────────
303
/// One symbol taken from a literal `require Module; Module->import(...)` pair.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct RequireImportEntry {
    /// Fully qualified name of the module (e.g. `Foo::Bar`).
    pub module: String,
    /// Name of the symbol imported from that module.
    pub symbol: String,
    /// Byte offset in the source string where the `require` statement starts.
    pub require_byte_offset: usize,
    /// Byte offset in the source string where the `Module->import(...)`
    /// statement starts.
    pub import_byte_offset: usize,
}
316
317/// Extract symbols from literal `require Module; Module->import(...)` patterns
318/// found anywhere in `source`.
319///
320/// # Recognised patterns
321///
322/// - `require Module::Path;` followed on the same or a later nearby line by
323///   `Module::Path->import(qw(a b c));` or another Perl `qw` delimiter
324/// - `require Module::Path;` followed by
325///   `Module::Path->import('a', 'b');`
326/// - `require Module::Path;` followed by
327///   `Module::Path->import("a", "b");`
328///
329/// Whitespace around `->`, `import`, and the call parentheses is tolerated
330/// for literal receiver calls.
331///
332/// # Non-goals (not matched)
333///
334/// - `require $var;` (dynamic module name — variable)
335/// - `Module->import(@list);` (dynamic argument list — array variable)
336/// - `map { Module->import($_) } @syms;` (computed expressions)
337/// - `$class->import('x');` (variable receiver)
338///
339/// The extractor is **text-level only** — it does not parse a full AST.
340/// It works on whitespace-normalised lines and a small lookahead window.
341#[must_use]
342pub fn extract_require_import_symbols(source: &str) -> Vec<RequireImportEntry> {
343    let mut entries = Vec::new();
344
345    // Build a list of (trimmed_byte_offset, trimmed_line) pairs.
346    let lines: Vec<(usize, &str)> = {
347        let mut v = Vec::new();
348        let mut offset = 0usize;
349        for line in source.split('\n') {
350            let trimmed = line.trim();
351            if !trimmed.is_empty() {
352                let leading = line.len().saturating_sub(line.trim_start().len());
353                v.push((offset + leading, trimmed));
354            }
355            offset += line.len() + 1; // +1 for the '\n' we split on
356        }
357        v
358    };
359
360    for (i, &(req_offset, req_line)) in lines.iter().enumerate() {
361        // Match `require BarewordModule;`
362        let parsed_require = match parse_literal_require_line(req_line) {
363            Some(parsed_require) => parsed_require,
364            None => continue,
365        };
366        let module = parsed_require.module;
367
368        if collect_literal_import_entries(
369            &mut entries,
370            module,
371            req_offset,
372            req_offset + parsed_require.tail_start,
373            parsed_require.tail,
374        ) {
375            continue;
376        }
377
378        // Scan the remaining lines within a reasonable window (same scope, adjacent).
379        // We allow up to 5 blank-skipped lines between require and import to handle
380        // common real-world spacing without false positives across unrelated statements.
381        let window_end = (i + 1 + 5).min(lines.len());
382        for &(imp_offset, imp_line) in &lines[i + 1..window_end] {
383            if collect_literal_import_entries(
384                &mut entries,
385                module,
386                req_offset,
387                imp_offset,
388                imp_line,
389            ) {
390                // Consumed this import statement — move to next require.
391                break;
392            }
393            // If the line is a different require or a use, stop looking for a matching import.
394            if is_statement_terminator(imp_line) {
395                break;
396            }
397        }
398    }
399
400    entries
401}
402
403/// Parse a line of the form `require BarewordModule::Name;`.
404///
405/// Returns the module name string slice from `line`, or `None` if the line
406/// does not match this exact pattern.
407///
408/// Rejects:
409/// - `require $var;` (variable)
410/// - `require "file.pm";` (quoted file path)
411/// - `require 'file.pm';` (quoted file path)
412struct ParsedLiteralRequire<'a> {
413    module: &'a str,
414    tail_start: usize,
415    tail: &'a str,
416}
417
418fn parse_literal_require_line(line: &str) -> Option<ParsedLiteralRequire<'_>> {
419    let rest = line.strip_prefix("require")?;
420    // Must have whitespace after `require`.
421    if !rest.starts_with(|c: char| c.is_whitespace()) {
422        return None;
423    }
424    let leading_after_keyword = rest.len().saturating_sub(rest.trim_start().len());
425    let rest = rest.trim_start();
426    // Reject variables and quoted paths.
427    if rest.starts_with('$') || rest.starts_with('"') || rest.starts_with('\'') {
428        return None;
429    }
430
431    let module_end = rest.find(|c: char| c == ';' || c.is_whitespace()).unwrap_or(rest.len());
432    let module = &rest[..module_end];
433    if !is_valid_bareword_module_name(module) {
434        return None;
435    }
436
437    let after_module = &rest[module_end..];
438    let semicolon_offset = after_module.find(';')?;
439    let tail_start = "require".len() + leading_after_keyword + module_end + semicolon_offset + 1;
440    Some(ParsedLiteralRequire { module, tail_start, tail: &line[tail_start..] })
441}
442
443fn is_valid_bareword_module_name(module: &str) -> bool {
444    if module.is_empty() {
445        return false;
446    }
447
448    module.split("::").all(|part| {
449        !part.is_empty()
450            && part.starts_with(|c: char| c.is_ascii_alphabetic() || c == '_')
451            && part.chars().all(|c| c.is_ascii_alphanumeric() || c == '_')
452    })
453}
454
455fn collect_literal_import_entries(
456    entries: &mut Vec<RequireImportEntry>,
457    module: &str,
458    require_byte_offset: usize,
459    import_byte_offset: usize,
460    candidate: &str,
461) -> bool {
462    let leading = candidate.len().saturating_sub(candidate.trim_start().len());
463    let candidate = candidate.trim_start();
464
465    if let Some(symbols) = parse_literal_import_call(candidate, module) {
466        for symbol in symbols {
467            entries.push(RequireImportEntry {
468                module: module.to_string(),
469                symbol,
470                require_byte_offset,
471                import_byte_offset: import_byte_offset + leading,
472            });
473        }
474        return true;
475    }
476
477    false
478}
479
480/// Parse a line of the form `Module::Name->import(literal list);`.
481///
482/// Returns `Some(Vec<String>)` of symbol names when the line matches the
483/// expected module name with only literal arguments (`qw(...)`, `'x'`, `"x"`).
484/// Returns `None` when the line does not match or contains dynamic arguments.
485fn parse_literal_import_call(line: &str, expected_module: &str) -> Option<Vec<String>> {
486    let after_module = line.strip_prefix(expected_module)?.trim_start();
487    let after_arrow = after_module.strip_prefix("->")?.trim_start();
488    let after_method = after_arrow.strip_prefix("import")?.trim_start();
489    let after_open = after_method.strip_prefix('(')?;
490
491    // Find the matching close paren.
492    let close_idx = after_open.rfind(')')?;
493    let args_src = &after_open[..close_idx];
494
495    // Reject dynamic arguments: arrays, scalars, map, grep.
496    if args_src.contains('@') || args_src.contains('$') {
497        return None;
498    }
499
500    let symbols = parse_literal_arg_list(args_src)?;
501    Some(symbols)
502}
503
504/// Parse the interior of an `import(...)` argument list that contains only
505/// literal strings and/or a `qw(...)` list.
506///
507/// Returns `None` when any argument looks dynamic or unparseable.
508fn parse_literal_arg_list(args: &str) -> Option<Vec<String>> {
509    let trimmed = args.trim();
510
511    if trimmed.is_empty() {
512        return Some(Vec::new());
513    }
514
515    if let Some(words) = parse_qw_arg_list(trimmed) {
516        return Some(words);
517    }
518
519    // Comma-separated literal strings: 'a', "b", 'c'
520    let mut symbols = Vec::new();
521    for part in trimmed.split(',') {
522        let p = part.trim();
523        if p.is_empty() {
524            continue;
525        }
526        // Single-quoted string.
527        if let Some(inner) = p.strip_prefix('\'').and_then(|s| s.strip_suffix('\'')) {
528            if inner.is_empty() {
529                continue;
530            }
531            symbols.push(inner.to_string());
532            continue;
533        }
534        // Double-quoted string.
535        if let Some(inner) = p.strip_prefix('"').and_then(|s| s.strip_suffix('"')) {
536            if inner.is_empty() {
537                continue;
538            }
539            symbols.push(inner.to_string());
540            continue;
541        }
542        // Anything else is not a literal — bail out.
543        return None;
544    }
545
546    Some(symbols)
547}
548
/// Parse `trimmed` as exactly one Perl `qw` list and return its words.
///
/// The character right after `qw` is the opening delimiter; it must not be
/// alphanumeric, `_` or whitespace. Bracket delimiters (`(`, `[`, `{`, `<`)
/// close with their counterpart and may nest in balanced pairs; any other
/// delimiter closes with itself. Backslash escapes are not interpreted.
///
/// Returns `None` when `trimmed` is not a `qw` list, when the list is never
/// closed, or when anything follows the closing delimiter (for example
/// `qw(a b), qw(c d)`), so text from outside the list is never reported as
/// a word.
fn parse_qw_arg_list(trimmed: &str) -> Option<Vec<String>> {
    let after_operator = trimmed.strip_prefix("qw")?;
    let mut chars = after_operator.chars();
    let delimiter = chars.next()?;
    if delimiter.is_ascii_alphanumeric() || delimiter == '_' || delimiter.is_whitespace() {
        return None;
    }
    // Everything after the opening delimiter.
    let body = chars.as_str();

    let closing = match delimiter {
        '(' => ')',
        '[' => ']',
        '{' => '}',
        '<' => '>',
        other => other,
    };

    // Locate the delimiter that really closes the list rather than trusting
    // the last character of the text.
    let close_idx = if closing == delimiter {
        body.find(closing)?
    } else {
        let mut nested = 0usize;
        body.char_indices().find_map(|(idx, ch)| {
            if ch == delimiter {
                nested += 1;
                None
            } else if ch != closing {
                None
            } else if nested == 0 {
                Some(idx)
            } else {
                nested -= 1;
                None
            }
        })?
    };

    // The list must span the whole text; trailing content means this is not
    // a lone `qw` list.
    if close_idx + closing.len_utf8() != body.len() {
        return None;
    }

    Some(body[..close_idx].split_whitespace().map(str::to_string).collect())
}
573
/// Tells whether `line` opens a new statement that should end the lookahead
/// window used for require-then-import matching.
///
/// A line counts as a boundary when it starts with one of the keywords `use`,
/// `require`, `sub`, `package`, `my`, `our` or `local` immediately followed
/// by a single space. Stopping there avoids pairing a `require` with an
/// import that belongs to an unrelated block of statements.
fn is_statement_terminator(line: &str) -> bool {
    const BOUNDARY_PREFIXES: [&str; 7] =
        ["use ", "require ", "sub ", "package ", "my ", "our ", "local "];

    BOUNDARY_PREFIXES.iter().any(|&prefix| line.starts_with(prefix))
}