Skip to main content

perl_module/resolution/
use_lib.rs

1//! Extract include paths from `use lib` and `FindBin` statements.
2//!
3//! Scans Perl source text for `use lib` pragmas and recognizes common
4//! `FindBin` patterns to discover additional module include directories.
5
6use std::path::{Component, Path, PathBuf};
7
/// One include directory named by a `use lib` / `no lib` statement.
///
/// The path is stored as written in the source, except that a leading
/// `FindBin` variable (and one following `/`) has been stripped when
/// `from_findbin` is set.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct UseLibPath {
    /// Directory text taken from the statement (relative or absolute).
    pub path: String,
    /// `true` when the original text started with a `FindBin` variable, so
    /// `path` is relative to the script's directory rather than the workspace.
    pub from_findbin: bool,
}
16
17/// A `use lib` / `no lib` operation extracted from source in lexical order.
18#[derive(Debug, Clone, PartialEq, Eq)]
19pub enum UseLibAction {
20    /// Add paths to the effective include stack.
21    Add(Vec<UseLibPath>),
22    /// Remove paths from the effective include stack.
23    Remove(Vec<UseLibPath>),
24}
25
26/// Extract include paths from `use lib` statements in Perl source text.
27///
28/// Handles the following patterns:
29/// - `use lib 'path';`
30/// - `use lib "path";`
31/// - `use lib qw(path1 path2);`
32/// - `use lib qw/path1 path2/;`
33/// - `use lib ("path1", "path2");`
34/// - `use lib '$FindBin::Bin/path'` and `"$FindBin::Bin/path"`
35/// - `use lib '$Bin/path'` and `"$RealBin/path"` (from `FindBin` exports)
36///
37/// Returns extracted paths in order of appearance.
38pub fn extract_use_lib_paths(source: &str) -> Vec<UseLibPath> {
39    let mut paths = Vec::new();
40
41    for statement in split_perl_statements(source) {
42        let trimmed = statement.trim();
43        if let Some(rest) = strip_use_lib_prefix(trimmed) {
44            extract_paths_from_args(rest, &mut paths);
45        }
46    }
47
48    paths
49}
50
51/// Extract ordered `use lib` and `no lib` operations from source text.
52#[must_use]
53pub fn extract_use_lib_operations(source: &str) -> Vec<UseLibAction> {
54    let mut ops = Vec::new();
55
56    for statement in split_perl_statements(source) {
57        let trimmed = statement.trim();
58        if let Some(rest) = strip_use_lib_prefix(trimmed) {
59            let mut paths = Vec::new();
60            extract_paths_from_args(rest, &mut paths);
61            if !paths.is_empty() {
62                ops.push(UseLibAction::Add(paths));
63            }
64            continue;
65        }
66
67        if let Some(rest) = strip_no_lib_prefix(trimmed) {
68            let mut paths = Vec::new();
69            extract_paths_from_args(rest, &mut paths);
70            if !paths.is_empty() {
71                ops.push(UseLibAction::Remove(paths));
72            }
73        }
74    }
75
76    ops
77}
78
/// Split Perl source text into statement slices terminated by `;`.
///
/// - A `;` inside a single- or double-quoted string does not end a statement;
///   a backslash inside a string escapes the following character.
/// - A `#` outside a string starts a comment running to the end of the line.
///   Comment text is never scanned for quotes or `;`. A comment that appears
///   before any statement content is dropped from the next slice; a comment
///   in the middle of a statement stays inside that statement's slice.
/// - Each returned slice includes its terminating `;`. Any trailing text
///   after the last `;` is returned as a final slice.
fn split_perl_statements(source: &str) -> Vec<&str> {
    #[derive(Clone, Copy)]
    enum Quote {
        None,
        Single,
        Double,
    }

    let mut pieces = Vec::new();
    let mut piece_start = 0usize;
    let mut quote = Quote::None;
    // Set after a backslash inside a string: the next character is skipped.
    let mut skip_next = false;
    // True once the current statement contains anything other than
    // whitespace or comments.
    let mut seen_code = false;
    // Characters at byte offsets below this belong to a comment being skipped.
    let mut resume_at = 0usize;

    for (pos, c) in source.char_indices() {
        if pos < resume_at {
            continue;
        }
        if skip_next {
            skip_next = false;
            continue;
        }

        match (c, quote) {
            ('\\', Quote::Single | Quote::Double) => skip_next = true,
            ('\'', Quote::None) => {
                quote = Quote::Single;
                seen_code = true;
            }
            ('\'', Quote::Single) => {
                quote = Quote::None;
                seen_code = true;
            }
            ('"', Quote::None) => {
                quote = Quote::Double;
                seen_code = true;
            }
            ('"', Quote::Double) => {
                quote = Quote::None;
                seen_code = true;
            }
            ('#', Quote::None) => {
                // The comment runs through the newline, or to end of input.
                let comment_end = source[pos..]
                    .find('\n')
                    .map_or(source.len(), |nl_offset| pos + nl_offset + 1);
                // A leading comment is excluded from the upcoming slice.
                if !seen_code {
                    piece_start = comment_end;
                }
                resume_at = comment_end;
            }
            (';', Quote::None) => {
                let piece_end = pos + c.len_utf8();
                pieces.push(&source[piece_start..piece_end]);
                piece_start = piece_end;
                seen_code = false;
            }
            _ => {
                if !c.is_whitespace() {
                    seen_code = true;
                }
            }
        }
    }

    if piece_start < source.len() {
        pieces.push(&source[piece_start..]);
    }

    pieces
}
161
162/// Resolve `use lib` paths against a workspace root and optional file directory.
163///
164/// - Absolute paths are accepted only when they stay under `workspace_root`.
165/// - `$FindBin::Bin`-relative paths are resolved against `file_dir` (or `workspace_root` if absent).
166/// - Other relative paths are resolved against `workspace_root`.
167pub fn resolve_use_lib_paths(
168    use_lib_paths: &[UseLibPath],
169    workspace_root: &Path,
170    file_dir: Option<&Path>,
171) -> Vec<String> {
172    let mut result = Vec::new();
173
174    for ulp in use_lib_paths {
175        let path_str = &ulp.path;
176
177        if ulp.from_findbin {
178            let base = file_dir.unwrap_or(workspace_root);
179            let Some(resolved) = normalize_findbin_path(base, path_str) else {
180                continue;
181            };
182            if resolved.strip_prefix(workspace_root).is_err() {
183                continue;
184            }
185            if let Some(s) = path_to_relative_string(&resolved, workspace_root)
186                && !result.contains(&s)
187            {
188                result.push(s);
189            }
190        } else {
191            let p = Path::new(path_str);
192            if p.is_absolute() {
193                if let Some(s) = path_to_relative_string(p, workspace_root)
194                    && !result.contains(&s)
195                {
196                    result.push(s);
197                }
198            } else {
199                let s = path_str.to_string();
200                if !result.contains(&s) {
201                    result.push(s);
202                }
203            }
204        }
205    }
206
207    result
208}
209
210/// Resolve effective include paths from lexical `use lib` / `no lib` operations.
211#[must_use]
212pub fn resolve_use_lib_paths_from_source(
213    source: &str,
214    workspace_root: &Path,
215    file_dir: Option<&Path>,
216) -> Vec<String> {
217    resolve_use_lib_paths_from_source_at_offset(source, source.len(), workspace_root, file_dir)
218}
219
220/// Resolve effective include paths from lexical `use lib` / `no lib` operations,
221/// considering only source text up to the provided byte offset.
222#[must_use]
223pub fn resolve_use_lib_paths_from_source_at_offset(
224    source: &str,
225    offset: usize,
226    workspace_root: &Path,
227    file_dir: Option<&Path>,
228) -> Vec<String> {
229    let mut resolved = Vec::new();
230    let source_prefix = source.get(..offset).unwrap_or(source);
231    for op in extract_use_lib_operations(source_prefix) {
232        match op {
233            UseLibAction::Add(paths) => {
234                let added = resolve_use_lib_paths(&paths, workspace_root, file_dir);
235                for path in added.into_iter().rev() {
236                    resolved.retain(|existing| existing != &path);
237                    resolved.insert(0, path);
238                }
239            }
240            UseLibAction::Remove(paths) => {
241                for path in resolve_use_lib_paths(&paths, workspace_root, file_dir) {
242                    resolved.retain(|existing| existing != &path);
243                }
244            }
245        }
246    }
247    resolved
248}
249
250/// Compute the set of paths that are currently excluded from `@INC` at a given
251/// source offset due to `no lib` operations.
252///
253/// Returns the resolved path strings that have been explicitly removed by `no lib`
254/// and not subsequently re-added by a later `use lib` before the given offset.
255/// Callers should use this set to filter out matching entries from configured
256/// include paths, so that `no lib 'lib'` cancels both lexical AND configured
257/// `lib` entries that would otherwise survive the lexical scan.
258///
259/// # Example
260///
261/// For the source `use lib 'lib'; no lib 'lib'; use GoneModule;` at an offset
262/// within `use GoneModule;`, this function returns `["lib"]` because `lib` was
263/// added then removed before the offset.
264#[must_use]
265pub fn no_lib_cancelled_paths_at_offset(
266    source: &str,
267    offset: usize,
268    workspace_root: &Path,
269    file_dir: Option<&Path>,
270) -> Vec<String> {
271    let mut effective = Vec::<String>::new();
272    let mut cancelled = Vec::<String>::new();
273    let source_prefix = source.get(..offset).unwrap_or(source);
274    for op in extract_use_lib_operations(source_prefix) {
275        match op {
276            UseLibAction::Add(paths) => {
277                let added = resolve_use_lib_paths(&paths, workspace_root, file_dir);
278                for path in &added {
279                    // If it was cancelled, re-adding it removes the cancellation.
280                    cancelled.retain(|c| c != path);
281                }
282                for path in added.into_iter().rev() {
283                    effective.retain(|e| e != &path);
284                    effective.insert(0, path);
285                }
286            }
287            UseLibAction::Remove(paths) => {
288                let removed = resolve_use_lib_paths(&paths, workspace_root, file_dir);
289                for path in removed {
290                    effective.retain(|e| e != &path);
291                    if !cancelled.contains(&path) {
292                        cancelled.push(path);
293                    }
294                }
295            }
296        }
297    }
298    cancelled
299}
300
/// If `trimmed` begins with `use lib`, return the text following it with
/// leading whitespace removed.
///
/// `use` must be followed by whitespace, and `lib` must be followed by
/// whitespace, `(` or `;`, so `use library` and `use lib::x` do not match.
fn strip_use_lib_prefix(trimmed: &str) -> Option<&str> {
    let after_keyword = trimmed.strip_prefix("use")?;
    match after_keyword.chars().next() {
        Some(c) if c.is_whitespace() => {}
        _ => return None,
    }

    let after_module = after_keyword.trim_start().strip_prefix("lib")?;
    let boundary = after_module.chars().next()?;
    if boundary.is_whitespace() || matches!(boundary, '(' | ';') {
        Some(after_module.trim_start())
    } else {
        None
    }
}
313
/// If `trimmed` begins with `no lib`, return the text following it with
/// leading whitespace removed.
///
/// `no` must be followed by whitespace, and `lib` must be followed by
/// whitespace, `(` or `;`, so `no library` and `no lib::x` do not match.
fn strip_no_lib_prefix(trimmed: &str) -> Option<&str> {
    let after_keyword = trimmed.strip_prefix("no")?;
    match after_keyword.chars().next() {
        Some(c) if c.is_whitespace() => {}
        _ => return None,
    }

    let after_module = after_keyword.trim_start().strip_prefix("lib")?;
    let boundary = after_module.chars().next()?;
    if boundary.is_whitespace() || matches!(boundary, '(' | ';') {
        Some(after_module.trim_start())
    } else {
        None
    }
}
326
327fn extract_paths_from_args(args: &str, out: &mut Vec<UseLibPath>) {
328    let args = args.trim_end_matches(';').trim();
329
330    if let Some(rest) = args.strip_prefix("qw") {
331        extract_qw_paths(rest.trim_start(), out);
332        return;
333    }
334
335    if let Some(inner) = strip_parens(args) {
336        extract_quoted_list(inner, out);
337        return;
338    }
339
340    extract_quoted_list(args, out);
341}
342
343fn extract_qw_paths(rest: &str, out: &mut Vec<UseLibPath>) {
344    let (open, close) = match rest.chars().next() {
345        Some('(') => ('(', ')'),
346        Some('/') => ('/', '/'),
347        Some('{') => ('{', '}'),
348        Some('[') => ('[', ']'),
349        Some('<') => ('<', '>'),
350        Some('!') => ('!', '!'),
351        _ => return,
352    };
353
354    let inner = &rest[open.len_utf8()..];
355    let end = inner.find(close).unwrap_or(inner.len());
356    let content = &inner[..end];
357
358    for word in content.split_whitespace() {
359        out.push(UseLibPath { path: word.to_string(), from_findbin: false });
360    }
361}
362
/// Return the text between an enclosing `(` ... `)` pair, or `None` when `s`
/// (ignoring surrounding whitespace) is not wrapped in parentheses.
///
/// Whitespace just inside the parentheses is kept in the returned slice.
fn strip_parens(s: &str) -> Option<&str> {
    s.trim()
        .strip_prefix('(')
        .and_then(|body| body.trim_end().strip_suffix(')'))
}
369
370fn extract_quoted_list(s: &str, out: &mut Vec<UseLibPath>) {
371    let mut remaining = s.trim();
372
373    while !remaining.is_empty() {
374        remaining = remaining.trim_start_matches(|c: char| c == ',' || c.is_whitespace());
375        if remaining.is_empty() {
376            break;
377        }
378
379        // Skip Perl line comments: # ... <newline>
380        if remaining.starts_with('#') {
381            remaining = match remaining.find('\n') {
382                Some(nl) => &remaining[nl + 1..],
383                None => "",
384            };
385            continue;
386        }
387
388        if let Some((path, from_findbin, rest)) = extract_one_quoted(remaining) {
389            out.push(UseLibPath { path, from_findbin });
390            remaining = rest.trim_start_matches(|c: char| c == ',' || c.is_whitespace());
391        } else {
392            break;
393        }
394    }
395}
396
397fn extract_one_quoted(s: &str) -> Option<(String, bool, &str)> {
398    let s = s.trim();
399    let quote = match s.chars().next()? {
400        '\'' => '\'',
401        '"' => '"',
402        _ => return None,
403    };
404
405    let inner = &s[1..];
406    let end = inner.find(quote)?;
407    let content = &inner[..end];
408    let rest = &inner[end + 1..];
409
410    let (path, from_findbin) = resolve_findbin_in_string(content);
411    Some((path, from_findbin, rest))
412}
413
/// Detect a leading `FindBin` directory variable in a string and strip it.
///
/// Recognized variables are `$FindBin::Bin`, `$FindBin::RealBin`, the short
/// exported forms `$Bin` and `$RealBin`, and the braced spelling of each
/// (`${FindBin::Bin}`, `${Bin}`, ...).
///
/// Returns `(path, true)` when `s` starts with one of them, where `path` is
/// the remainder with one leading `/` removed, or `"."` when nothing remains.
/// Otherwise returns `(s, false)` unchanged.
fn resolve_findbin_in_string(s: &str) -> (String, bool) {
    // (variable text, needs word-boundary check).  Braced forms are closed by
    // `}` and cannot run on into a longer name.  Unbraced forms must not be
    // followed by an identifier character, otherwise `$BinDir` or
    // `$FindBin::BinDir` would be mistaken for `$Bin` / `$FindBin::Bin`.
    // No entry is a prefix of another, so the order is not significant.
    const FINDBIN_VARS: [(&str, bool); 8] = [
        ("$FindBin::Bin", true),
        ("$FindBin::RealBin", true),
        ("${FindBin::Bin}", false),
        ("${FindBin::RealBin}", false),
        ("$Bin", true),
        ("$RealBin", true),
        ("${Bin}", false),
        ("${RealBin}", false),
    ];

    for &(var, needs_boundary) in FINDBIN_VARS.iter() {
        let Some(rest) = s.strip_prefix(var) else {
            continue;
        };

        let runs_into_identifier =
            rest.chars().next().is_some_and(|c| c.is_alphanumeric() || c == '_');
        if needs_boundary && runs_into_identifier {
            continue;
        }

        let path = rest.strip_prefix('/').unwrap_or(rest);
        let path = if path.is_empty() { "." } else { path };
        return (path.to_string(), true);
    }

    (s.to_string(), false)
}
466
/// Express `path` relative to `workspace_root` as a forward-slash string.
///
/// - When `path` is under `workspace_root`, the remainder is returned (`"."`
///   when nothing remains). A remainder containing a `..` component is
///   rejected with `None`: `strip_prefix` is purely lexical, so
///   `/workspace/../etc` would otherwise yield `../etc` and escape the
///   workspace.
/// - An absolute path outside `workspace_root` yields `None`.
/// - Any other (relative) path is returned as-is.
///
/// Backslashes in the returned string are replaced with `/`.
fn path_to_relative_string(path: &Path, workspace_root: &Path) -> Option<String> {
    let to_forward_slashes = |p: &Path| p.to_string_lossy().replace('\\', "/");

    match path.strip_prefix(workspace_root) {
        Ok(inside) => {
            if inside.components().any(|part| matches!(part, Component::ParentDir)) {
                return None;
            }
            let text = to_forward_slashes(inside);
            Some(if text.is_empty() { ".".to_string() } else { text })
        }
        Err(_) if path.is_absolute() => None,
        Err(_) => Some(to_forward_slashes(path)),
    }
}
485
/// Return `path` with every backslash replaced by a forward slash.
fn normalize_relative_path_string(path: &str) -> String {
    path.chars().map(|c| if c == '\\' { '/' } else { c }).collect()
}
489
/// Join `relative` onto `base`, resolving `.` and `..` lexically (without
/// touching the filesystem).
///
/// Returns `None` when `relative` contains a root or prefix component (i.e.
/// is not actually relative), or when a `..` cannot be applied because the
/// accumulated path has no parent left to remove.
fn normalize_findbin_path(base: &Path, relative: &str) -> Option<PathBuf> {
    Path::new(relative)
        .components()
        .try_fold(base.to_path_buf(), |mut joined, part| match part {
            Component::CurDir => Some(joined),
            Component::Normal(name) => {
                joined.push(name);
                Some(joined)
            }
            Component::ParentDir => {
                if joined.pop() {
                    Some(joined)
                } else {
                    None
                }
            }
            Component::RootDir | Component::Prefix(_) => None,
        })
}
505}