Skip to main content

perl_module/resolution/
use_lib.rs

//! Extract include paths from `use lib` and `FindBin` statements.
//!
//! Scans Perl source text for `use lib` / `no lib` pragmas and recognizes
//! common `FindBin` patterns to discover additional module include directories.
5
6use std::path::{Component, Path, PathBuf};
7
/// One include directory named by a `use lib` (or `no lib`) statement.
///
/// The value is what the extractor read from the source text; it has not been
/// checked against the filesystem.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct UseLibPath {
    /// Directory text as written in the statement. When `from_findbin` is
    /// set, the leading `FindBin` variable (and one following `/`) has already
    /// been removed, so this is the remainder relative to the script directory.
    pub path: String,
    /// `true` when the original text started with a recognized `FindBin`
    /// variable such as `$FindBin::Bin`, `$Bin` or `${RealBin}`.
    pub from_findbin: bool,
}
16
17/// A `use lib` / `no lib` operation extracted from source in lexical order.
18#[derive(Debug, Clone, PartialEq, Eq)]
19pub enum UseLibAction {
20    /// Add paths to the effective include stack.
21    Add(Vec<UseLibPath>),
22    /// Remove paths from the effective include stack.
23    Remove(Vec<UseLibPath>),
24}
25
26/// Extract include paths from `use lib` statements in Perl source text.
27///
28/// Handles the following patterns:
29/// - `use lib 'path';`
30/// - `use lib "path";`
31/// - `use lib qw(path1 path2);`
32/// - `use lib qw/path1 path2/;`
33/// - `use lib ("path1", "path2");`
34/// - `use lib '$FindBin::Bin/path'` and `"$FindBin::Bin/path"`
35/// - `use lib '$Bin/path'` and `"$RealBin/path"` (from `FindBin` exports)
36///
37/// Returns extracted paths in order of appearance.
38pub fn extract_use_lib_paths(source: &str) -> Vec<UseLibPath> {
39    let mut paths = Vec::new();
40
41    for statement in split_perl_statements(source) {
42        let trimmed = statement.trim();
43        if let Some(rest) = strip_use_lib_prefix(trimmed) {
44            extract_paths_from_args(rest, &mut paths);
45        }
46    }
47
48    paths
49}
50
51/// Extract ordered `use lib` and `no lib` operations from source text.
52#[must_use]
53pub fn extract_use_lib_operations(source: &str) -> Vec<UseLibAction> {
54    let mut ops = Vec::new();
55
56    for statement in split_perl_statements(source) {
57        let trimmed = statement.trim();
58        if let Some(rest) = strip_use_lib_prefix(trimmed) {
59            let mut paths = Vec::new();
60            extract_paths_from_args(rest, &mut paths);
61            if !paths.is_empty() {
62                ops.push(UseLibAction::Add(paths));
63            }
64            continue;
65        }
66
67        if let Some(rest) = strip_no_lib_prefix(trimmed) {
68            let mut paths = Vec::new();
69            extract_paths_from_args(rest, &mut paths);
70            if !paths.is_empty() {
71                ops.push(UseLibAction::Remove(paths));
72            }
73        }
74    }
75
76    ops
77}
78
/// Split Perl source into statement slices, cutting after each `;` that sits
/// outside a quoted string and outside a `#` line comment.
///
/// Each returned slice includes its terminating `;` (when it has one) and any
/// leading whitespace. Text left after the final `;` is returned as a last
/// slice. A comment that appears before a statement has any content is
/// excluded from that statement's slice; a comment in the middle of a
/// statement stays inside the slice.
fn split_perl_statements(source: &str) -> Vec<&str> {
    let mut pieces = Vec::new();
    // Byte offset where the statement currently being scanned begins.
    let mut stmt_start = 0;
    // `Some(q)` while inside a string opened by quote character `q`.
    let mut open_quote: Option<char> = None;
    // Set by a backslash inside a string: the next character is taken literally.
    let mut skip_next = false;
    // True once the current statement has seen anything other than
    // whitespace and comments.
    let mut seen_content = false;
    // Characters located before this byte offset belong to a comment that has
    // already been handled and must be ignored.
    let mut resume_at = 0;

    for (pos, c) in source.char_indices() {
        if pos < resume_at {
            continue;
        }
        if skip_next {
            skip_next = false;
            continue;
        }

        match (c, open_quote) {
            ('\\', Some(_)) => skip_next = true,
            ('\'' | '"', None) => {
                open_quote = Some(c);
                seen_content = true;
            }
            (q, Some(opener)) if q == opener => {
                open_quote = None;
                seen_content = true;
            }
            ('#', None) => {
                // The comment runs through the newline, or to end of input.
                resume_at = source[pos..].find('\n').map_or(source.len(), |nl| pos + nl + 1);
                // Nothing of the statement seen yet: start it after the comment.
                if !seen_content {
                    stmt_start = resume_at;
                }
            }
            (';', None) => {
                let stop = pos + c.len_utf8();
                pieces.push(&source[stmt_start..stop]);
                stmt_start = stop;
                seen_content = false;
            }
            _ => {
                if !c.is_whitespace() {
                    seen_content = true;
                }
            }
        }
    }

    if stmt_start < source.len() {
        pieces.push(&source[stmt_start..]);
    }

    pieces
}
161
162/// Resolve `use lib` paths against a workspace root and optional file directory.
163///
164/// - Absolute paths are accepted only when they stay under `workspace_root`.
165/// - `$FindBin::Bin`-relative paths are resolved against `file_dir` (or `workspace_root` if absent).
166/// - Other relative paths are resolved against `workspace_root`.
167pub fn resolve_use_lib_paths(
168    use_lib_paths: &[UseLibPath],
169    workspace_root: &Path,
170    file_dir: Option<&Path>,
171) -> Vec<String> {
172    let mut result = Vec::new();
173
174    for ulp in use_lib_paths {
175        let path_str = &ulp.path;
176
177        if ulp.from_findbin {
178            let base = file_dir.unwrap_or(workspace_root);
179            let Some(resolved) = normalize_findbin_path(base, path_str) else {
180                continue;
181            };
182            if resolved.strip_prefix(workspace_root).is_err() {
183                continue;
184            }
185            if let Some(s) = path_to_relative_string(&resolved, workspace_root)
186                && !result.contains(&s)
187            {
188                result.push(s);
189            }
190        } else {
191            let p = Path::new(path_str);
192            if p.is_absolute() {
193                if let Some(s) = path_to_relative_string(p, workspace_root)
194                    && !result.contains(&s)
195                {
196                    result.push(s);
197                }
198            } else {
199                let s = path_str.to_string();
200                if !result.contains(&s) {
201                    result.push(s);
202                }
203            }
204        }
205    }
206
207    result
208}
209
210/// Resolve effective include paths from lexical `use lib` / `no lib` operations.
211#[must_use]
212pub fn resolve_use_lib_paths_from_source(
213    source: &str,
214    workspace_root: &Path,
215    file_dir: Option<&Path>,
216) -> Vec<String> {
217    resolve_use_lib_paths_from_source_at_offset(source, source.len(), workspace_root, file_dir)
218}
219
220/// Resolve effective include paths from lexical `use lib` / `no lib` operations,
221/// considering only source text up to the provided byte offset.
222#[must_use]
223pub fn resolve_use_lib_paths_from_source_at_offset(
224    source: &str,
225    offset: usize,
226    workspace_root: &Path,
227    file_dir: Option<&Path>,
228) -> Vec<String> {
229    let mut resolved = Vec::new();
230    let source_prefix = source.get(..offset).unwrap_or(source);
231    for op in extract_use_lib_operations(source_prefix) {
232        match op {
233            UseLibAction::Add(paths) => {
234                let added = resolve_use_lib_paths(&paths, workspace_root, file_dir);
235                for path in added.into_iter().rev() {
236                    resolved.retain(|existing| existing != &path);
237                    resolved.insert(0, path);
238                }
239            }
240            UseLibAction::Remove(paths) => {
241                for path in resolve_use_lib_paths(&paths, workspace_root, file_dir) {
242                    resolved.retain(|existing| existing != &path);
243                }
244            }
245        }
246    }
247    resolved
248}
249
/// If `trimmed` begins with `use <whitespace> lib` followed by whitespace,
/// `(` or `;`, return what follows `lib` with leading whitespace removed.
///
/// Returns `None` for anything else, including `use library ...` and a bare
/// `use lib` with nothing after it.
fn strip_use_lib_prefix(trimmed: &str) -> Option<&str> {
    let after_keyword = trimmed.strip_prefix("use")?;

    // At least one whitespace character must separate the keyword from `lib`.
    let module = after_keyword.trim_start();
    if module.len() == after_keyword.len() {
        return None;
    }

    let after_lib = module.strip_prefix("lib")?;
    match after_lib.chars().next() {
        Some(c) if c.is_whitespace() || c == '(' || c == ';' => Some(after_lib.trim_start()),
        _ => None,
    }
}
262
/// If `trimmed` begins with `no <whitespace> lib` followed by whitespace,
/// `(` or `;`, return what follows `lib` with leading whitespace removed.
///
/// Returns `None` for anything else, including `no library ...` and a bare
/// `no lib` with nothing after it.
fn strip_no_lib_prefix(trimmed: &str) -> Option<&str> {
    let after_keyword = trimmed.strip_prefix("no")?;

    // At least one whitespace character must separate the keyword from `lib`.
    let module = after_keyword.trim_start();
    if module.len() == after_keyword.len() {
        return None;
    }

    let after_lib = module.strip_prefix("lib")?;
    match after_lib.chars().next() {
        Some(c) if c.is_whitespace() || c == '(' || c == ';' => Some(after_lib.trim_start()),
        _ => None,
    }
}
275
276fn extract_paths_from_args(args: &str, out: &mut Vec<UseLibPath>) {
277    let args = args.trim_end_matches(';').trim();
278
279    if let Some(rest) = args.strip_prefix("qw") {
280        extract_qw_paths(rest.trim_start(), out);
281        return;
282    }
283
284    if let Some(inner) = strip_parens(args) {
285        extract_quoted_list(inner, out);
286        return;
287    }
288
289    extract_quoted_list(args, out);
290}
291
292fn extract_qw_paths(rest: &str, out: &mut Vec<UseLibPath>) {
293    let (open, close) = match rest.chars().next() {
294        Some('(') => ('(', ')'),
295        Some('/') => ('/', '/'),
296        Some('{') => ('{', '}'),
297        Some('[') => ('[', ']'),
298        Some('<') => ('<', '>'),
299        Some('!') => ('!', '!'),
300        _ => return,
301    };
302
303    let inner = &rest[open.len_utf8()..];
304    let end = inner.find(close).unwrap_or(inner.len());
305    let content = &inner[..end];
306
307    for word in content.split_whitespace() {
308        out.push(UseLibPath { path: word.to_string(), from_findbin: false });
309    }
310}
311
/// Return the text between a leading `(` and a trailing `)` after trimming
/// surrounding whitespace from `s`, or `None` when either is missing.
///
/// Whitespace just inside the parentheses is left untouched.
fn strip_parens(s: &str) -> Option<&str> {
    s.trim()
        .strip_prefix('(')
        .and_then(|body| body.trim_end().strip_suffix(')'))
}
318
319fn extract_quoted_list(s: &str, out: &mut Vec<UseLibPath>) {
320    let mut remaining = s.trim();
321
322    while !remaining.is_empty() {
323        remaining = remaining.trim_start_matches(|c: char| c == ',' || c.is_whitespace());
324        if remaining.is_empty() {
325            break;
326        }
327
328        // Skip Perl line comments: # ... <newline>
329        if remaining.starts_with('#') {
330            remaining = match remaining.find('\n') {
331                Some(nl) => &remaining[nl + 1..],
332                None => "",
333            };
334            continue;
335        }
336
337        if let Some((path, from_findbin, rest)) = extract_one_quoted(remaining) {
338            out.push(UseLibPath { path, from_findbin });
339            remaining = rest.trim_start_matches(|c: char| c == ',' || c.is_whitespace());
340        } else {
341            break;
342        }
343    }
344}
345
346fn extract_one_quoted(s: &str) -> Option<(String, bool, &str)> {
347    let s = s.trim();
348    let quote = match s.chars().next()? {
349        '\'' => '\'',
350        '"' => '"',
351        _ => return None,
352    };
353
354    let inner = &s[1..];
355    let end = inner.find(quote)?;
356    let content = &inner[..end];
357    let rest = &inner[end + 1..];
358
359    let (path, from_findbin) = resolve_findbin_in_string(content);
360    Some((path, from_findbin, rest))
361}
362
/// Detect a leading `FindBin` variable in `s` and strip it.
///
/// Recognized variables are `$FindBin::Bin`, `$FindBin::RealBin`, `$Bin`,
/// `$RealBin` and the braced spelling of each (`${FindBin::Bin}`, `${Bin}`,
/// ...).
///
/// Returns `(remainder, true)` when `s` starts with one of them, where
/// `remainder` is the text after the variable with one leading `/` removed,
/// or `"."` when nothing is left. Otherwise returns `(s, false)` unchanged.
///
/// Unbraced variables must be followed by end-of-string or a character that
/// cannot continue an identifier (not alphanumeric and not `_`). This keeps
/// unrelated variables such as `$BinDir` or `$FindBin::BinDir` from being
/// mistaken for `$Bin` / `$FindBin::Bin`. Braced forms are delimited by the
/// closing brace and need no such check.
fn resolve_findbin_in_string(s: &str) -> (String, bool) {
    const UNBRACED: [&str; 4] = ["$FindBin::Bin", "$FindBin::RealBin", "$Bin", "$RealBin"];
    const BRACED: [&str; 4] =
        ["${FindBin::Bin}", "${FindBin::RealBin}", "${Bin}", "${RealBin}"];

    let continues_identifier = |c: char| c.is_alphanumeric() || c == '_';

    let tail = UNBRACED
        .iter()
        .find_map(|var| s.strip_prefix(*var).filter(|tail| !tail.starts_with(continues_identifier)))
        .or_else(|| BRACED.iter().find_map(|var| s.strip_prefix(*var)));

    match tail {
        Some(tail) => {
            let path = tail.strip_prefix('/').unwrap_or(tail);
            // A bare variable refers to the script directory itself.
            let path = if path.is_empty() { "." } else { path };
            (path.to_string(), true)
        }
        None => (s.to_string(), false),
    }
}
415
416fn path_to_relative_string(path: &Path, workspace_root: &Path) -> Option<String> {
417    if let Ok(rel) = path.strip_prefix(workspace_root) {
418        // Guard against lexical strip_prefix matching an embedded `..` segment.
419        // For example, `/workspace/../etc` strips the `/workspace` prefix lexically,
420        // leaving `../etc` which would escape the workspace.  Reject any result
421        // that contains a parent-directory component.
422        if rel.components().any(|c| c == std::path::Component::ParentDir) {
423            return None;
424        }
425        let s = normalize_relative_path_string(rel.to_string_lossy().as_ref());
426        if s.is_empty() { Some(".".to_string()) } else { Some(s) }
427    } else if path.is_absolute() {
428        None
429    } else {
430        let s = normalize_relative_path_string(path.to_string_lossy().as_ref());
431        Some(s)
432    }
433}
434
/// Return `path` with every backslash replaced by a forward slash.
fn normalize_relative_path_string(path: &str) -> String {
    path.chars().map(|c| if c == '\\' { '/' } else { c }).collect()
}
438
/// Join `relative` onto `base`, resolving `.` and `..` lexically (the
/// filesystem is not consulted).
///
/// Returns `None` when `relative` contains a root or prefix component, or
/// when a `..` cannot be applied because the accumulated path has no parent
/// left to remove.
fn normalize_findbin_path(base: &Path, relative: &str) -> Option<PathBuf> {
    Path::new(relative)
        .components()
        .try_fold(base.to_path_buf(), |mut joined, part| match part {
            Component::CurDir => Some(joined),
            Component::Normal(name) => {
                joined.push(name);
                Some(joined)
            }
            // `pop` reports `false` when there is no parent to step up to.
            Component::ParentDir => joined.pop().then_some(joined),
            Component::RootDir | Component::Prefix(_) => None,
        })
}