Skip to main content

perl_refactoring/refactor/
inline.rs

1//! Subroutine inlining for Perl code.
2//!
3//! Provides text-based subroutine inlining that replaces a call site with
4//! the function's body after substituting formal parameters with the actual
5//! arguments from the call.
6//!
7//! # Limitations
8//!
9//! This is a text-pattern implementation. It does not build a full AST and
10//! therefore relies on heuristics for:
11//! - Parameter extraction (assumes `my ($a, $b, …) = @_;` pattern)
12//! - Return detection (counts `return` keywords)
13//! - Recursion detection (looks for the sub name inside the body)
//! - Side-effect detection (looks for `print`, `printf`, `say`, `warn`, `die`,
//!   `open`, `close`, `read`, `write`, `seek`, `sysread`, `syswrite`)
16//!
17//! Functions that do not follow these conventions may not be inlined correctly.
18//! The safe defaults are to **reject** when uncertain (recursion, large body,
19//! multiple returns) and to **warn** when side effects are detected.
20
21use std::collections::HashMap;
22
/// Maximum number of body lines before the inliner rejects the function.
///
/// [`analyze_sub_for_inlining`] returns [`InlineError::TooLarge`] when the
/// body (counted after the parameter-extraction line has been removed) has
/// more lines than this.
const MAX_BODY_LINES: usize = 50;
25
26// ---------------------------------------------------------------------------
27// Error type
28// ---------------------------------------------------------------------------
29
/// Error type returned by subroutine inlining operations.
///
/// Implements `PartialEq`/`Eq` so callers and tests can compare errors
/// directly instead of pattern-matching every field.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum InlineError {
    /// The target subroutine was not found in the provided source.
    SubNotFound {
        /// Name of the subroutine that was searched for.
        name: String,
    },
    /// The subroutine calls itself (direct recursion) and cannot be inlined.
    Recursive {
        /// Name of the recursive subroutine.
        name: String,
    },
    /// The subroutine body has too many lines to inline safely.
    TooLarge {
        /// Name of the subroutine.
        name: String,
        /// Actual line count of the body.
        line_count: usize,
    },
    /// The subroutine has more than one `return` statement, which requires
    /// control-flow restructuring beyond simple text substitution.
    MultipleReturns {
        /// Name of the subroutine.
        name: String,
        /// Number of `return` statements found.
        count: usize,
    },
    /// The call site expression could not be parsed (sub name missing,
    /// unmatched parenthesis, etc.).
    CallSiteParseFailed {
        /// Diagnostic message.
        message: String,
    },
}
64
65impl std::fmt::Display for InlineError {
66    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
67        match self {
68            InlineError::SubNotFound { name } => {
69                write!(f, "subroutine '{}' not found in source", name)
70            }
71            InlineError::Recursive { name } => {
72                write!(f, "cannot inline recursive subroutine '{}'", name)
73            }
74            InlineError::TooLarge { name, line_count } => {
75                write!(
76                    f,
77                    "subroutine '{}' is too large to inline ({} lines, max {})",
78                    name, line_count, MAX_BODY_LINES
79                )
80            }
81            InlineError::MultipleReturns { name, count } => {
82                write!(
83                    f,
84                    "subroutine '{}' has {} return points; only single-return subs can be inlined",
85                    name, count
86                )
87            }
88            InlineError::CallSiteParseFailed { message } => {
89                write!(f, "failed to parse call site: {}", message)
90            }
91        }
92    }
93}
94
95impl std::error::Error for InlineError {}
96
97// ---------------------------------------------------------------------------
98// Analysis result
99// ---------------------------------------------------------------------------
100
/// The result of analysing a subroutine's inlineability.
///
/// Implements `PartialEq`/`Eq` so analysis results can be compared directly
/// in assertions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum InlineAbility {
    /// The subroutine can be inlined.
    Ok {
        /// Formal parameter names (without sigils) in declaration order.
        params: Vec<String>,
        /// The body text, stripped of the parameter-extraction line.
        body: String,
        /// Whether the body contains operations with observable side effects.
        has_side_effects: bool,
    },
}
114
115// ---------------------------------------------------------------------------
116// Public API
117// ---------------------------------------------------------------------------
118
119/// Analyse whether a named subroutine can be inlined.
120///
121/// Returns `Ok(InlineAbility::Ok { … })` when safe to inline, or an
122/// [`InlineError`] when the subroutine must not be inlined.
123pub fn analyze_sub_for_inlining(
124    source: &str,
125    sub_name: &str,
126) -> Result<InlineAbility, InlineError> {
127    let parsed = parse_sub_definition(source, sub_name)
128        .ok_or_else(|| InlineError::SubNotFound { name: sub_name.to_string() })?;
129
130    // Recursion check
131    if body_calls_self(&parsed.body, sub_name) {
132        return Err(InlineError::Recursive { name: sub_name.to_string() });
133    }
134
135    // Size check
136    let body_line_count = parsed.body.lines().count();
137    if body_line_count > MAX_BODY_LINES {
138        return Err(InlineError::TooLarge {
139            name: sub_name.to_string(),
140            line_count: body_line_count,
141        });
142    }
143
144    // Multiple-return check
145    let return_count = count_return_statements(&parsed.body);
146    if return_count > 1 {
147        return Err(InlineError::MultipleReturns {
148            name: sub_name.to_string(),
149            count: return_count,
150        });
151    }
152
153    let side_effects = has_side_effects(&parsed.body);
154
155    Ok(InlineAbility::Ok {
156        params: parsed.params,
157        body: parsed.body,
158        has_side_effects: side_effects,
159    })
160}
161
162/// Text-based Perl subroutine inliner.
163///
164/// Create one instance per source file and call [`inline_call`] (or its
165/// variants) to produce the inlined expression text.
166pub struct SubInliner {
167    source: String,
168}
169
170impl SubInliner {
171    /// Create a new inliner from Perl source text.
172    pub fn new(source: &str) -> Self {
173        Self { source: source.to_string() }
174    }
175
176    /// Inline a single call to `sub_name`.
177    ///
178    /// `call_expr` is the full call expression string, e.g. `"add(3, 4)"`.
179    ///
180    /// Returns the replacement text (the inlined expression), or an
181    /// [`InlineError`] if the subroutine cannot be inlined.
182    pub fn inline_call(&self, sub_name: &str, call_expr: &str) -> Result<String, InlineError> {
183        let (inlined, _warnings) = self.inline_call_inner(sub_name, call_expr, &[])?;
184        Ok(inlined)
185    }
186
187    /// Like [`inline_call`] but also returns any warnings (e.g. side effects).
188    pub fn inline_call_with_warnings(
189        &self,
190        sub_name: &str,
191        call_expr: &str,
192    ) -> Result<(String, Vec<String>), InlineError> {
193        self.inline_call_inner(sub_name, call_expr, &[])
194    }
195
196    /// Like [`inline_call`] but accepts a list of variable names that already
197    /// exist in the outer scope, so collisions can be detected and renamed.
198    pub fn inline_call_with_outer_vars(
199        &self,
200        sub_name: &str,
201        call_expr: &str,
202        outer_vars: &[String],
203    ) -> Result<String, InlineError> {
204        let (inlined, _warnings) = self.inline_call_inner(sub_name, call_expr, outer_vars)?;
205        Ok(inlined)
206    }
207
208    // ------------------------------------------------------------------
209    // Internal
210    // ------------------------------------------------------------------
211
212    fn inline_call_inner(
213        &self,
214        sub_name: &str,
215        call_expr: &str,
216        outer_vars: &[String],
217    ) -> Result<(String, Vec<String>), InlineError> {
218        let ability = analyze_sub_for_inlining(&self.source, sub_name)?;
219        let InlineAbility::Ok { params, body, has_side_effects } = ability;
220
221        let mut warnings = Vec::new();
222        if has_side_effects {
223            warnings.push(format!(
224                "subroutine '{}' contains side effects (print/warn/die/I/O); \
225                 inlining preserves them but may change semantics",
226                sub_name
227            ));
228        }
229
230        // Extract arguments from call expression
231        let args = extract_call_args(call_expr, sub_name)?;
232
233        // Build substitution map: param_name -> arg_text
234        let mut sub_map: HashMap<String, String> = HashMap::new();
235        for (i, param) in params.iter().enumerate() {
236            let arg = args.get(i).cloned().unwrap_or_default();
237            sub_map.insert(param.clone(), arg);
238        }
239
240        // Rename local variables to avoid outer-scope collisions
241        let body = rename_collisions(&body, outer_vars);
242
243        // Substitute parameters in body
244        let substituted = substitute_params(&body, &sub_map);
245
246        // Extract the return expression from the body
247        let expr = extract_return_expr(&substituted);
248
249        Ok((expr, warnings))
250    }
251}
252
253// ---------------------------------------------------------------------------
254// Parsing helpers
255// ---------------------------------------------------------------------------
256
/// Parsed representation of a subroutine definition extracted from source.
///
/// Produced by `parse_sub_definition`; the two fields are the two halves
/// returned by `extract_params_line`.
struct ParsedSub {
    /// Formal parameter names (sigils stripped), in declaration order.
    /// Empty when no `my (...) = @_;` line was found.
    params: Vec<String>,
    /// Body text with the parameter line removed.
    body: String,
}
264
265/// Extract a subroutine definition from source text.
266///
267/// Recognises the pattern:
268/// ```text
269/// sub NAME {
270///     …body…
271/// }
272/// ```
273///
274/// Returns `None` if the pattern is not found.
275fn parse_sub_definition(source: &str, sub_name: &str) -> Option<ParsedSub> {
276    let start = find_sub_start(source, sub_name)?;
277
278    // Find the matching closing brace
279    let body_start = source[start..].find('{').map(|i| start + i + 1)?;
280    let body_raw = extract_balanced_braces(source, body_start)?;
281
282    // Extract parameter line: "my ($a, $b) = @_;"
283    let (params, body_without_params) = extract_params_line(&body_raw);
284
285    Some(ParsedSub { params, body: body_without_params })
286}
287
/// Find the byte offset of the `sub` keyword of `sub NAME {` in `source`.
///
/// A candidate only counts when:
/// - `sub` is not the tail of a longer identifier (so `mysub add {` is
///   ignored),
/// - `sub` is followed by at least one whitespace character (space, tab or
///   newline),
/// - the name that follows is exactly `sub_name` (so `sub foobar` does not
///   match `foo`),
/// - the next non-whitespace character after the name is `{`.
///
/// Returns `None` if no such declaration exists.
fn find_sub_start(source: &str, sub_name: &str) -> Option<usize> {
    fn is_ident_char(c: char) -> bool {
        c.is_alphanumeric() || c == '_'
    }

    source.match_indices("sub").map(|(idx, _)| idx).find(|&idx| {
        // Reject `sub` that merely ends a longer word.
        if source[..idx].chars().next_back().is_some_and(is_ident_char) {
            return false;
        }
        // The keyword must be separated from the name by whitespace.
        let after_kw = &source[idx + 3..];
        if !after_kw.chars().next().is_some_and(char::is_whitespace) {
            return false;
        }
        let Some(after_name) = after_kw.trim_start().strip_prefix(sub_name) else {
            return false;
        };
        // Word boundary after the name, then the opening brace.
        let name_ends = !after_name.chars().next().is_some_and(is_ident_char);
        name_ends && after_name.trim_start().starts_with('{')
    })
}
311
/// Return the text enclosed by a brace pair whose opening `{` sits just
/// before `open_pos` (i.e. `open_pos` is the byte position AFTER the `{`).
///
/// Nested braces are tracked by a simple depth counter; every `{` and `}`
/// character counts, regardless of context. Returns `None` when the closing
/// brace is never reached.
fn extract_balanced_braces(source: &str, open_pos: usize) -> Option<String> {
    let tail = &source[open_pos..];
    // We start already inside one open brace.
    let mut nesting = 1usize;

    for (offset, ch) in tail.char_indices() {
        match ch {
            '{' => nesting += 1,
            '}' => {
                nesting -= 1;
                if nesting == 0 {
                    return Some(tail[..offset].to_string());
                }
            }
            _ => {}
        }
    }
    None
}
340
341/// Parse out the Perl parameter-extraction line `my ($a, $b) = @_;` from the
342/// top of the body, returning (params, remaining_body).
343///
344/// If no such line is found, returns ([], original_body).
345fn extract_params_line(body: &str) -> (Vec<String>, String) {
346    for (i, line) in body.lines().enumerate() {
347        let trimmed = line.trim();
348        if trimmed.starts_with("my (") && trimmed.contains("= @_") {
349            let params = parse_param_names(trimmed);
350            let remaining: String = body
351                .lines()
352                .enumerate()
353                .filter(|(j, _)| *j != i)
354                .map(|(_, l)| l)
355                .collect::<Vec<_>>()
356                .join("\n");
357            return (params, remaining);
358        }
359    }
360    (vec![], body.to_string())
361}
362
/// Extract parameter names from `my ($a, $b) = @_;`, returning bare names
/// without sigils.
///
/// The list is taken from the first `(` up to the first `)` that follows it.
/// Using the first closing parenthesis (rather than the last one on the
/// line) keeps trailing text such as `# (both required)` out of the result.
///
/// Returns an empty vector when the line has no such parenthesised list.
fn parse_param_names(line: &str) -> Vec<String> {
    let Some(open) = line.find('(') else {
        return Vec::new();
    };
    let after_open = &line[open + 1..];
    let Some(close) = after_open.find(')') else {
        return Vec::new();
    };

    after_open[..close]
        .split(',')
        .map(|s| s.trim().trim_start_matches(['$', '@', '%']).to_string())
        .filter(|s| !s.is_empty())
        .collect()
}
384
385// ---------------------------------------------------------------------------
386// Body analysis helpers
387// ---------------------------------------------------------------------------
388
/// Count occurrences of the `return` keyword as a standalone token in `body`.
///
/// Skips occurrences inside single- or double-quoted string literals so that
/// `my $msg = "will return a value";` is not counted as a return statement.
/// A `return` that is part of a longer identifier (`returned`, `my_return`)
/// or that is a variable name (`$return`, `@return`, `%return`) is not
/// counted either.
///
/// Scanning is done per character, so a backslash escape followed by a
/// multi-byte character inside a string is skipped as a whole and can never
/// leave the cursor in the middle of a UTF-8 sequence.
fn count_return_statements(body: &str) -> usize {
    const KEYWORD: &str = "return";

    fn is_ident_char(c: char) -> bool {
        c.is_alphanumeric() || c == '_'
    }

    let mut count = 0usize;
    // The quote character of the string literal we are inside, if any.
    let mut open_quote: Option<char> = None;
    // Previous character seen, used for the leading word-boundary test.
    let mut prev: Option<char> = None;
    let mut chars = body.char_indices();

    while let Some((idx, ch)) = chars.next() {
        match open_quote {
            // Inside a string: a backslash escapes the next character.
            Some(_) if ch == '\\' => {
                chars.next();
            }
            Some(quote) if ch == quote => open_quote = None,
            Some(_) => {}
            None if ch == '\'' || ch == '"' => open_quote = Some(ch),
            None => {
                if let Some(after) = body[idx..].strip_prefix(KEYWORD) {
                    let before_ok = !prev
                        .is_some_and(|p| is_ident_char(p) || matches!(p, '$' | '@' | '%'));
                    let after_ok = !after.chars().next().is_some_and(is_ident_char);
                    if before_ok && after_ok {
                        count += 1;
                    }
                }
            }
        }
        prev = Some(ch);
    }
    count
}
454
/// Check whether the body contains observable side-effect operations.
///
/// Looks for the I/O and control keywords below as whole words: the keyword
/// must not be embedded in a longer identifier (`sprintf`, `thread`) and
/// must not be a variable name (`$read`, `@open`, `%close`). Whatever
/// follows the keyword — a space, `(`, `;`, end of text — is irrelevant, so
/// `print(...)` and `die;` are detected as well as `print ...`.
///
/// This is a textual heuristic: occurrences inside strings or comments are
/// reported too, which errs on the side of warning.
fn has_side_effects(body: &str) -> bool {
    const SIDE_EFFECT_KEYWORDS: &[&str] = &[
        "print", "printf", "say", "warn", "die", "open", "close", "read", "write", "seek",
        "sysread", "syswrite",
    ];

    fn is_ident_char(c: char) -> bool {
        c.is_alphanumeric() || c == '_'
    }

    SIDE_EFFECT_KEYWORDS.iter().any(|kw| {
        body.match_indices(kw).any(|(idx, _)| {
            let before_ok = !body[..idx]
                .chars()
                .next_back()
                .is_some_and(|c| is_ident_char(c) || matches!(c, '$' | '@' | '%'));
            let after_ok = !body[idx + kw.len()..].chars().next().is_some_and(is_ident_char);
            before_ok && after_ok
        })
    })
}
463
/// Check whether the body calls itself (direct recursion).
///
/// A call is `sub_name`, optional whitespace, then `(`. The name must start
/// at a word boundary, so `padd(` is not a call to `add`, and it must not be
/// a variable (`$add`, `@add`, `%add`).
///
/// Skips occurrences that appear inside single- or double-quoted string
/// literals to avoid false-positive recursion detection when the sub name is
/// merely mentioned in a string (e.g. `my $msg = "add(1,2) adds two numbers"`).
///
/// Scanning is done per character, so a backslash escape followed by a
/// multi-byte character inside a string cannot leave the cursor in the
/// middle of a UTF-8 sequence.
fn body_calls_self(body: &str, sub_name: &str) -> bool {
    fn is_ident_char(c: char) -> bool {
        c.is_alphanumeric() || c == '_'
    }

    // The quote character of the string literal we are inside, if any.
    let mut open_quote: Option<char> = None;
    // Previous character seen, used for the leading word-boundary test.
    let mut prev: Option<char> = None;
    let mut chars = body.char_indices();

    while let Some((idx, ch)) = chars.next() {
        match open_quote {
            // Inside a string: a backslash escapes the next character.
            Some(_) if ch == '\\' => {
                chars.next();
            }
            Some(quote) if ch == quote => open_quote = None,
            Some(_) => {}
            None if ch == '\'' || ch == '"' => open_quote = Some(ch),
            None => {
                if let Some(after_name) = body[idx..].strip_prefix(sub_name) {
                    let before_ok = !prev
                        .is_some_and(|p| is_ident_char(p) || matches!(p, '$' | '@' | '%'));
                    if before_ok && after_name.trim_start().starts_with('(') {
                        return true;
                    }
                }
            }
        }
        prev = Some(ch);
    }
    false
}
502
503// ---------------------------------------------------------------------------
504// Argument extraction
505// ---------------------------------------------------------------------------
506
507/// Extract the argument list from a call expression like `foo(1, 2, "bar")`.
508fn extract_call_args(call_expr: &str, sub_name: &str) -> Result<Vec<String>, InlineError> {
509    let sub_pos = call_expr.find(sub_name).ok_or_else(|| InlineError::CallSiteParseFailed {
510        message: format!("call expression does not contain sub name '{}'", sub_name),
511    })?;
512
513    let after_name_pos = sub_pos + sub_name.len();
514    let rest = call_expr[after_name_pos..].trim_start();
515    if !rest.starts_with('(') {
516        // Bare call with no parens — no arguments
517        return Ok(vec![]);
518    }
519
520    // Find '(' absolute position
521    let paren_offset = call_expr[after_name_pos..].find('(').unwrap_or(0);
522    let open_abs = after_name_pos + paren_offset;
523
524    let close_abs = find_matching_paren(call_expr, open_abs).ok_or_else(|| {
525        InlineError::CallSiteParseFailed {
526            message: "unmatched parenthesis in call expression".to_string(),
527        }
528    })?;
529
530    let args_str = &call_expr[open_abs + 1..close_abs];
531    if args_str.trim().is_empty() {
532        return Ok(vec![]);
533    }
534
535    Ok(split_args(args_str))
536}
537
/// Find the matching `)` for the `(` at byte position `open` in `s`.
///
/// Parentheses inside single- or double-quoted string literals are ignored
/// (with backslash escapes honoured), matching the quoting rules used by
/// `split_args`, so `f(")", 1)` resolves to the final `)`.
///
/// Returns `None` when `open` does not point at a `(` or when no matching
/// `)` exists.
fn find_matching_paren(s: &str, open: usize) -> Option<usize> {
    let bytes = s.as_bytes();
    if bytes.get(open) != Some(&b'(') {
        return None;
    }

    let mut depth = 0usize;
    // The quote byte of the string literal we are inside, if any.
    let mut open_quote: Option<u8> = None;
    let mut i = open;

    while i < bytes.len() {
        let b = bytes[i];
        match open_quote {
            // Skip the byte after a backslash. UTF-8 continuation bytes can
            // never equal an ASCII quote or backslash, so skipping a single
            // byte is safe even before a multi-byte character.
            Some(_) if b == b'\\' => i += 1,
            Some(quote) if b == quote => open_quote = None,
            Some(_) => {}
            None => match b {
                b'\'' | b'"' => open_quote = Some(b),
                b'(' => depth += 1,
                b')' => {
                    // `depth` is at least 1 here: the scan starts on `(` and
                    // returns as soon as the depth drops back to zero.
                    depth -= 1;
                    if depth == 0 {
                        return Some(i);
                    }
                }
                _ => {}
            },
        }
        i += 1;
    }
    None
}
559
/// Break a comma-separated argument string into trimmed pieces.
///
/// Commas only separate arguments when they appear outside quotes and
/// outside any `()`, `[]` or `{}` nesting. Inside a quoted string a backslash
/// keeps the following character verbatim. A trailing piece that is empty
/// after trimming is dropped; empty pieces before a comma are kept.
fn split_args(args_str: &str) -> Vec<String> {
    #[derive(Clone, Copy)]
    enum Quote {
        Outside,
        Single,
        Double,
    }

    let mut pieces = Vec::new();
    let mut piece = String::new();
    let mut nesting = 0usize;
    let mut quote = Quote::Outside;
    let mut stream = args_str.chars();

    while let Some(ch) = stream.next() {
        match (ch, quote) {
            // Escape inside a string: copy the backslash and whatever follows.
            ('\\', Quote::Single | Quote::Double) => {
                piece.push(ch);
                if let Some(escaped) = stream.next() {
                    piece.push(escaped);
                }
            }
            ('\'', Quote::Outside) => {
                quote = Quote::Single;
                piece.push(ch);
            }
            ('\'', Quote::Single) | ('"', Quote::Double) => {
                quote = Quote::Outside;
                piece.push(ch);
            }
            ('"', Quote::Outside) => {
                quote = Quote::Double;
                piece.push(ch);
            }
            ('(' | '[' | '{', Quote::Outside) => {
                nesting += 1;
                piece.push(ch);
            }
            (')' | ']' | '}', Quote::Outside) => {
                nesting = nesting.saturating_sub(1);
                piece.push(ch);
            }
            (',', Quote::Outside) if nesting == 0 => {
                pieces.push(piece.trim().to_string());
                piece.clear();
            }
            _ => piece.push(ch),
        }
    }

    let last = piece.trim();
    if !last.is_empty() {
        pieces.push(last.to_string());
    }
    pieces
}
611
612// ---------------------------------------------------------------------------
613// Body transformation
614// ---------------------------------------------------------------------------
615
/// Replace occurrences of `$param_name` in `body` with the corresponding
/// argument text.
///
/// The body is scanned once. At every `$` the complete identifier that
/// follows (letters, digits, `_`) is read and looked up in `sub_map`, so:
///
/// - longer variable names that share a prefix with a parameter are left
///   alone (replacing `$price` does not corrupt `$price_adjusted`);
/// - all parameters are substituted simultaneously — argument text that
///   itself mentions a parameter name (e.g. calling `add($b, 3)` for
///   `my ($a, $b) = @_;`) is never substituted a second time, and the
///   result does not depend on map iteration order.
///
/// Variables that are not keys of `sub_map` are copied through unchanged.
fn substitute_params(body: &str, sub_map: &HashMap<String, String>) -> String {
    let mut out = String::with_capacity(body.len());
    let mut rest = body;

    while let Some(dollar) = rest.find('$') {
        // Copy everything before the sigil verbatim.
        out.push_str(&rest[..dollar]);

        let after_sigil = &rest[dollar + 1..];
        let name_len = after_sigil
            .find(|c: char| !(c.is_alphanumeric() || c == '_'))
            .unwrap_or(after_sigil.len());
        let name = &after_sigil[..name_len];

        match sub_map.get(name) {
            Some(arg) => out.push_str(arg),
            None => {
                out.push('$');
                out.push_str(name);
            }
        }
        rest = &after_sigil[name_len..];
    }

    out.push_str(rest);
    out
}
634
635/// Rename local variable declarations in `body` that collide with names in
636/// `outer_vars`, appending `_inlined` to the bare name.
637fn rename_collisions(body: &str, outer_vars: &[String]) -> String {
638    let mut result = body.to_string();
639    for outer in outer_vars {
640        let bare = outer.trim_start_matches(['$', '@', '%']);
641        let my_decl = format!("my ${}", bare);
642        if result.contains(&my_decl) {
643            let renamed_bare = format!("{}_inlined", bare);
644            let renamed_decl = format!("my ${}", renamed_bare);
645            // Replace the declaration first — use word-boundary-aware replacement so
646            // that "my $x" does not corrupt "my $x_count" when the outer var is "$x".
647            result = replace_whole_var(&result, &my_decl, &renamed_decl);
648            // Then replace all uses of $bare that are not the new $bare_inlined
649            // We do this by replacing "$bare" with "$bare_inlined" across the body,
650            // but we already renamed the declaration above so the decl is safe.
651            let var = format!("${}", bare);
652            let renamed_var = format!("${}", renamed_bare);
653            // Only replace if not already part of a longer name
654            result = replace_whole_var(&result, &var, &renamed_var);
655        }
656    }
657    result
658}
659
/// Replace occurrences of `var` in `text` that are complete variable
/// references (not a prefix of a longer variable name).
///
/// An occurrence is complete when the character right after it is neither
/// alphanumeric nor `_` (or when it ends the text). Everything else is
/// copied through unchanged.
///
/// An empty `var` never matches anything and `text` is returned as is;
/// without this guard a zero-length match would never advance the scan.
fn replace_whole_var(text: &str, var: &str, replacement: &str) -> String {
    if var.is_empty() {
        return text.to_string();
    }

    let mut out = String::with_capacity(text.len());
    let mut rest = text;

    while let Some(ch) = rest.chars().next() {
        if let Some(tail) = rest.strip_prefix(var) {
            let continues_name =
                tail.chars().next().is_some_and(|c| c.is_alphanumeric() || c == '_');
            if !continues_name {
                out.push_str(replacement);
                rest = tail;
                continue;
            }
        }
        out.push(ch);
        rest = &rest[ch.len_utf8()..];
    }
    out
}
682
/// Extract the expression value from a body containing a single `return`.
///
/// The first line whose trimmed form starts with the `return` keyword
/// decides the result:
///
/// - `return expr;` and `return(expr);` yield `(expr)` / `((expr))`;
/// - a bare `return;` yields `()`, Perl's empty list;
/// - identifiers that merely start with `return` (e.g. `return_value(...)`)
///   are not treated as the keyword;
/// - a `return` with nothing after it on the same line is skipped, because
///   its expression continues on later lines and cannot be extracted here.
///
/// If no line qualifies, the trimmed body is returned unchanged.
fn extract_return_expr(body: &str) -> String {
    for line in body.lines() {
        let Some(rest) = line.trim().strip_prefix("return") else {
            continue;
        };
        // Require a word boundary right after the keyword.
        if rest.chars().next().is_some_and(|c| c.is_alphanumeric() || c == '_') {
            continue;
        }
        let rest = rest.trim();
        if rest.is_empty() {
            continue;
        }
        let expr = rest.trim_end_matches(';').trim();
        return format!("({})", expr);
    }
    body.trim().to_string()
}
695}