perl_refactoring/refactor/
inline.rs1use std::collections::HashMap;
22
/// Largest number of body lines a subroutine may have and still be accepted
/// for inlining; longer bodies are rejected with `InlineError::TooLarge`.
const MAX_BODY_LINES: usize = 50;
25
/// Reasons a subroutine call cannot be inlined.
///
/// Derives `PartialEq`/`Eq` so callers and tests can compare errors directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum InlineError {
    /// No definition of the named subroutine was found in the source text.
    SubNotFound {
        /// Name of the subroutine that was looked up.
        name: String,
    },
    /// The subroutine's body contains a call to itself.
    Recursive {
        /// Name of the recursive subroutine.
        name: String,
    },
    /// The subroutine's body has more lines than the inlining limit allows.
    TooLarge {
        /// Name of the oversized subroutine.
        name: String,
        /// Number of lines counted in its body.
        line_count: usize,
    },
    /// The subroutine's body contains more than one `return` statement.
    MultipleReturns {
        /// Name of the subroutine.
        name: String,
        /// Number of `return` statements counted.
        count: usize,
    },
    /// The call expression could not be parsed.
    CallSiteParseFailed {
        /// Description of what went wrong.
        message: String,
    },
}
64
65impl std::fmt::Display for InlineError {
66 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
67 match self {
68 InlineError::SubNotFound { name } => {
69 write!(f, "subroutine '{}' not found in source", name)
70 }
71 InlineError::Recursive { name } => {
72 write!(f, "cannot inline recursive subroutine '{}'", name)
73 }
74 InlineError::TooLarge { name, line_count } => {
75 write!(
76 f,
77 "subroutine '{}' is too large to inline ({} lines, max {})",
78 name, line_count, MAX_BODY_LINES
79 )
80 }
81 InlineError::MultipleReturns { name, count } => {
82 write!(
83 f,
84 "subroutine '{}' has {} return points; only single-return subs can be inlined",
85 name, count
86 )
87 }
88 InlineError::CallSiteParseFailed { message } => {
89 write!(f, "failed to parse call site: {}", message)
90 }
91 }
92 }
93}
94
95impl std::error::Error for InlineError {}
96
/// Result of analysing a subroutine that passed every inlining check.
///
/// Derives `PartialEq`/`Eq` so analysis results can be compared directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum InlineAbility {
    /// The subroutine can be inlined.
    Ok {
        /// Parameter names taken from the `my (...) = @_` line, sigils removed.
        params: Vec<String>,
        /// Body text with the parameter-unpacking line removed.
        body: String,
        /// Whether the body mentions an output/I/O builtin such as `print` or `die`.
        has_side_effects: bool,
    },
}
114
115pub fn analyze_sub_for_inlining(
124 source: &str,
125 sub_name: &str,
126) -> Result<InlineAbility, InlineError> {
127 let parsed = parse_sub_definition(source, sub_name)
128 .ok_or_else(|| InlineError::SubNotFound { name: sub_name.to_string() })?;
129
130 if body_calls_self(&parsed.body, sub_name) {
132 return Err(InlineError::Recursive { name: sub_name.to_string() });
133 }
134
135 let body_line_count = parsed.body.lines().count();
137 if body_line_count > MAX_BODY_LINES {
138 return Err(InlineError::TooLarge {
139 name: sub_name.to_string(),
140 line_count: body_line_count,
141 });
142 }
143
144 let return_count = count_return_statements(&parsed.body);
146 if return_count > 1 {
147 return Err(InlineError::MultipleReturns {
148 name: sub_name.to_string(),
149 count: return_count,
150 });
151 }
152
153 let side_effects = has_side_effects(&parsed.body);
154
155 Ok(InlineAbility::Ok {
156 params: parsed.params,
157 body: parsed.body,
158 has_side_effects: side_effects,
159 })
160}
161
162pub struct SubInliner {
167 source: String,
168}
169
170impl SubInliner {
171 pub fn new(source: &str) -> Self {
173 Self { source: source.to_string() }
174 }
175
176 pub fn inline_call(&self, sub_name: &str, call_expr: &str) -> Result<String, InlineError> {
183 let (inlined, _warnings) = self.inline_call_inner(sub_name, call_expr, &[])?;
184 Ok(inlined)
185 }
186
187 pub fn inline_call_with_warnings(
189 &self,
190 sub_name: &str,
191 call_expr: &str,
192 ) -> Result<(String, Vec<String>), InlineError> {
193 self.inline_call_inner(sub_name, call_expr, &[])
194 }
195
196 pub fn inline_call_with_outer_vars(
199 &self,
200 sub_name: &str,
201 call_expr: &str,
202 outer_vars: &[String],
203 ) -> Result<String, InlineError> {
204 let (inlined, _warnings) = self.inline_call_inner(sub_name, call_expr, outer_vars)?;
205 Ok(inlined)
206 }
207
208 fn inline_call_inner(
213 &self,
214 sub_name: &str,
215 call_expr: &str,
216 outer_vars: &[String],
217 ) -> Result<(String, Vec<String>), InlineError> {
218 let ability = analyze_sub_for_inlining(&self.source, sub_name)?;
219 let InlineAbility::Ok { params, body, has_side_effects } = ability;
220
221 let mut warnings = Vec::new();
222 if has_side_effects {
223 warnings.push(format!(
224 "subroutine '{}' contains side effects (print/warn/die/I/O); \
225 inlining preserves them but may change semantics",
226 sub_name
227 ));
228 }
229
230 let args = extract_call_args(call_expr, sub_name)?;
232
233 let mut sub_map: HashMap<String, String> = HashMap::new();
235 for (i, param) in params.iter().enumerate() {
236 let arg = args.get(i).cloned().unwrap_or_default();
237 sub_map.insert(param.clone(), arg);
238 }
239
240 let body = rename_collisions(&body, outer_vars);
242
243 let substituted = substitute_params(&body, &sub_map);
245
246 let expr = extract_return_expr(&substituted);
248
249 Ok((expr, warnings))
250 }
251}
252
/// Pieces of a subroutine definition extracted from source text.
struct ParsedSub {
    /// Parameter names from the `my (...) = @_` line, without sigils.
    params: Vec<String>,
    /// Body text between the outer braces, minus the parameter-unpacking line.
    body: String,
}
264
265fn parse_sub_definition(source: &str, sub_name: &str) -> Option<ParsedSub> {
276 let start = find_sub_start(source, sub_name)?;
277
278 let body_start = source[start..].find('{').map(|i| start + i + 1)?;
280 let body_raw = extract_balanced_braces(source, body_start)?;
281
282 let (params, body_without_params) = extract_params_line(&body_raw);
284
285 Some(ParsedSub { params, body: body_without_params })
286}
287
/// Finds the byte offset of the `sub ` keyword that introduces `sub_name`.
///
/// A candidate matches when the text after `sub ` (ignoring whitespace) starts
/// with `sub_name`, the name is not continued by an identifier character, and
/// the next non-whitespace character is `{`.
fn find_sub_start(source: &str, sub_name: &str) -> Option<usize> {
    source.match_indices("sub ").find_map(|(at, keyword)| {
        let after_name = source[at + keyword.len()..].trim_start().strip_prefix(sub_name)?;
        // Reject longer names that merely start with `sub_name` (e.g. `foobar`).
        let name_continues =
            after_name.chars().next().is_some_and(|c| c.is_alphanumeric() || c == '_');
        if name_continues || !after_name.trim_start().starts_with('{') {
            return None;
        }
        Some(at)
    })
}
311
/// Returns the text from `open_pos` up to (not including) the `}` that closes
/// the brace opened just before `open_pos`.
///
/// `open_pos` is the byte offset immediately after an opening `{`, so nesting
/// starts at one. Returns `None` when the closing brace is never found.
fn extract_balanced_braces(source: &str, open_pos: usize) -> Option<String> {
    let tail = &source[open_pos..];
    let mut nesting = 1usize;
    for (offset, ch) in tail.char_indices() {
        match ch {
            '{' => nesting += 1,
            '}' => {
                nesting -= 1;
                if nesting == 0 {
                    return Some(tail[..offset].to_string());
                }
            }
            _ => {}
        }
    }
    None
}
340
341fn extract_params_line(body: &str) -> (Vec<String>, String) {
346 for (i, line) in body.lines().enumerate() {
347 let trimmed = line.trim();
348 if trimmed.starts_with("my (") && trimmed.contains("= @_") {
349 let params = parse_param_names(trimmed);
350 let remaining: String = body
351 .lines()
352 .enumerate()
353 .filter(|(j, _)| *j != i)
354 .map(|(_, l)| l)
355 .collect::<Vec<_>>()
356 .join("\n");
357 return (params, remaining);
358 }
359 }
360 (vec![], body.to_string())
361}
362
/// Extracts parameter names from a line such as `my ($a, $b) = @_;`.
///
/// The names are the comma-separated entries between the first `(` and the
/// first `)` that follows it, with leading `$`, `@` and `%` sigils removed and
/// empty entries skipped. Using the first closing parenthesis (rather than the
/// last one on the line) keeps parentheses in a trailing comment or expression
/// from being swallowed into the last parameter name.
///
/// Returns an empty vector when the line has no `(...)` pair.
fn parse_param_names(line: &str) -> Vec<String> {
    let Some(open) = line.find('(') else {
        return Vec::new();
    };
    let after_open = &line[open + 1..];
    let Some(close) = after_open.find(')') else {
        return Vec::new();
    };

    after_open[..close]
        .split(',')
        .map(|entry| entry.trim().trim_start_matches(['$', '@', '%']).to_string())
        .filter(|name| !name.is_empty())
        .collect()
}
384
/// Counts `return` keywords in `body` that appear outside quoted strings.
///
/// A `return` is counted only when it is a whole word: neither the byte before
/// nor the byte after it may be an ASCII letter, digit or `_`. Single- and
/// double-quoted regions are skipped, and inside them a backslash escapes the
/// following character.
///
/// The scan is character-based, so an escaped multi-byte character inside a
/// string (e.g. `'\é'`) is skipped as a unit instead of leaving the cursor in
/// the middle of a UTF-8 sequence, which would panic on the next slice.
fn count_return_statements(body: &str) -> usize {
    const KEYWORD: &str = "return";
    let is_ident = |b: u8| b.is_ascii_alphanumeric() || b == b'_';

    let bytes = body.as_bytes();
    let mut count = 0usize;
    let mut in_single_quote = false;
    let mut in_double_quote = false;
    let mut escaped = false;
    // Byte offset below which characters were already consumed as a keyword.
    let mut resume_at = 0usize;

    for (pos, ch) in body.char_indices() {
        if pos < resume_at {
            continue;
        }
        if escaped {
            // This character was escaped by the preceding backslash.
            escaped = false;
            continue;
        }
        let quoted = in_single_quote || in_double_quote;
        match ch {
            '\\' if quoted => escaped = true,
            '\'' if !in_double_quote => in_single_quote = !in_single_quote,
            '"' if !in_single_quote => in_double_quote = !in_double_quote,
            _ if !quoted && body[pos..].starts_with(KEYWORD) => {
                let end = pos + KEYWORD.len();
                let before_ok = pos == 0 || !is_ident(bytes[pos - 1]);
                let after_ok = !bytes.get(end).is_some_and(|&b| is_ident(b));
                if before_ok && after_ok {
                    count += 1;
                }
                // Skip the keyword text whether or not it counted.
                resume_at = end;
            }
            _ => {}
        }
    }
    count
}
454
/// Reports whether `body` mentions a builtin that performs output or I/O.
///
/// Each builtin name must appear as a whole word: it may not be preceded or
/// followed by an identifier character, and it may not be preceded by a
/// variable sigil (`$`, `@`, `%`). This keeps `sprintf` (which only formats)
/// and variables such as `$spread` from being flagged, while still catching
/// forms without a trailing space such as `print(...)` or `die;`.
///
/// This is a textual heuristic: occurrences inside strings or comments count.
fn has_side_effects(body: &str) -> bool {
    const SIDE_EFFECT_BUILTINS: &[&str] = &[
        "print", "warn", "die", "open", "close", "read", "write", "seek", "sysread", "syswrite",
        "printf", "say",
    ];
    let is_ident = |c: char| c.is_alphanumeric() || c == '_';

    SIDE_EFFECT_BUILTINS.iter().any(|&builtin| {
        body.match_indices(builtin).any(|(at, _)| {
            let before = body[..at].chars().next_back();
            let after = body[at + builtin.len()..].chars().next();
            let starts_word =
                !before.is_some_and(|c| is_ident(c) || matches!(c, '$' | '@' | '%'));
            let ends_word = !after.is_some_and(is_ident);
            starts_word && ends_word
        })
    })
}
463
/// Reports whether `body` contains a call of the form `sub_name(` outside of
/// quoted strings.
///
/// The name must start a word: a match preceded by an identifier character
/// (as in `my_foo(` when looking for `foo`) is a different subroutine and is
/// ignored. Single- and double-quoted regions are skipped, with a backslash
/// escaping the following character inside them.
///
/// The scan is character-based so an escaped multi-byte character inside a
/// string cannot leave the cursor in the middle of a UTF-8 sequence.
fn body_calls_self(body: &str, sub_name: &str) -> bool {
    let call_pattern = format!("{}(", sub_name);
    let is_ident = |c: char| c.is_alphanumeric() || c == '_';

    let mut in_single_quote = false;
    let mut in_double_quote = false;
    let mut escaped = false;
    let mut prev: Option<char> = None;

    for (pos, ch) in body.char_indices() {
        if escaped {
            // This character was escaped by the preceding backslash.
            escaped = false;
            prev = Some(ch);
            continue;
        }
        let quoted = in_single_quote || in_double_quote;
        match ch {
            '\\' if quoted => escaped = true,
            '\'' if !in_double_quote => in_single_quote = !in_single_quote,
            '"' if !in_single_quote => in_double_quote = !in_double_quote,
            _ if !quoted
                && !prev.is_some_and(is_ident)
                && body[pos..].starts_with(&call_pattern) =>
            {
                return true;
            }
            _ => {}
        }
        prev = Some(ch);
    }
    false
}
502
503fn extract_call_args(call_expr: &str, sub_name: &str) -> Result<Vec<String>, InlineError> {
509 let sub_pos = call_expr.find(sub_name).ok_or_else(|| InlineError::CallSiteParseFailed {
510 message: format!("call expression does not contain sub name '{}'", sub_name),
511 })?;
512
513 let after_name_pos = sub_pos + sub_name.len();
514 let rest = call_expr[after_name_pos..].trim_start();
515 if !rest.starts_with('(') {
516 return Ok(vec![]);
518 }
519
520 let paren_offset = call_expr[after_name_pos..].find('(').unwrap_or(0);
522 let open_abs = after_name_pos + paren_offset;
523
524 let close_abs = find_matching_paren(call_expr, open_abs).ok_or_else(|| {
525 InlineError::CallSiteParseFailed {
526 message: "unmatched parenthesis in call expression".to_string(),
527 }
528 })?;
529
530 let args_str = &call_expr[open_abs + 1..close_abs];
531 if args_str.trim().is_empty() {
532 return Ok(vec![]);
533 }
534
535 Ok(split_args(args_str))
536}
537
/// Returns the byte index of the `)` that closes the `(` at byte index `open`.
///
/// Parentheses inside single- or double-quoted strings are ignored, and a
/// backslash inside a string escapes the following byte, so a call such as
/// `f(")", 1)` is matched at its real closing parenthesis. Quote tracking
/// starts at `open`.
///
/// Returns `None` when `s[open]` is not `(` or no matching `)` exists.
fn find_matching_paren(s: &str, open: usize) -> Option<usize> {
    let bytes = s.as_bytes();
    if bytes.get(open) != Some(&b'(') {
        return None;
    }

    let mut depth = 0usize;
    let mut in_single_quote = false;
    let mut in_double_quote = false;
    let mut escaped = false;

    // All delimiters are ASCII, so scanning bytes is safe: UTF-8 continuation
    // bytes never collide with them.
    for (i, &b) in bytes.iter().enumerate().skip(open) {
        if escaped {
            escaped = false;
            continue;
        }
        let quoted = in_single_quote || in_double_quote;
        match b {
            b'\\' if quoted => escaped = true,
            b'\'' if !in_double_quote => in_single_quote = !in_single_quote,
            b'"' if !in_single_quote => in_double_quote = !in_double_quote,
            b'(' if !quoted => depth += 1,
            b')' if !quoted => {
                // `depth` is at least 1 here: the scan starts on the opening
                // parenthesis and returns as soon as the count reaches zero.
                depth -= 1;
                if depth == 0 {
                    return Some(i);
                }
            }
            _ => {}
        }
    }
    None
}
559
/// Splits an argument list on top-level commas.
///
/// Commas nested inside `()`, `[]`, `{}` or inside single/double-quoted strings
/// do not split. Inside a string a backslash keeps the following character
/// verbatim. Each piece is trimmed; a trailing piece that is empty after
/// trimming is dropped, while empty pieces between commas are kept.
fn split_args(args_str: &str) -> Vec<String> {
    let mut pieces: Vec<String> = Vec::new();
    let mut buf = String::new();
    let mut nesting = 0usize;
    let mut single = false;
    let mut double = false;

    let mut stream = args_str.chars();
    while let Some(ch) = stream.next() {
        let quoted = single || double;
        match ch {
            '\\' if quoted => {
                // Copy the escape and whatever it escapes without interpreting it.
                buf.push(ch);
                if let Some(escaped) = stream.next() {
                    buf.push(escaped);
                }
            }
            '\'' if !double => {
                single = !single;
                buf.push(ch);
            }
            '"' if !single => {
                double = !double;
                buf.push(ch);
            }
            '(' | '[' | '{' if !quoted => {
                nesting += 1;
                buf.push(ch);
            }
            ')' | ']' | '}' if !quoted => {
                nesting = nesting.saturating_sub(1);
                buf.push(ch);
            }
            ',' if nesting == 0 && !quoted => {
                pieces.push(buf.trim().to_string());
                buf.clear();
            }
            other => buf.push(other),
        }
    }

    let last = buf.trim();
    if !last.is_empty() {
        pieces.push(last.to_string());
    }
    pieces
}
611
/// Replaces every `$name` in `body` whose name is a key of `sub_map` with the
/// mapped argument text.
///
/// A variable name is the maximal run of alphanumeric or `_` characters after
/// the `$`, so `$ab` is never treated as `$a` followed by `b`.
///
/// The body is rewritten in a single left-to-right pass. Inserted argument text
/// is never rescanned, so an argument that itself mentions another parameter's
/// name (calling `add($y, 1)` for `sub add { my ($x, $y) = @_; ... }`) is not
/// substituted a second time, and the result does not depend on map iteration
/// order.
fn substitute_params(body: &str, sub_map: &HashMap<String, String>) -> String {
    let is_ident = |c: char| c.is_alphanumeric() || c == '_';

    let mut out = String::with_capacity(body.len());
    let mut rest = body;
    while let Some(dollar) = rest.find('$') {
        out.push_str(&rest[..dollar]);
        let after = &rest[dollar + 1..];
        let name_len = after.find(|c: char| !is_ident(c)).unwrap_or(after.len());
        let name = &after[..name_len];
        match sub_map.get(name) {
            Some(arg) => out.push_str(arg),
            None => {
                // Not a parameter: copy the variable through untouched.
                out.push('$');
                out.push_str(name);
            }
        }
        rest = &after[name_len..];
    }
    out.push_str(rest);
    out
}
634
635fn rename_collisions(body: &str, outer_vars: &[String]) -> String {
638 let mut result = body.to_string();
639 for outer in outer_vars {
640 let bare = outer.trim_start_matches(['$', '@', '%']);
641 let my_decl = format!("my ${}", bare);
642 if result.contains(&my_decl) {
643 let renamed_bare = format!("{}_inlined", bare);
644 let renamed_decl = format!("my ${}", renamed_bare);
645 result = replace_whole_var(&result, &my_decl, &renamed_decl);
648 let var = format!("${}", bare);
652 let renamed_var = format!("${}", renamed_bare);
653 result = replace_whole_var(&result, &var, &renamed_var);
655 }
656 }
657 result
658}
659
/// Replaces each occurrence of `var` in `text` with `replacement`, but only
/// where the occurrence is not immediately followed by an alphanumeric
/// character or `_` (so `$x` does not match inside `$xy`).
///
/// An empty `var` matches nothing and `text` is returned unchanged; without
/// this guard the scan would never advance past a successful "match".
fn replace_whole_var(text: &str, var: &str, replacement: &str) -> String {
    if var.is_empty() {
        return text.to_string();
    }
    // `var` is non-empty, so it has a first character.
    let first_len = var.chars().next().map_or(1, char::len_utf8);

    let mut out = String::with_capacity(text.len());
    let mut rest = text;
    while let Some(at) = rest.find(var) {
        let after = &rest[at + var.len()..];
        let name_continues = after.chars().next().is_some_and(|c| c.is_alphanumeric() || c == '_');
        if name_continues {
            // Part of a longer name: copy through the first character of this
            // occurrence and keep scanning right after it.
            let step = at + first_len;
            out.push_str(&rest[..step]);
            rest = &rest[step..];
        } else {
            out.push_str(&rest[..at]);
            out.push_str(replacement);
            rest = after;
        }
    }
    out.push_str(rest);
    out
}
682
/// Produces the expression that stands in for the inlined call.
///
/// The first line that (after trimming) starts with `return ` supplies the
/// expression: the keyword and trailing semicolons are removed and the rest is
/// wrapped in parentheses. Without such a line the whole trimmed body is
/// returned as-is.
fn extract_return_expr(body: &str) -> String {
    const KEYWORD: &str = "return ";
    body.lines()
        .map(str::trim)
        .find(|line| line.starts_with(KEYWORD))
        .map(|line| {
            let expr = line.trim_start_matches(KEYWORD).trim_end_matches(';').trim();
            format!("({})", expr)
        })
        .unwrap_or_else(|| body.trim().to_string())
}