use regex::Regex;
use serde::{Deserialize, Serialize};
use std::collections::{BTreeMap, BTreeSet};
use std::path::Path;
/// A single text replacement: substitute `new_text` for the half-open
/// byte-offset span `range.0..range.1` of the original content.
#[derive(Debug, Clone)]
pub struct TextEdit {
    // (start, end) byte offsets into the source text; start == end means a
    // pure insertion.
    pub range: (usize, usize),
    // Replacement text for the range.
    pub new_text: String,
}
/// Aggregate result of analyzing one Perl file's `use` statements.
#[derive(Debug, Serialize, Deserialize)]
pub struct ImportAnalysis {
    // Imports whose symbols (or whole module) appear unused in the code.
    pub unused_imports: Vec<UnusedImport>,
    // Modules referenced via `Module::symbol` but never imported.
    pub missing_imports: Vec<MissingImport>,
    // Modules imported on more than one line.
    pub duplicate_imports: Vec<DuplicateImport>,
    // Ordering/dedup advice for the import block.
    pub organization_suggestions: Vec<OrganizationSuggestion>,
    // Every parsed `use` statement, in file order.
    pub imports: Vec<ImportEntry>,
}
/// An import (or some of its symbols) that analysis found to be unused.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UnusedImport {
    // Module named on the `use` line.
    pub module: String,
    // The unused symbols; the sentinel "(bare import)" marks a whole
    // unused `use Module;` line with no qw() list.
    pub symbols: Vec<String>,
    // 1-indexed line of the `use` statement.
    pub line: usize,
    // Human-readable explanation for the finding.
    pub reason: String,
}
/// A module the code references with `Module::symbol` but never imports.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MissingImport {
    // Module path that should be imported.
    pub module: String,
    // Symbols observed in qualified usage, sorted and deduplicated.
    pub symbols: Vec<String>,
    // 1-indexed line where inserting the `use` is suggested.
    pub suggested_location: usize,
    // Heuristic confidence in the suggestion (currently fixed at 0.8).
    pub confidence: f32,
}
/// A module imported on more than one line.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DuplicateImport {
    // Module imported repeatedly.
    pub module: String,
    // All 1-indexed lines carrying a `use` of this module.
    pub lines: Vec<usize>,
    // Whether the duplicates can be merged into one statement
    // (always true for the current analysis).
    pub can_merge: bool,
}
/// A free-form recommendation about organizing the import block.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OrganizationSuggestion {
    // Human-readable suggestion text.
    pub description: String,
    // Relative importance of acting on the suggestion.
    pub priority: SuggestionPriority,
}
/// One parsed `use` statement.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ImportEntry {
    // Module name, e.g. "List::Util".
    pub module: String,
    // Symbols from a `qw(...)` list; empty for bare `use Module;`.
    pub symbols: Vec<String>,
    // 1-indexed source line of the statement.
    pub line: usize,
}
/// Importance level attached to an [`OrganizationSuggestion`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum SuggestionPriority {
    High,
    Medium,
    Low,
}
/// Analyzes Perl source for unused, missing, and duplicate `use` statements
/// and can regenerate a cleaned-up import block. Stateless unit struct;
/// construct via `ImportOptimizer::new()` or `Default`.
pub struct ImportOptimizer;
/// Returns true when `module` names a Perl pragma rather than a loadable
/// library module. Pragmas alter compiler/interpreter behavior and export
/// no symbols, so usage analysis must not treat them as importable modules.
fn is_pragma_module(module: &str) -> bool {
    // Common pragmatic modules shipped with Perl.
    const PRAGMAS: &[&str] = &[
        "strict", "warnings", "utf8", "bytes", "locale", "integer", "less",
        "sigtrap", "subs", "vars", "feature", "autodie", "autouse", "base",
        "parent", "lib", "bigint", "bignum", "bigrat",
    ];
    PRAGMAS.contains(&module)
}
/// Default export lists for well-known CPAN/core modules.
///
/// Returns `Some(exports)` when the module is recognized — an empty list
/// means the module is known to export nothing by default (object-oriented
/// APIs such as `LWP::UserAgent` and `DBI`) — and `None` for modules we know
/// nothing about, letting callers stay conservative.
fn get_known_module_exports(module: &str) -> Option<Vec<&'static str>> {
    let exports: Vec<&'static str> = match module {
        "Data::Dumper" => vec!["Dumper"],
        "JSON" => vec!["encode_json", "decode_json", "to_json", "from_json"],
        "YAML" => vec!["Load", "Dump", "LoadFile", "DumpFile"],
        "Storable" => vec!["store", "retrieve", "freeze", "thaw"],
        "List::Util" => vec!["first", "max", "min", "sum", "reduce", "shuffle", "uniq"],
        "Scalar::Util" => vec!["blessed", "reftype", "looks_like_number", "weaken"],
        "File::Spec" => vec!["catfile", "catdir", "splitpath", "splitdir"],
        "File::Basename" => vec!["basename", "dirname", "fileparse"],
        "Cwd" => vec!["getcwd", "abs_path", "realpath"],
        "Time::HiRes" => vec!["time", "sleep", "usleep", "gettimeofday"],
        "Digest::MD5" => vec!["md5", "md5_hex", "md5_base64"],
        "MIME::Base64" => vec!["encode_base64", "decode_base64"],
        "URI::Escape" => vec!["uri_escape", "uri_unescape"],
        "LWP::Simple" => vec!["get", "head", "getprint", "getstore", "mirror"],
        "CGI" => vec!["param", "header", "start_html", "end_html"],
        // Known modules with no default exports, plus a few pragmas that
        // callers may probe directly.
        "LWP::UserAgent" | "DBI" | "strict" | "warnings" | "utf8" => vec![],
        _ => return None,
    };
    Some(exports)
}
impl ImportOptimizer {
pub fn new() -> Self {
Self
}
pub fn analyze_file(&self, file_path: &Path) -> Result<ImportAnalysis, String> {
let content = std::fs::read_to_string(file_path).map_err(|e| e.to_string())?;
self.analyze_content(&content)
}
    /// Analyze Perl source text for import issues.
    ///
    /// Detects, in order:
    /// * every `use Module;` / `use Module qw(...);` line (`imports`),
    /// * modules imported on more than one line (`duplicate_imports`),
    /// * `qw()` symbols — or bare imports of known zero-export modules —
    ///   never referenced outside import/comment lines (`unused_imports`),
    /// * qualified `Module::symbol` usages whose module is never imported,
    ///   scanned after stripping strings, `qr//` literals, and comments
    ///   (`missing_imports`),
    /// * ordering/duplication advice (`organization_suggestions`).
    ///
    /// Errors are stringified regex-compilation failures from `Regex::new`.
    pub fn analyze_content(&self, content: &str) -> Result<ImportAnalysis, String> {
        // `use Module;` or `use Module qw(sym ...);`, anchored at line start.
        let re_use = Regex::new(r"^\s*use\s+([A-Za-z0-9_:]+)(?:\s+qw\(([^)]*)\))?\s*;")
            .map_err(|e| e.to_string())?;
        // Pass 1: collect every import line (line numbers are 1-indexed).
        let mut imports = Vec::new();
        for (idx, line) in content.lines().enumerate() {
            if let Some(caps) = re_use.captures(line) {
                let module = caps[1].to_string();
                let symbols_str = caps.get(2).map(|m| m.as_str()).unwrap_or("");
                let symbols = if symbols_str.is_empty() {
                    Vec::new()
                } else {
                    // qw() lists are whitespace-separated; strip stray
                    // punctuation/quotes that sometimes end up in the list.
                    symbols_str
                        .split_whitespace()
                        .filter(|s| !s.is_empty())
                        .map(|s| s.trim_matches(|c| c == ',' || c == ';' || c == '"'))
                        .map(|s| s.to_string())
                        .collect::<Vec<_>>()
                };
                imports.push(ImportEntry { module, symbols, line: idx + 1 });
            }
        }
        // Group import line numbers per module to detect duplicates.
        let mut module_to_lines: BTreeMap<String, Vec<usize>> = BTreeMap::new();
        for imp in &imports {
            module_to_lines.entry(imp.module.clone()).or_default().push(imp.line);
        }
        let duplicate_imports = module_to_lines
            .iter()
            .filter(|(_, lines)| lines.len() > 1)
            .map(|(module, lines)| DuplicateImport {
                module: module.clone(),
                lines: lines.clone(),
                can_merge: true,
            })
            .collect::<Vec<_>>();
        // Usage scanning ignores `use` lines and full-line comments.
        // NOTE(review): trailing `# ...` comments on code lines remain in
        // this text, so a symbol mentioned only in such a comment counts as
        // "used" — confirm this leniency is intended.
        let non_use_content = content
            .lines()
            .filter(
                |line| {
                    !line.trim_start().starts_with("use ") && !line.trim_start().starts_with("#")
                },
            )
            .collect::<Vec<_>>()
            .join(
                "
",
            );
        let dumper_re = Regex::new(r"\bDumper\b").map_err(|e| e.to_string())?;
        let mut unused_imports = Vec::new();
        for imp in &imports {
            let mut unused_symbols = Vec::new();
            if !imp.symbols.is_empty() {
                // Explicit qw() import: flag each symbol that has no
                // whole-word occurrence anywhere in the non-import code.
                for sym in &imp.symbols {
                    let re = Regex::new(&format!(r"\b{}\b", regex::escape(sym)))
                        .map_err(|e| e.to_string())?;
                    if !re.is_match(&non_use_content) {
                        unused_symbols.push(sym.clone());
                    }
                }
            } else {
                // Bare `use Module;` — skip pragmas, which act at compile
                // time and are never "unused".
                // NOTE(review): this inline pragma list differs from
                // `is_pragma_module` (`overload` appears only here;
                // `feature`, `parent`, `base`, etc. only there) — confirm
                // whether the two lists should be unified.
                let is_pragma = matches!(
                    imp.module.as_str(),
                    "strict"
                        | "warnings"
                        | "utf8"
                        | "bytes"
                        | "integer"
                        | "locale"
                        | "overload"
                        | "sigtrap"
                        | "subs"
                        | "vars"
                );
                if !is_pragma {
                    let (is_known_module, known_exports) =
                        match get_known_module_exports(&imp.module) {
                            Some(exports) => (true, exports),
                            None => (false, Vec::new()),
                        };
                    let mut is_used = false;
                    // Usage check 1: whole-word mention of the module name.
                    let module_pattern = format!(r"\b{}\b", regex::escape(&imp.module));
                    let module_re = Regex::new(&module_pattern).map_err(|e| e.to_string())?;
                    if module_re.is_match(&non_use_content) {
                        is_used = true;
                    }
                    // Usage check 2: qualified `Module::...` reference.
                    if !is_used {
                        let qualified_pattern = format!(r"{}::", regex::escape(&imp.module));
                        let qualified_re =
                            Regex::new(&qualified_pattern).map_err(|e| e.to_string())?;
                        if qualified_re.is_match(&non_use_content) {
                            is_used = true;
                        }
                    }
                    // Usage check 3: Data::Dumper is commonly used via a
                    // bare `Dumper(...)` call.
                    if !is_used && imp.module == "Data::Dumper" {
                        if dumper_re.is_match(&non_use_content) {
                            is_used = true;
                        }
                    }
                    // Usage check 4: any of the module's known default
                    // exports appearing as a whole word.
                    if !is_used && !known_exports.is_empty() {
                        for export in &known_exports {
                            let export_pattern = format!(r"\b{}\b", regex::escape(export));
                            let export_re =
                                Regex::new(&export_pattern).map_err(|e| e.to_string())?;
                            if export_re.is_match(&non_use_content) {
                                is_used = true;
                                break;
                            }
                        }
                    }
                    // Deliberately conservative: only bare imports of modules
                    // *known* to export nothing (e.g. LWP::UserAgent, DBI)
                    // are flagged. Unknown modules and known exporters are
                    // left alone even when no usage was found — the tests
                    // below rely on this policy.
                    if !is_used && is_known_module && known_exports.is_empty() {
                        unused_symbols.push("(bare import)".to_string());
                    }
                }
            }
            if !unused_symbols.is_empty() {
                unused_imports.push(UnusedImport {
                    module: imp.module.clone(),
                    symbols: unused_symbols,
                    line: imp.line,
                    reason: "Symbols not used in code".to_string(),
                });
            }
        }
        let imported_modules: BTreeSet<String> =
            imports.iter().map(|imp| imp.module.clone()).collect();
        // Strip single-/double-quoted strings, `qr/.../` regex literals, and
        // `#` comments before scanning for qualified usages, so text such as
        // "JSON::encode_json" inside a string does not count as usage.
        let string_re = Regex::new("'[^']*'|\"[^\"]*\"").map_err(|e| e.to_string())?;
        let stripped = string_re.replace_all(content, " ").to_string();
        let regex_literal_re = Regex::new(r"qr/[^/]*/").map_err(|e| e.to_string())?;
        let stripped = regex_literal_re.replace_all(&stripped, " ").to_string();
        let comment_re = Regex::new(r"(?m)#.*$").map_err(|e| e.to_string())?;
        let stripped = comment_re.replace_all(&stripped, " ").to_string();
        // `Path::To::Module::symbol` — capture 1 is the module path,
        // capture 2 the trailing symbol.
        let usage_re = Regex::new(
            r"\b([A-Za-z_][A-Za-z0-9_]*(?:::[A-Za-z_][A-Za-z0-9_]*)*)::([A-Za-z_][A-Za-z0-9_]*)",
        )
        .map_err(|e| e.to_string())?;
        let mut usage_map: BTreeMap<String, Vec<String>> = BTreeMap::new();
        for caps in usage_re.captures_iter(&stripped) {
            if let (Some(module_match), Some(symbol_match)) = (caps.get(1), caps.get(2)) {
                let module = module_match.as_str().to_string();
                let symbol = symbol_match.as_str().to_string();
                // Already imported, or a pragma — not a missing import.
                if imported_modules.contains(&module) || is_pragma_module(&module) {
                    continue;
                }
                usage_map.entry(module).or_default().push(symbol);
            }
        }
        // Suggest inserting new imports right after the last existing one
        // (or at line 1 when the file has no imports).
        let last_import_line = imports.iter().map(|i| i.line).max().unwrap_or(0);
        let missing_imports = usage_map
            .into_iter()
            .map(|(module, mut symbols)| {
                symbols.sort();
                symbols.dedup();
                MissingImport {
                    module,
                    symbols,
                    suggested_location: last_import_line + 1,
                    confidence: 0.8,
                }
            })
            .collect::<Vec<_>>();
        let mut organization_suggestions = Vec::new();
        // Suggestion: imports not in alphabetical order.
        let module_order: Vec<String> = imports.iter().map(|i| i.module.clone()).collect();
        let mut sorted_order = module_order.clone();
        sorted_order.sort();
        if module_order != sorted_order {
            organization_suggestions.push(OrganizationSuggestion {
                description: "Sort import statements alphabetically".to_string(),
                priority: SuggestionPriority::Low,
            });
        }
        // Suggestion: duplicate imports to remove.
        if !duplicate_imports.is_empty() {
            let modules =
                duplicate_imports.iter().map(|d| d.module.clone()).collect::<Vec<_>>().join(", ");
            organization_suggestions.push(OrganizationSuggestion {
                description: format!("Remove duplicate imports for modules: {}", modules),
                priority: SuggestionPriority::Medium,
            });
        }
        // Suggestion: qw() lists that are unsorted or contain duplicates.
        let mut symbols_need_org = false;
        for imp in &imports {
            if imp.symbols.len() > 1 {
                let mut sorted = imp.symbols.clone();
                sorted.sort();
                sorted.dedup();
                if sorted != imp.symbols {
                    symbols_need_org = true;
                    break;
                }
            }
        }
        if symbols_need_org {
            organization_suggestions.push(OrganizationSuggestion {
                description: "Sort and deduplicate symbols within import statements".to_string(),
                priority: SuggestionPriority::Low,
            });
        }
        Ok(ImportAnalysis {
            imports,
            unused_imports,
            missing_imports,
            duplicate_imports,
            organization_suggestions,
        })
    }
pub fn generate_optimized_imports(&self, analysis: &ImportAnalysis) -> String {
let mut optimized_imports = Vec::new();
let mut module_symbols: BTreeMap<String, Vec<String>> = BTreeMap::new();
let mut unused_by_module: BTreeMap<String, Vec<String>> = BTreeMap::new();
for unused in &analysis.unused_imports {
unused_by_module
.entry(unused.module.clone())
.or_default()
.extend(unused.symbols.clone());
}
for import in &analysis.imports {
let kept_symbols: Vec<String> = import
.symbols
.iter()
.filter(|sym| {
if let Some(unused_symbols) = unused_by_module.get(&import.module) {
!unused_symbols.contains(sym)
} else {
true }
})
.cloned()
.collect();
let entry = module_symbols.entry(import.module.clone()).or_default();
entry.extend(kept_symbols);
entry.sort();
entry.dedup();
}
for missing in &analysis.missing_imports {
let entry = module_symbols.entry(missing.module.clone()).or_default();
entry.extend(missing.symbols.clone());
entry.sort();
entry.dedup();
}
for (module, symbols) in &module_symbols {
let was_bare_import =
analysis.imports.iter().any(|imp| imp.module == *module && imp.symbols.is_empty());
if symbols.is_empty() && was_bare_import {
optimized_imports.push(format!("use {};", module));
} else if !symbols.is_empty() {
let symbol_list = symbols.join(" ");
optimized_imports.push(format!("use {} qw({});", module, symbol_list));
}
}
optimized_imports.sort();
optimized_imports.join("\n")
}
    /// Translate the analysis into byte-range [`TextEdit`]s against `content`.
    ///
    /// With no existing imports, a single insertion is produced at the first
    /// missing import's suggested line (or line 1). Otherwise one edit
    /// replaces the span from the first to the last `use` line with the
    /// regenerated import block.
    ///
    /// NOTE(review): the replaced span covers *every* line between the first
    /// and last import, including any non-import code interleaved there —
    /// confirm imports are expected to be contiguous.
    pub fn generate_edits(&self, content: &str, analysis: &ImportAnalysis) -> Vec<TextEdit> {
        let optimized = self.generate_optimized_imports(analysis);
        if analysis.imports.is_empty() {
            if optimized.is_empty() {
                // Nothing to remove, nothing to add.
                return Vec::new();
            }
            let insert_line =
                analysis.missing_imports.first().map(|m| m.suggested_location).unwrap_or(1);
            let insert_offset = self.line_offset(content, insert_line);
            // Pure insertion: empty range at the insert point.
            return vec![TextEdit {
                range: (insert_offset, insert_offset),
                new_text: optimized + "\n",
            }];
        }
        // Replace the whole existing import block with one edit.
        let first_line = analysis.imports.iter().map(|i| i.line).min().unwrap_or(1);
        let last_line = analysis.imports.iter().map(|i| i.line).max().unwrap_or(1);
        let start_offset = self.line_offset(content, first_line);
        let end_offset = self.line_offset(content, last_line + 1);
        vec![TextEdit {
            range: (start_offset, end_offset),
            new_text: if optimized.is_empty() { String::new() } else { optimized + "\n" },
        }]
    }
fn line_offset(&self, content: &str, line: usize) -> usize {
if line <= 1 {
return 0;
}
let mut offset = 0;
for (idx, l) in content.lines().enumerate() {
if idx + 1 >= line {
break;
}
offset += l.len() + 1; }
offset
}
}
/// `Default` simply delegates to [`ImportOptimizer::new`].
impl Default for ImportOptimizer {
    fn default() -> Self {
        Self::new()
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::path::PathBuf;
    use tempfile::TempDir;

    /// Write `content` to `test.pl` inside a fresh temp dir. The `TempDir`
    /// is returned alongside the path so the directory stays alive for the
    /// duration of the test.
    fn create_test_file(content: &str) -> Result<(TempDir, PathBuf), Box<dyn std::error::Error>> {
        let temp_dir = TempDir::new()?;
        let file_path = temp_dir.path().join("test.pl");
        fs::write(&file_path, content)?;
        Ok((temp_dir, file_path))
    }

    // Imports are parsed in file order, and a used import (Dumper is called)
    // produces no unused-import findings.
    #[test]
    fn test_basic_import_analysis() -> Result<(), Box<dyn std::error::Error>> {
        let optimizer = ImportOptimizer::new();
        let content = r#"#!/usr/bin/perl
use strict;
use warnings;
use Data::Dumper;
print Dumper(\@ARGV);
"#;
        let (_temp_dir, file_path) = create_test_file(content)?;
        let analysis = optimizer.analyze_file(&file_path)?;
        assert_eq!(analysis.imports.len(), 3);
        assert_eq!(analysis.imports[0].module, "strict");
        assert_eq!(analysis.imports[1].module, "warnings");
        assert_eq!(analysis.imports[2].module, "Data::Dumper");
        assert!(analysis.unused_imports.is_empty());
        Ok(())
    }

    // Documents the conservative policy: bare imports of modules with known
    // non-empty export lists (Data::Dumper, JSON) are NOT flagged even when
    // nothing in the code uses them — only known zero-export modules are.
    #[test]
    fn test_unused_import_detection() -> Result<(), Box<dyn std::error::Error>> {
        let optimizer = ImportOptimizer::new();
        let content = r#"use strict;
use warnings;
use Data::Dumper; # This is not used
use JSON; # This is not used
print "Hello World\n";
"#;
        let (_temp_dir, file_path) = create_test_file(content)?;
        let analysis = optimizer.analyze_file(&file_path)?;
        assert!(analysis.unused_imports.is_empty());
        Ok(())
    }

    // Qualified `Module::symbol` calls without a matching `use` are reported,
    // and the suggested insertion point is one line past the last import.
    #[test]
    fn test_missing_import_detection() -> Result<(), Box<dyn std::error::Error>> {
        let optimizer = ImportOptimizer::new();
        let content = r#"use strict;
use warnings;
# Using JSON::encode_json without importing JSON
my $json = JSON::encode_json({key => 'value'});
# Using Data::Dumper::Dumper without importing Data::Dumper
print Data::Dumper::Dumper(\@ARGV);
"#;
        let (_temp_dir, file_path) = create_test_file(content)?;
        let analysis = optimizer.analyze_file(&file_path)?;
        assert_eq!(analysis.missing_imports.len(), 2);
        assert!(analysis.missing_imports.iter().any(|m| m.module == "JSON"));
        assert!(analysis.missing_imports.iter().any(|m| m.module == "Data::Dumper"));
        for m in &analysis.missing_imports {
            // Imports end at line 2, so insertion is suggested at line 3.
            assert_eq!(m.suggested_location, 3);
        }
        Ok(())
    }

    // A module imported twice is reported once, with both line numbers.
    #[test]
    fn test_duplicate_import_detection() -> Result<(), Box<dyn std::error::Error>> {
        let optimizer = ImportOptimizer::new();
        let content = r#"use strict;
use warnings;
use Data::Dumper;
use JSON;
use Data::Dumper; # Duplicate
print Dumper(\@ARGV);
"#;
        let (_temp_dir, file_path) = create_test_file(content)?;
        let analysis = optimizer.analyze_file(&file_path)?;
        assert_eq!(analysis.duplicate_imports.len(), 1);
        assert_eq!(analysis.duplicate_imports[0].module, "Data::Dumper");
        assert_eq!(analysis.duplicate_imports[0].lines.len(), 2);
        assert!(analysis.duplicate_imports[0].can_merge);
        Ok(())
    }

    // Unsorted imports, a duplicate, and an unsorted/duplicated qw() list
    // each produce their own organization suggestion.
    #[test]
    fn test_organization_suggestions() -> Result<(), Box<dyn std::error::Error>> {
        let optimizer = ImportOptimizer::new();
        let content = r#"use warnings;
use strict;
use List::Util qw(max max min);
use Data::Dumper;
use Data::Dumper; # duplicate
"#;
        let (_temp_dir, file_path) = create_test_file(content)?;
        let analysis = optimizer.analyze_file(&file_path)?;
        assert!(
            analysis
                .organization_suggestions
                .iter()
                .any(|s| s.description.contains("Sort import statements"))
        );
        assert!(
            analysis
                .organization_suggestions
                .iter()
                .any(|s| s.description.contains("Remove duplicate imports"))
        );
        assert!(
            analysis
                .organization_suggestions
                .iter()
                .any(|s| s.description.contains("Sort and deduplicate symbols"))
        );
        Ok(())
    }

    // qw() lists parse into individual symbols; explicitly imported symbols
    // that never appear in the code ("min", "blessed", "reftype") are flagged.
    #[test]
    fn test_qw_import_parsing() -> Result<(), Box<dyn std::error::Error>> {
        let optimizer = ImportOptimizer::new();
        let content = r#"use List::Util qw(first max min sum);
use Scalar::Util qw(blessed reftype);
my @nums = (1, 2, 3, 4, 5);
print "Max: " . max(@nums) . "\n";
print "Sum: " . sum(@nums) . "\n";
print "First: " . first { $_ > 3 } @nums;
"#;
        let (_temp_dir, file_path) = create_test_file(content)?;
        let analysis = optimizer.analyze_file(&file_path)?;
        assert_eq!(analysis.imports.len(), 2);
        let list_util = analysis
            .imports
            .iter()
            .find(|i| i.module == "List::Util")
            .ok_or("List::Util import not found")?;
        assert_eq!(list_util.symbols, vec!["first", "max", "min", "sum"]);
        let scalar_util = analysis
            .imports
            .iter()
            .find(|i| i.module == "Scalar::Util")
            .ok_or("Scalar::Util import not found")?;
        assert_eq!(scalar_util.symbols, vec!["blessed", "reftype"]);
        assert_eq!(analysis.unused_imports.len(), 2);
        let list_util_unused = analysis
            .unused_imports
            .iter()
            .find(|u| u.module == "List::Util")
            .ok_or("List::Util unused imports not found")?;
        assert_eq!(list_util_unused.symbols, vec!["min"]);
        let scalar_util_unused = analysis
            .unused_imports
            .iter()
            .find(|u| u.module == "Scalar::Util")
            .ok_or("Scalar::Util unused imports not found")?;
        assert_eq!(scalar_util_unused.symbols, vec!["blessed", "reftype"]);
        Ok(())
    }

    // End-to-end rendering: unused symbols dropped, missing imports added,
    // bare pragma imports preserved, output sorted alphabetically.
    #[test]
    fn test_generate_optimized_imports() {
        let optimizer = ImportOptimizer::new();
        let analysis = ImportAnalysis {
            imports: vec![
                ImportEntry { module: "strict".to_string(), symbols: vec![], line: 1 },
                ImportEntry { module: "warnings".to_string(), symbols: vec![], line: 2 },
                ImportEntry {
                    module: "List::Util".to_string(),
                    symbols: vec!["first".to_string(), "max".to_string(), "unused".to_string()],
                    line: 3,
                },
            ],
            unused_imports: vec![UnusedImport {
                module: "List::Util".to_string(),
                symbols: vec!["unused".to_string()],
                line: 3,
                reason: "Symbol not used".to_string(),
            }],
            missing_imports: vec![MissingImport {
                module: "Data::Dumper".to_string(),
                symbols: vec!["Dumper".to_string()],
                suggested_location: 10,
                confidence: 0.8,
            }],
            duplicate_imports: vec![],
            organization_suggestions: vec![],
        };
        let optimized = optimizer.generate_optimized_imports(&analysis);
        let expected_lines = [
            "use Data::Dumper qw(Dumper);",
            "use List::Util qw(first max);",
            "use strict;",
            "use warnings;",
        ];
        assert_eq!(optimized, expected_lines.join("\n"));
    }

    // An empty file yields an entirely empty analysis.
    #[test]
    fn test_empty_file_analysis() -> Result<(), Box<dyn std::error::Error>> {
        let optimizer = ImportOptimizer::new();
        let content = "";
        let (_temp_dir, file_path) = create_test_file(content)?;
        let analysis = optimizer.analyze_file(&file_path)?;
        assert!(analysis.imports.is_empty());
        assert!(analysis.unused_imports.is_empty());
        assert!(analysis.missing_imports.is_empty());
        assert!(analysis.duplicate_imports.is_empty());
        Ok(())
    }

    // Mixed scenario: LWP::UserAgent (known, zero exports, unreferenced) is
    // flagged as unused; File::Spec::Functions may lose only `catdir`; the
    // HTTP::Tiny qualified call is reported as a missing import.
    #[test]
    fn test_complex_perl_code_analysis() -> Result<(), Box<dyn std::error::Error>> {
        let optimizer = ImportOptimizer::new();
        let content = r#"#!/usr/bin/perl
use strict;
use warnings;
use Data::Dumper;
use JSON qw(encode_json decode_json);
use LWP::UserAgent; # Unused
use File::Spec::Functions qw(catfile catdir);
# Complex code with various patterns
my $data = { key => 'value', numbers => [1, 2, 3] };
my $json_string = encode_json($data);
print "JSON: $json_string\n";
# Using File::Spec but not all imported functions
my $path = catfile('/tmp', 'test.json');
print "Path: $path\n";
# Using modules without explicit imports
my $response = HTTP::Tiny::new()->get('http://example.com');
print Dumper($response);
"#;
        let (_temp_dir, file_path) = create_test_file(content)?;
        let analysis = optimizer.analyze_file(&file_path)?;
        assert!(analysis.unused_imports.iter().any(|u| u.module == "LWP::UserAgent"));
        let file_spec_unused =
            analysis.unused_imports.iter().find(|u| u.module == "File::Spec::Functions");
        if let Some(unused) = file_spec_unused {
            assert!(unused.symbols.contains(&"catdir".to_string()));
        }
        assert!(analysis.missing_imports.iter().any(|m| m.module == "HTTP::Tiny"));
        Ok(())
    }

    // Again the conservative policy: neither a used module, a known exporter
    // with no observed usage (JSON), nor an unknown module is flagged.
    #[test]
    fn test_bare_import_with_exports_detection() -> Result<(), Box<dyn std::error::Error>> {
        let optimizer = ImportOptimizer::new();
        let content = r#"use strict;
use warnings;
use Data::Dumper; # Used
use JSON; # Unused - has exports but none are used
use SomeUnknownModule; # Conservative - not marked as unused
print Dumper(\@ARGV);
"#;
        let (_temp_dir, file_path) = create_test_file(content)?;
        let analysis = optimizer.analyze_file(&file_path)?;
        assert!(!analysis.unused_imports.iter().any(|u| u.module == "Data::Dumper"));
        assert!(analysis.unused_imports.is_empty());
        Ok(())
    }

    // Module paths inside string literals and qr// literals are stripped
    // before the missing-import scan, so only the real call is reported.
    #[test]
    fn test_regex_edge_cases() -> Result<(), Box<dyn std::error::Error>> {
        let optimizer = ImportOptimizer::new();
        let content = r#"use strict;
use warnings;
# These should not be detected as module references
my $string = "This is not JSON::encode_json in a string";
my $regex = qr/Data::Dumper/;
print "Module::Name is just text";
# This should be detected
my $result = JSON::encode_json({test => 1});
"#;
        let (_temp_dir, file_path) = create_test_file(content)?;
        let analysis = optimizer.analyze_file(&file_path)?;
        assert_eq!(analysis.missing_imports.len(), 1);
        assert_eq!(analysis.missing_imports[0].module, "JSON");
        Ok(())
    }

    // Incomplete `Foo::` / `::bar` fragments (inside strings here) must not
    // panic the capture handling or produce spurious findings.
    #[test]
    fn test_malformed_regex_capture_safety() -> Result<(), Box<dyn std::error::Error>> {
        let optimizer = ImportOptimizer::new();
        let content = r#"use strict;
use warnings;
# Normal module usage
my $result = JSON::encode_json({test => 1});
# Edge case patterns that might not fully match the regex
my $incomplete = "Something::";
my $partial = "::Function";
"#;
        let (_temp_dir, file_path) = create_test_file(content)?;
        let analysis = optimizer.analyze_file(&file_path)?;
        assert_eq!(analysis.missing_imports.len(), 1);
        assert_eq!(analysis.missing_imports[0].module, "JSON");
        Ok(())
    }
}