use regex::Regex;
use serde::{Deserialize, Serialize};
use std::collections::{BTreeMap, BTreeSet};
use std::path::Path;
/// A single text replacement: substitute `new_text` for the half-open
/// byte-offset span `range.0..range.1` of the original content.
#[derive(Debug, Clone)]
pub struct TextEdit {
    // (start, end) byte offsets into the source text; start == end means a
    // pure insertion.
    pub range: (usize, usize),
    // Replacement text for the range.
    pub new_text: String,
}
/// Aggregate result of analyzing one Perl file's `use` statements.
#[derive(Debug, Serialize, Deserialize)]
pub struct ImportAnalysis {
    // Imports whose symbols (or whole module) appear unused in the code.
    pub unused_imports: Vec<UnusedImport>,
    // Modules referenced via `Module::symbol` but never imported.
    pub missing_imports: Vec<MissingImport>,
    // Modules imported on more than one line.
    pub duplicate_imports: Vec<DuplicateImport>,
    // Ordering/dedup advice for the import block.
    pub organization_suggestions: Vec<OrganizationSuggestion>,
    // Every parsed `use` statement, in file order.
    pub imports: Vec<ImportEntry>,
}
/// An import (or some of its symbols) that analysis found to be unused.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UnusedImport {
    // Module named on the `use` line.
    pub module: String,
    // The unused symbols; the sentinel "(bare import)" marks a whole
    // unused `use Module;` line with no qw() list.
    pub symbols: Vec<String>,
    // 1-indexed line of the `use` statement.
    pub line: usize,
    // Human-readable explanation for the finding.
    pub reason: String,
}
/// A module the code references with `Module::symbol` but never imports.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MissingImport {
    // Module path that should be imported.
    pub module: String,
    // Symbols observed in qualified usage, sorted and deduplicated.
    pub symbols: Vec<String>,
    // 1-indexed line where inserting the `use` is suggested.
    pub suggested_location: usize,
    // Heuristic confidence in the suggestion (currently fixed at 0.8).
    pub confidence: f32,
}
/// A module imported on more than one line.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DuplicateImport {
    // Module imported repeatedly.
    pub module: String,
    // All 1-indexed lines carrying a `use` of this module.
    pub lines: Vec<usize>,
    // Whether the duplicates can be merged into one statement
    // (always true for the current analysis).
    pub can_merge: bool,
}
/// A free-form recommendation about organizing the import block.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OrganizationSuggestion {
    // Human-readable suggestion text.
    pub description: String,
    // Relative importance of acting on the suggestion.
    pub priority: SuggestionPriority,
}
/// One parsed `use` statement.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ImportEntry {
    // Module name, e.g. "List::Util".
    pub module: String,
    // Symbols from a `qw(...)` list; empty for bare `use Module;`.
    pub symbols: Vec<String>,
    // 1-indexed source line of the statement.
    pub line: usize,
}
/// Importance level attached to an [`OrganizationSuggestion`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum SuggestionPriority {
    High,
    Medium,
    Low,
}
/// Analyzes Perl source for unused, missing, and duplicate `use` statements
/// and can regenerate a cleaned-up import block. Stateless unit struct;
/// construct via `ImportOptimizer::new()` or `Default`.
pub struct ImportOptimizer;
/// Returns true when `module` names a Perl pragma rather than a loadable
/// library module. Pragmas alter compiler/interpreter behavior and export
/// no symbols, so usage analysis must not treat them as importable modules.
fn is_pragma_module(module: &str) -> bool {
    // Common pragmatic modules shipped with Perl.
    const PRAGMAS: &[&str] = &[
        "strict", "warnings", "utf8", "bytes", "locale", "integer", "less",
        "sigtrap", "subs", "vars", "feature", "autodie", "autouse", "base",
        "parent", "lib", "bigint", "bignum", "bigrat",
    ];
    PRAGMAS.contains(&module)
}
/// Default export lists for well-known CPAN/core modules.
///
/// Returns `Some(exports)` when the module is recognized — an empty list
/// means the module is known to export nothing by default (object-oriented
/// APIs such as `LWP::UserAgent` and `DBI`) — and `None` for modules we know
/// nothing about, letting callers stay conservative.
fn get_known_module_exports(module: &str) -> Option<Vec<&'static str>> {
    let exports: Vec<&'static str> = match module {
        "Data::Dumper" => vec!["Dumper"],
        "JSON" => vec!["encode_json", "decode_json", "to_json", "from_json"],
        "YAML" => vec!["Load", "Dump", "LoadFile", "DumpFile"],
        "Storable" => vec!["store", "retrieve", "freeze", "thaw"],
        "List::Util" => vec!["first", "max", "min", "sum", "reduce", "shuffle", "uniq"],
        "Scalar::Util" => vec!["blessed", "reftype", "looks_like_number", "weaken"],
        "File::Spec" => vec!["catfile", "catdir", "splitpath", "splitdir"],
        "File::Basename" => vec!["basename", "dirname", "fileparse"],
        "Cwd" => vec!["getcwd", "abs_path", "realpath"],
        "Time::HiRes" => vec!["time", "sleep", "usleep", "gettimeofday"],
        "Digest::MD5" => vec!["md5", "md5_hex", "md5_base64"],
        "MIME::Base64" => vec!["encode_base64", "decode_base64"],
        "URI::Escape" => vec!["uri_escape", "uri_unescape"],
        "LWP::Simple" => vec!["get", "head", "getprint", "getstore", "mirror"],
        "CGI" => vec!["param", "header", "start_html", "end_html"],
        // Known modules with no default exports, plus a few pragmas that
        // callers may probe directly.
        "LWP::UserAgent" | "DBI" | "strict" | "warnings" | "utf8" => vec![],
        _ => return None,
    };
    Some(exports)
}
impl ImportOptimizer {
pub fn new() -> Self {
Self
}
pub fn analyze_file(&self, file_path: &Path) -> Result<ImportAnalysis, String> {
let content = std::fs::read_to_string(file_path).map_err(|e| e.to_string())?;
self.analyze_content(&content)
}
    /// Analyze Perl source text for import issues.
    ///
    /// Detects, in order:
    /// * every `use Module;` / `use Module qw(...);` line (`imports`),
    /// * modules imported on more than one line (`duplicate_imports`),
    /// * `qw()` symbols — or bare imports of known zero-export modules —
    ///   never referenced outside import/comment lines (`unused_imports`),
    /// * qualified `Module::symbol` usages whose module is never imported,
    ///   scanned after stripping strings, `qr//` literals, and comments
    ///   (`missing_imports`),
    /// * ordering/duplication advice (`organization_suggestions`).
    ///
    /// Errors are stringified regex-compilation failures from `Regex::new`.
    pub fn analyze_content(&self, content: &str) -> Result<ImportAnalysis, String> {
        // `use Module;` or `use Module qw(sym ...);`, anchored at line start.
        let re_use = Regex::new(r"^\s*use\s+([A-Za-z0-9_:]+)(?:\s+qw\(([^)]*)\))?\s*;")
            .map_err(|e| e.to_string())?;
        // Pass 1: collect every import line (line numbers are 1-indexed).
        let mut imports = Vec::new();
        for (idx, line) in content.lines().enumerate() {
            if let Some(caps) = re_use.captures(line) {
                let module = caps[1].to_string();
                let symbols_str = caps.get(2).map(|m| m.as_str()).unwrap_or("");
                let symbols = if symbols_str.is_empty() {
                    Vec::new()
                } else {
                    // qw() lists are whitespace-separated; strip stray
                    // punctuation/quotes that sometimes end up in the list.
                    symbols_str
                        .split_whitespace()
                        .filter(|s| !s.is_empty())
                        .map(|s| s.trim_matches(|c| c == ',' || c == ';' || c == '"'))
                        .map(|s| s.to_string())
                        .collect::<Vec<_>>()
                };
                imports.push(ImportEntry { module, symbols, line: idx + 1 });
            }
        }
        // Group import line numbers per module to detect duplicates.
        let mut module_to_lines: BTreeMap<String, Vec<usize>> = BTreeMap::new();
        for imp in &imports {
            module_to_lines.entry(imp.module.clone()).or_default().push(imp.line);
        }
        let duplicate_imports = module_to_lines
            .iter()
            .filter(|(_, lines)| lines.len() > 1)
            .map(|(module, lines)| DuplicateImport {
                module: module.clone(),
                lines: lines.clone(),
                can_merge: true,
            })
            .collect::<Vec<_>>();
        // Usage scanning ignores `use` lines and full-line comments.
        // NOTE(review): trailing `# ...` comments on code lines remain in
        // this text, so a symbol mentioned only in such a comment counts as
        // "used" — confirm this leniency is intended.
        let non_use_content = content
            .lines()
            .filter(
                |line| {
                    !line.trim_start().starts_with("use ") && !line.trim_start().starts_with("#")
                },
            )
            .collect::<Vec<_>>()
            .join(
                "
",
            );
        let dumper_re = Regex::new(r"\bDumper\b").map_err(|e| e.to_string())?;
        let mut unused_imports = Vec::new();
        for imp in &imports {
            let mut unused_symbols = Vec::new();
            if !imp.symbols.is_empty() {
                // Explicit qw() import: flag each symbol that has no
                // whole-word occurrence anywhere in the non-import code.
                for sym in &imp.symbols {
                    let re = Regex::new(&format!(r"\b{}\b", regex::escape(sym)))
                        .map_err(|e| e.to_string())?;
                    if !re.is_match(&non_use_content) {
                        unused_symbols.push(sym.clone());
                    }
                }
            } else {
                // Bare `use Module;` — skip pragmas, which act at compile
                // time and are never "unused".
                // NOTE(review): this inline pragma list differs from
                // `is_pragma_module` (`overload` appears only here;
                // `feature`, `parent`, `base`, etc. only there) — confirm
                // whether the two lists should be unified.
                let is_pragma = matches!(
                    imp.module.as_str(),
                    "strict"
                        | "warnings"
                        | "utf8"
                        | "bytes"
                        | "integer"
                        | "locale"
                        | "overload"
                        | "sigtrap"
                        | "subs"
                        | "vars"
                );
                if !is_pragma {
                    let (is_known_module, known_exports) =
                        match get_known_module_exports(&imp.module) {
                            Some(exports) => (true, exports),
                            None => (false, Vec::new()),
                        };
                    let mut is_used = false;
                    // Usage check 1: whole-word mention of the module name.
                    let module_pattern = format!(r"\b{}\b", regex::escape(&imp.module));
                    let module_re = Regex::new(&module_pattern).map_err(|e| e.to_string())?;
                    if module_re.is_match(&non_use_content) {
                        is_used = true;
                    }
                    // Usage check 2: qualified `Module::...` reference.
                    if !is_used {
                        let qualified_pattern = format!(r"{}::", regex::escape(&imp.module));
                        let qualified_re =
                            Regex::new(&qualified_pattern).map_err(|e| e.to_string())?;
                        if qualified_re.is_match(&non_use_content) {
                            is_used = true;
                        }
                    }
                    // Usage check 3: Data::Dumper is commonly used via a
                    // bare `Dumper(...)` call.
                    if !is_used && imp.module == "Data::Dumper" {
                        if dumper_re.is_match(&non_use_content) {
                            is_used = true;
                        }
                    }
                    // Usage check 4: any of the module's known default
                    // exports appearing as a whole word.
                    if !is_used && !known_exports.is_empty() {
                        for export in &known_exports {
                            let export_pattern = format!(r"\b{}\b", regex::escape(export));
                            let export_re =
                                Regex::new(&export_pattern).map_err(|e| e.to_string())?;
                            if export_re.is_match(&non_use_content) {
                                is_used = true;
                                break;
                            }
                        }
                    }
                    // Deliberately conservative: only bare imports of modules
                    // *known* to export nothing (e.g. LWP::UserAgent, DBI)
                    // are flagged. Unknown modules and known exporters are
                    // left alone even when no usage was found — the tests
                    // below rely on this policy.
                    if !is_used && is_known_module && known_exports.is_empty() {
                        unused_symbols.push("(bare import)".to_string());
                    }
                }
            }
            if !unused_symbols.is_empty() {
                unused_imports.push(UnusedImport {
                    module: imp.module.clone(),
                    symbols: unused_symbols,
                    line: imp.line,
                    reason: "Symbols not used in code".to_string(),
                });
            }
        }
        let imported_modules: BTreeSet<String> =
            imports.iter().map(|imp| imp.module.clone()).collect();
        // Strip single-/double-quoted strings, `qr/.../` regex literals, and
        // `#` comments before scanning for qualified usages, so text such as
        // "JSON::encode_json" inside a string does not count as usage.
        let string_re = Regex::new("'[^']*'|\"[^\"]*\"").map_err(|e| e.to_string())?;
        let stripped = string_re.replace_all(content, " ").to_string();
        let regex_literal_re = Regex::new(r"qr/[^/]*/").map_err(|e| e.to_string())?;
        let stripped = regex_literal_re.replace_all(&stripped, " ").to_string();
        let comment_re = Regex::new(r"(?m)#.*$").map_err(|e| e.to_string())?;
        let stripped = comment_re.replace_all(&stripped, " ").to_string();
        // `Path::To::Module::symbol` — capture 1 is the module path,
        // capture 2 the trailing symbol.
        let usage_re = Regex::new(
            r"\b([A-Za-z_][A-Za-z0-9_]*(?:::[A-Za-z_][A-Za-z0-9_]*)*)::([A-Za-z_][A-Za-z0-9_]*)",
        )
        .map_err(|e| e.to_string())?;
        let mut usage_map: BTreeMap<String, Vec<String>> = BTreeMap::new();
        for caps in usage_re.captures_iter(&stripped) {
            if let (Some(module_match), Some(symbol_match)) = (caps.get(1), caps.get(2)) {
                let module = module_match.as_str().to_string();
                let symbol = symbol_match.as_str().to_string();
                // Already imported, or a pragma — not a missing import.
                if imported_modules.contains(&module) || is_pragma_module(&module) {
                    continue;
                }
                usage_map.entry(module).or_default().push(symbol);
            }
        }
        // Suggest inserting new imports right after the last existing one
        // (or at line 1 when the file has no imports).
        let last_import_line = imports.iter().map(|i| i.line).max().unwrap_or(0);
        let missing_imports = usage_map
            .into_iter()
            .map(|(module, mut symbols)| {
                symbols.sort();
                symbols.dedup();
                MissingImport {
                    module,
                    symbols,
                    suggested_location: last_import_line + 1,
                    confidence: 0.8,
                }
            })
            .collect::<Vec<_>>();
        let mut organization_suggestions = Vec::new();
        // Suggestion: imports not in alphabetical order.
        let module_order: Vec<String> = imports.iter().map(|i| i.module.clone()).collect();
        let mut sorted_order = module_order.clone();
        sorted_order.sort();
        if module_order != sorted_order {
            organization_suggestions.push(OrganizationSuggestion {
                description: "Sort import statements alphabetically".to_string(),
                priority: SuggestionPriority::Low,
            });
        }
        // Suggestion: duplicate imports to remove.
        if !duplicate_imports.is_empty() {
            let modules =
                duplicate_imports.iter().map(|d| d.module.clone()).collect::<Vec<_>>().join(", ");
            organization_suggestions.push(OrganizationSuggestion {
                description: format!("Remove duplicate imports for modules: {}", modules),
                priority: SuggestionPriority::Medium,
            });
        }
        // Suggestion: qw() lists that are unsorted or contain duplicates.
        let mut symbols_need_org = false;
        for imp in &imports {
            if imp.symbols.len() > 1 {
                let mut sorted = imp.symbols.clone();
                sorted.sort();
                sorted.dedup();
                if sorted != imp.symbols {
                    symbols_need_org = true;
                    break;
                }
            }
        }
        if symbols_need_org {
            organization_suggestions.push(OrganizationSuggestion {
                description: "Sort and deduplicate symbols within import statements".to_string(),
                priority: SuggestionPriority::Low,
            });
        }
        Ok(ImportAnalysis {
            imports,
            unused_imports,
            missing_imports,
            duplicate_imports,
            organization_suggestions,
        })
    }
pub fn generate_optimized_imports(&self, analysis: &ImportAnalysis) -> String {
let mut optimized_imports = Vec::new();
let mut module_symbols: BTreeMap<String, Vec<String>> = BTreeMap::new();
let mut unused_by_module: BTreeMap<String, Vec<String>> = BTreeMap::new();
for unused in &analysis.unused_imports {
unused_by_module
.entry(unused.module.clone())
.or_default()
.extend(unused.symbols.clone());
}
for import in &analysis.imports {
let kept_symbols: Vec<String> = import
.symbols
.iter()
.filter(|sym| {
if let Some(unused_symbols) = unused_by_module.get(&import.module) {
!unused_symbols.contains(sym)
} else {
true }
})
.cloned()
.collect();
let entry = module_symbols.entry(import.module.clone()).or_default();
entry.extend(kept_symbols);
entry.sort();
entry.dedup();
}
for missing in &analysis.missing_imports {
let entry = module_symbols.entry(missing.module.clone()).or_default();
entry.extend(missing.symbols.clone());
entry.sort();
entry.dedup();
}
for (module, symbols) in &module_symbols {
let was_bare_import =
analysis.imports.iter().any(|imp| imp.module == *module && imp.symbols.is_empty());
if symbols.is_empty() && was_bare_import {
optimized_imports.push(format!("use {};", module));
} else if !symbols.is_empty() {
let symbol_list = symbols.join(" ");
optimized_imports.push(format!("use {} qw({});", module, symbol_list));
}
}
optimized_imports.sort();
optimized_imports.join("\n")
}
    /// Translate the analysis into byte-range [`TextEdit`]s against `content`.
    ///
    /// With no existing imports, a single insertion is produced at the first
    /// missing import's suggested line (or line 1). Otherwise one edit
    /// replaces the span from the first to the last `use` line with the
    /// regenerated import block.
    ///
    /// NOTE(review): the replaced span covers *every* line between the first
    /// and last import, including any non-import code interleaved there —
    /// confirm imports are expected to be contiguous.
    pub fn generate_edits(&self, content: &str, analysis: &ImportAnalysis) -> Vec<TextEdit> {
        let optimized = self.generate_optimized_imports(analysis);
        if analysis.imports.is_empty() {
            if optimized.is_empty() {
                // Nothing to remove, nothing to add.
                return Vec::new();
            }
            let insert_line =
                analysis.missing_imports.first().map(|m| m.suggested_location).unwrap_or(1);
            let insert_offset = self.line_offset(content, insert_line);
            // Pure insertion: empty range at the insert point.
            return vec![TextEdit {
                range: (insert_offset, insert_offset),
                new_text: optimized + "\n",
            }];
        }
        // Replace the whole existing import block with one edit.
        let first_line = analysis.imports.iter().map(|i| i.line).min().unwrap_or(1);
        let last_line = analysis.imports.iter().map(|i| i.line).max().unwrap_or(1);
        let start_offset = self.line_offset(content, first_line);
        let end_offset = self.line_offset(content, last_line + 1);
        vec![TextEdit {
            range: (start_offset, end_offset),
            new_text: if optimized.is_empty() { String::new() } else { optimized + "\n" },
        }]
    }
fn line_offset(&self, content: &str, line: usize) -> usize {
if line <= 1 {
return 0;
}
let mut offset = 0;
for (idx, l) in content.lines().enumerate() {
if idx + 1 >= line {
break;
}
offset += l.len() + 1; }
offset
}
}
/// `Default` simply delegates to [`ImportOptimizer::new`].
impl Default for ImportOptimizer {
    fn default() -> Self {
        Self::new()
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::path::PathBuf;
    use tempfile::TempDir;

    /// Write `content` to `test.pl` inside a fresh temp dir. The `TempDir`
    /// is returned alongside the path so the directory stays alive for the
    /// duration of the test.
    fn create_test_file(content: &str) -> Result<(TempDir, PathBuf), Box<dyn std::error::Error>> {
        let temp_dir = TempDir::new()?;
        let file_path = temp_dir.path().join("test.pl");
        fs::write(&file_path, content)?;
        Ok((temp_dir, file_path))
    }

    // Imports are parsed in file order, and a used import (Dumper is called)
    // produces no unused-import findings.
    #[test]
    fn test_basic_import_analysis() -> Result<(), Box<dyn std::error::Error>> {
        let optimizer = ImportOptimizer::new();
        let content = r#"#!/usr/bin/perl
use strict;
use warnings;
use Data::Dumper;
print Dumper(\@ARGV);
"#;
        let (_temp_dir, file_path) = create_test_file(content)?;
        let analysis = optimizer.analyze_file(&file_path)?;
        assert_eq!(analysis.imports.len(), 3);
        assert_eq!(analysis.imports[0].module, "strict");
        assert_eq!(analysis.imports[1].module, "warnings");
        assert_eq!(analysis.imports[2].module, "Data::Dumper");
        assert!(analysis.unused_imports.is_empty());
        Ok(())
    }

    // Documents the conservative policy: bare imports of modules with known
    // non-empty export lists (Data::Dumper, JSON) are NOT flagged even when
    // nothing in the code uses them — only known zero-export modules are.
    #[test]
    fn test_unused_import_detection() -> Result<(), Box<dyn std::error::Error>> {
        let optimizer = ImportOptimizer::new();
        let content = r#"use strict;
use warnings;
use Data::Dumper; # This is not used
use JSON; # This is not used
print "Hello World\n";
"#;
        let (_temp_dir, file_path) = create_test_file(content)?;
        let analysis = optimizer.analyze_file(&file_path)?;
        assert!(analysis.unused_imports.is_empty());
        Ok(())
    }

    // Qualified `Module::symbol` calls without a matching `use` are reported,
    // and the suggested insertion point is one line past the last import.
    #[test]
    fn test_missing_import_detection() -> Result<(), Box<dyn std::error::Error>> {
        let optimizer = ImportOptimizer::new();
        let content = r#"use strict;
use warnings;
# Using JSON::encode_json without importing JSON
my $json = JSON::encode_json({key => 'value'});
# Using Data::Dumper::Dumper without importing Data::Dumper
print Data::Dumper::Dumper(\@ARGV);
"#;
        let (_temp_dir, file_path) = create_test_file(content)?;
        let analysis = optimizer.analyze_file(&file_path)?;
        assert_eq!(analysis.missing_imports.len(), 2);
        assert!(analysis.missing_imports.iter().any(|m| m.module == "JSON"));
        assert!(analysis.missing_imports.iter().any(|m| m.module == "Data::Dumper"));
        for m in &analysis.missing_imports {
            // Imports end at line 2, so insertion is suggested at line 3.
            assert_eq!(m.suggested_location, 3);
        }
        Ok(())
    }

    // A module imported twice is reported once, with both line numbers.
    #[test]
    fn test_duplicate_import_detection() -> Result<(), Box<dyn std::error::Error>> {
        let optimizer = ImportOptimizer::new();
        let content = r#"use strict;
use warnings;
use Data::Dumper;
use JSON;
use Data::Dumper; # Duplicate
print Dumper(\@ARGV);
"#;
        let (_temp_dir, file_path) = create_test_file(content)?;
        let analysis = optimizer.analyze_file(&file_path)?;
        assert_eq!(analysis.duplicate_imports.len(), 1);
        assert_eq!(analysis.duplicate_imports[0].module, "Data::Dumper");
        assert_eq!(analysis.duplicate_imports[0].lines.len(), 2);
        assert!(analysis.duplicate_imports[0].can_merge);
        Ok(())
    }

    // Unsorted imports, a duplicate, and an unsorted/duplicated qw() list
    // each produce their own organization suggestion.
    #[test]
    fn test_organization_suggestions() -> Result<(), Box<dyn std::error::Error>> {
        let optimizer = ImportOptimizer::new();
        let content = r#"use warnings;
use strict;
use List::Util qw(max max min);
use Data::Dumper;
use Data::Dumper; # duplicate
"#;
        let (_temp_dir, file_path) = create_test_file(content)?;
        let analysis = optimizer.analyze_file(&file_path)?;
        assert!(
            analysis
                .organization_suggestions
                .iter()
                .any(|s| s.description.contains("Sort import statements"))
        );
        assert!(
            analysis
                .organization_suggestions
                .iter()
                .any(|s| s.description.contains("Remove duplicate imports"))
        );
        assert!(
            analysis
                .organization_suggestions
                .iter()
                .any(|s| s.description.contains("Sort and deduplicate symbols"))
        );
        Ok(())
    }

    // qw() lists parse into individual symbols; explicitly imported symbols
    // that never appear in the code ("min", "blessed", "reftype") are flagged.
    #[test]
    fn test_qw_import_parsing() -> Result<(), Box<dyn std::error::Error>> {
        let optimizer = ImportOptimizer::new();
        let content = r#"use List::Util qw(first max min sum);
use Scalar::Util qw(blessed reftype);
my @nums = (1, 2, 3, 4, 5);
print "Max: " . max(@nums) . "\n";
print "Sum: " . sum(@nums) . "\n";
print "First: " . first { $_ > 3 } @nums;
"#;
        let (_temp_dir, file_path) = create_test_file(content)?;
        let analysis = optimizer.analyze_file(&file_path)?;
        assert_eq!(analysis.imports.len(), 2);
        let list_util = analysis
            .imports
            .iter()
            .find(|i| i.module == "List::Util")
            .ok_or("List::Util import not found")?;
        assert_eq!(list_util.symbols, vec!["first", "max", "min", "sum"]);
        let scalar_util = analysis
            .imports
            .iter()
            .find(|i| i.module == "Scalar::Util")
            .ok_or("Scalar::Util import not found")?;
        assert_eq!(scalar_util.symbols, vec!["blessed", "reftype"]);
        assert_eq!(analysis.unused_imports.len(), 2);
        let list_util_unused = analysis
            .unused_imports
            .iter()
            .find(|u| u.module == "List::Util")
            .ok_or("List::Util unused imports not found")?;
        assert_eq!(list_util_unused.symbols, vec!["min"]);
        let scalar_util_unused = analysis
            .unused_imports
            .iter()
            .find(|u| u.module == "Scalar::Util")
            .ok_or("Scalar::Util unused imports not found")?;
        assert_eq!(scalar_util_unused.symbols, vec!["blessed", "reftype"]);
        Ok(())
    }

    // End-to-end rendering: unused symbols dropped, missing imports added,
    // bare pragma imports preserved, output sorted alphabetically.
    #[test]
    fn test_generate_optimized_imports() {
        let optimizer = ImportOptimizer::new();
        let analysis = ImportAnalysis {
            imports: vec![
                ImportEntry { module: "strict".to_string(), symbols: vec![], line: 1 },
                ImportEntry { module: "warnings".to_string(), symbols: vec![], line: 2 },
                ImportEntry {
                    module: "List::Util".to_string(),
                    symbols: vec!["first".to_string(), "max".to_string(), "unused".to_string()],
                    line: 3,
                },
            ],
            unused_imports: vec![UnusedImport {
                module: "List::Util".to_string(),
                symbols: vec!["unused".to_string()],
                line: 3,
                reason: "Symbol not used".to_string(),
            }],
            missing_imports: vec![MissingImport {
                module: "Data::Dumper".to_string(),
                symbols: vec!["Dumper".to_string()],
                suggested_location: 10,
                confidence: 0.8,
            }],
            duplicate_imports: vec![],
            organization_suggestions: vec![],
        };
        let optimized = optimizer.generate_optimized_imports(&analysis);
        let expected_lines = [
            "use Data::Dumper qw(Dumper);",
            "use List::Util qw(first max);",
            "use strict;",
            "use warnings;",
        ];
        assert_eq!(optimized, expected_lines.join("\n"));
    }

    // An empty file yields an entirely empty analysis.
    #[test]
    fn test_empty_file_analysis() -> Result<(), Box<dyn std::error::Error>> {
        let optimizer = ImportOptimizer::new();
        let content = "";
        let (_temp_dir, file_path) = create_test_file(content)?;
        let analysis = optimizer.analyze_file(&file_path)?;
        assert!(analysis.imports.is_empty());
        assert!(analysis.unused_imports.is_empty());
        assert!(analysis.missing_imports.is_empty());
        assert!(analysis.duplicate_imports.is_empty());
        Ok(())
    }

    // Mixed scenario: LWP::UserAgent (known, zero exports, unreferenced) is
    // flagged as unused; File::Spec::Functions may lose only `catdir`; the
    // HTTP::Tiny qualified call is reported as a missing import.
    #[test]
    fn test_complex_perl_code_analysis() -> Result<(), Box<dyn std::error::Error>> {
        let optimizer = ImportOptimizer::new();
        let content = r#"#!/usr/bin/perl
use strict;
use warnings;
use Data::Dumper;
use JSON qw(encode_json decode_json);
use LWP::UserAgent; # Unused
use File::Spec::Functions qw(catfile catdir);
# Complex code with various patterns
my $data = { key => 'value', numbers => [1, 2, 3] };
my $json_string = encode_json($data);
print "JSON: $json_string\n";
# Using File::Spec but not all imported functions
my $path = catfile('/tmp', 'test.json');
print "Path: $path\n";
# Using modules without explicit imports
my $response = HTTP::Tiny::new()->get('http://example.com');
print Dumper($response);
"#;
        let (_temp_dir, file_path) = create_test_file(content)?;
        let analysis = optimizer.analyze_file(&file_path)?;
        assert!(analysis.unused_imports.iter().any(|u| u.module == "LWP::UserAgent"));
        let file_spec_unused =
            analysis.unused_imports.iter().find(|u| u.module == "File::Spec::Functions");
        if let Some(unused) = file_spec_unused {
            assert!(unused.symbols.contains(&"catdir".to_string()));
        }
        assert!(analysis.missing_imports.iter().any(|m| m.module == "HTTP::Tiny"));
        Ok(())
    }

    // Again the conservative policy: neither a used module, a known exporter
    // with no observed usage (JSON), nor an unknown module is flagged.
    #[test]
    fn test_bare_import_with_exports_detection() -> Result<(), Box<dyn std::error::Error>> {
        let optimizer = ImportOptimizer::new();
        let content = r#"use strict;
use warnings;
use Data::Dumper; # Used
use JSON; # Unused - has exports but none are used
use SomeUnknownModule; # Conservative - not marked as unused
print Dumper(\@ARGV);
"#;
        let (_temp_dir, file_path) = create_test_file(content)?;
        let analysis = optimizer.analyze_file(&file_path)?;
        assert!(!analysis.unused_imports.iter().any(|u| u.module == "Data::Dumper"));
        assert!(analysis.unused_imports.is_empty());
        Ok(())
    }

    // Module paths inside string literals and qr// literals are stripped
    // before the missing-import scan, so only the real call is reported.
    #[test]
    fn test_regex_edge_cases() -> Result<(), Box<dyn std::error::Error>> {
        let optimizer = ImportOptimizer::new();
        let content = r#"use strict;
use warnings;
# These should not be detected as module references
my $string = "This is not JSON::encode_json in a string";
my $regex = qr/Data::Dumper/;
print "Module::Name is just text";
# This should be detected
my $result = JSON::encode_json({test => 1});
"#;
        let (_temp_dir, file_path) = create_test_file(content)?;
        let analysis = optimizer.analyze_file(&file_path)?;
        assert_eq!(analysis.missing_imports.len(), 1);
        assert_eq!(analysis.missing_imports[0].module, "JSON");
        Ok(())
    }

    // Incomplete `Foo::` / `::bar` fragments (inside strings here) must not
    // panic the capture handling or produce spurious findings.
    #[test]
    fn test_malformed_regex_capture_safety() -> Result<(), Box<dyn std::error::Error>> {
        let optimizer = ImportOptimizer::new();
        let content = r#"use strict;
use warnings;
# Normal module usage
my $result = JSON::encode_json({test => 1});
# Edge case patterns that might not fully match the regex
my $incomplete = "Something::";
my $partial = "::Function";
"#;
        let (_temp_dir, file_path) = create_test_file(content)?;
        let analysis = optimizer.analyze_file(&file_path)?;
        assert_eq!(analysis.missing_imports.len(), 1);
        assert_eq!(analysis.missing_imports[0].module, "JSON");
        Ok(())
    }
}