// dumpling 0.1.0
//
// A fast JavaScript runtime and bundler in Rust
// Documentation
use regex::Regex;
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
use std::path::PathBuf;

use crate::error::Result;

/// One output bundle produced by the code splitter: a named group of module
/// ids together with the file name it is meant to be written under.
#[derive(Debug, Serialize, Deserialize)]
pub struct Chunk {
    /// Chunk identifier. The splitter in this file emits `"main"`, `"vendor"`
    /// and `"chunk-<n>"`.
    pub id: String,
    /// Ids of the modules assigned to this chunk.
    pub modules: Vec<String>,
    /// Output file name, e.g. `"main.js"`, `"vendor.js"` or `"chunk-1.js"`.
    pub file_name: String,
    /// Sum of `source.len()` over the modules counted into this chunk
    /// (presumably bytes of source text — confirm against the module type).
    pub size: usize,
}

/// Tunables for the code splitter.
///
/// Only `vendor_chunk`, `chunk_size_limit` and `min_size` are read by the
/// splitting code visible in this file; the remaining fields are carried in
/// the configuration but not consulted here.
#[derive(Debug, Serialize, Deserialize)]
pub struct SplittingConfig {
    /// Not read by the code in this file.
    /// NOTE(review): presumably toggles automatic splitting — confirm.
    pub automatic: bool,
    /// When `true`, `CodeSplitter::split` builds a separate `"vendor"` chunk.
    pub vendor_chunk: bool,
    /// A numbered chunk is closed before adding a module that would push its
    /// accumulated size past this value (same unit as `Chunk::size`).
    pub chunk_size_limit: usize,
    /// Not read by the code in this file.
    pub max_initial_requests: u32,
    /// Not read by the code in this file.
    pub max_async_requests: u32,
    /// A numbered chunk must have grown beyond this size before a module that
    /// is a "good split point" is allowed to close it.
    pub min_size: usize,
    /// Cache groups keyed by group name. Populated by `Default`, but not
    /// consulted by the splitting code in this file.
    pub cache_groups: HashMap<String, CacheGroup>,
}

/// A named rule that groups modules matching a pattern into one chunk.
///
/// All fields are public so that code outside this module can build custom
/// groups for [`SplittingConfig::cache_groups`]; previously `priority` was
/// private, which made the struct impossible to construct elsewhere.
#[derive(Debug, Serialize, Deserialize)]
pub struct CacheGroup {
    /// Regex pattern to match modules.
    pub test: String,
    /// Name of the chunk.
    pub name: String,
    /// Numeric priority of the group. The defaults in this file use `-10` for
    /// `vendor` and `-20` for `common`.
    /// NOTE(review): nothing in this file reads the value; presumably a higher
    /// number wins when several groups match — confirm before relying on it.
    pub priority: i32,
}

impl Default for SplittingConfig {
    fn default() -> Self {
        let mut cache_groups = HashMap::new();

        // Default vendor cache group
        cache_groups.insert(
            "vendor".to_string(),
            CacheGroup {
                test: "[\\\\/]node_modules[\\\\/]".to_string(),
                name: "vendor".to_string(),
                priority: -10,
            },
        );

        // Default common cache group
        cache_groups.insert(
            "common".to_string(),
            CacheGroup {
                test: "".to_string(), // Matches all modules
                name: "common".to_string(),
                priority: -20,
            },
        );

        Self {
            automatic: true,
            vendor_chunk: true,
            chunk_size_limit: 30000, // 30KB
            max_initial_requests: 3,
            max_async_requests: 5,
            min_size: 20000, // 20KB
            cache_groups,
        }
    }
}

/// Splits a dependency graph into output chunks and rewrites dynamic imports.
pub struct CodeSplitter {
    /// Splitting thresholds and switches.
    config: SplittingConfig,
    /// Pattern for static `import ... from '<specifier>'` statements.
    /// Compiled in `new`; not used by any method visible in this file.
    import_regex: Regex,
    /// Pattern for `import('<specifier>')` calls with a string-literal
    /// argument; used when rewriting dynamic imports.
    dynamic_import_regex: Regex,
}

impl CodeSplitter {
    /// Builds a splitter around `config`, compiling both import patterns once.
    ///
    /// # Panics
    ///
    /// Only if one of the hard-coded patterns below is not a valid regex.
    pub fn new(config: SplittingConfig) -> Self {
        // Static form `import <bindings> from '<specifier>'`; group 1 is the specifier.
        let import_regex = Regex::new(r#"import\s+.*?from\s+['"]([^'"]+)['"]"#).unwrap();
        // Dynamic form `import('<specifier>')`; group 1 is the specifier.
        let dynamic_import_regex =
            Regex::new(r#"import\s*\(\s*['"]([^'"]+)['"]\s*\)"#).unwrap();

        Self {
            config,
            import_regex,
            dynamic_import_regex,
        }
    }

    /// Analyze dependencies and create chunks.
    ///
    /// Chunks are produced in three passes over the graph's topological order:
    ///
    /// 1. the entry (`main`) chunk, when the graph has an entry id;
    /// 2. the `vendor` chunk, when `config.vendor_chunk` is set and at least
    ///    one vendor module is still unassigned;
    /// 3. numbered chunks for whatever is left.
    ///
    /// Every module placed in a chunk is recorded as processed so that a later
    /// pass never assigns it a second time.
    ///
    /// # Errors
    ///
    /// Propagates any error from `graph.topological_sort()` or from the
    /// chunk-building helpers.
    pub fn split(
        &self,
        graph: &crate::bundler::DependencyGraph,
        output_dir: &PathBuf,
    ) -> Result<Vec<Chunk>> {
        let mut chunks = Vec::new();
        let mut processed_modules: HashSet<String> = HashSet::new();
        let topological_order = graph.topological_sort()?;

        // Create entry point chunk
        if let Some(entry_id) = &graph.entry_id {
            let entry_chunk =
                self.create_entry_chunk(graph, entry_id, &topological_order, output_dir)?;
            processed_modules.insert(entry_id.clone());
            // The entry chunk also carries the entry's direct dependencies; mark
            // them as handled too, otherwise the remaining-chunks pass would
            // emit the same modules a second time.
            processed_modules.extend(entry_chunk.modules.iter().cloned());
            chunks.push(entry_chunk);
        }

        // Create vendor chunk if enabled
        if self.config.vendor_chunk {
            let vendor_chunk = self.create_vendor_chunk(
                graph,
                &topological_order,
                &processed_modules,
                output_dir,
            )?;
            // An empty vendor chunk is dropped rather than emitted.
            if !vendor_chunk.modules.is_empty() {
                processed_modules.extend(vendor_chunk.modules.iter().cloned());
                chunks.push(vendor_chunk);
            }
        }

        // Split remaining modules into chunks based on size and split points
        chunks.extend(self.create_remaining_chunks(
            graph,
            &topological_order,
            &processed_modules,
            output_dir,
        )?);

        Ok(chunks)
    }

    /// Create entry point chunk.
    ///
    /// The `main` chunk holds the entry module itself plus every non-vendor
    /// module the entry imports directly, listed in the order they appear in
    /// `topological_order`. `size` is the sum of `source.len()` over exactly
    /// the modules listed.
    ///
    /// If the graph has no module for `entry_id`, the chunk is returned empty
    /// with size 0.
    ///
    /// `output_dir` is accepted for signature parity with the other chunk
    /// builders and is not used here.
    fn create_entry_chunk(
        &self,
        graph: &crate::bundler::DependencyGraph,
        entry_id: &str,
        topological_order: &[String],
        output_dir: &PathBuf,
    ) -> Result<Chunk> {
        let mut modules = Vec::new();
        let mut size = 0;

        if let Some(entry_module) = graph.get_module(entry_id) {
            let mut entry_listed = false;

            for module_id in topological_order {
                if module_id == entry_id {
                    // The entry belongs to its own chunk. Previously only its
                    // size was counted and the id was left out of `modules`,
                    // so the entry module ended up in no chunk at all.
                    if !entry_listed {
                        modules.push(module_id.clone());
                        size += entry_module.source.len();
                        entry_listed = true;
                    }
                    continue;
                }

                // Vendor modules are left for the vendor chunk.
                if self.is_vendor_module(module_id) {
                    continue;
                }

                if let Some(module) = graph.get_module(module_id) {
                    // Only modules imported directly by the entry ride along.
                    if self.is_directly_imported_by(graph, module_id, entry_id) {
                        modules.push(module_id.clone());
                        size += module.source.len();
                    }
                }
            }

            // Defensive: keep the entry even if the ordering did not mention it.
            if !entry_listed {
                modules.push(entry_id.to_string());
                size += entry_module.source.len();
            }
        }

        Ok(Chunk {
            id: "main".to_string(),
            modules,
            file_name: "main.js".to_string(),
            size,
        })
    }

    /// Collects every not-yet-processed vendor module into the `vendor` chunk.
    ///
    /// Modules are visited in `topological_order`; a module is taken when it is
    /// absent from `processed_modules`, is classified as vendor, and exists in
    /// the graph. The chunk may come back empty; the caller decides whether to
    /// keep it. `output_dir` is not used here.
    fn create_vendor_chunk(
        &self,
        graph: &crate::bundler::DependencyGraph,
        topological_order: &[String],
        processed_modules: &HashSet<String>,
        output_dir: &PathBuf,
    ) -> Result<Chunk> {
        let mut vendor_ids = Vec::new();
        let mut total_size = 0;

        let candidates = topological_order
            .iter()
            .filter(|id| !processed_modules.contains(*id) && self.is_vendor_module(id.as_str()));

        for id in candidates {
            // Ids without a module in the graph are skipped.
            if let Some(module) = graph.get_module(id) {
                total_size += module.source.len();
                vendor_ids.push(id.clone());
            }
        }

        let chunk = Chunk {
            id: "vendor".to_string(),
            modules: vendor_ids,
            file_name: "vendor.js".to_string(),
            size: total_size,
        };
        Ok(chunk)
    }

    /// Split remaining modules into chunks.
    ///
    /// Walks `topological_order`, skipping ids in `processed_modules` and ids
    /// the graph has no module for, and packs the rest greedily into chunks
    /// named `chunk-1`, `chunk-2`, ... A non-empty chunk is closed before the
    /// next module is added when either
    ///
    /// * adding the module would exceed `config.chunk_size_limit`, or
    /// * the chunk has already grown past `config.min_size` and the module is
    ///   a good split point.
    ///
    /// A single module larger than the limit still gets a chunk of its own.
    /// `output_dir` is not used here.
    fn create_remaining_chunks(
        &self,
        graph: &crate::bundler::DependencyGraph,
        topological_order: &[String],
        processed_modules: &HashSet<String>,
        output_dir: &PathBuf,
    ) -> Result<Vec<Chunk>> {
        // Numbered chunks share one naming scheme for both id and file name.
        let numbered_chunk = |index: usize, modules: Vec<String>, size: usize| Chunk {
            id: format!("chunk-{}", index),
            modules,
            file_name: format!("chunk-{}.js", index),
            size,
        };

        let mut chunks = Vec::new();
        let mut current_chunk_modules = Vec::new();
        let mut current_size = 0;
        let mut chunk_counter = 1;

        for module_id in topological_order {
            if processed_modules.contains(module_id) {
                continue;
            }

            if let Some(module) = graph.get_module(module_id) {
                let module_size = module.source.len();

                let over_limit = current_size + module_size > self.config.chunk_size_limit;
                let natural_break =
                    current_size > self.config.min_size && self.is_good_split_point(module_id);

                // Never close an empty chunk: an oversized first module simply
                // becomes a chunk by itself.
                if current_size > 0 && (over_limit || natural_break) {
                    // `take` hands the accumulated ids to the chunk and leaves
                    // an empty Vec behind, avoiding a clone of the whole list.
                    chunks.push(numbered_chunk(
                        chunk_counter,
                        std::mem::take(&mut current_chunk_modules),
                        current_size,
                    ));
                    current_size = 0;
                    chunk_counter += 1;
                }

                // Add module to current chunk
                current_chunk_modules.push(module_id.clone());
                current_size += module_size;
            }
        }

        // Flush whatever is still pending.
        if !current_chunk_modules.is_empty() {
            chunks.push(numbered_chunk(
                chunk_counter,
                current_chunk_modules,
                current_size,
            ));
        }

        Ok(chunks)
    }

    /// Check if a module is from node_modules (vendor).
    ///
    /// Returns `true` when some path segment of `module_id` is exactly
    /// `node_modules`, with `/` and `\` both treated as separators. Matching
    /// whole segments rather than a bare substring keeps first-party files
    /// such as `src/node_modules_shim.js` out of the vendor chunk.
    fn is_vendor_module(&self, module_id: &str) -> bool {
        module_id
            .split(|c: char| c == '/' || c == '\\')
            .any(|segment| segment == "node_modules")
    }

    /// Returns `true` when `module_id` appears among the resolved dependencies
    /// that the graph reports for `by_id`.
    fn is_directly_imported_by(
        &self,
        graph: &crate::bundler::DependencyGraph,
        module_id: &str,
        by_id: &str,
    ) -> bool {
        // Each entry is a pair; only its second element (the resolved id) matters.
        graph
            .get_resolved_dependencies(by_id)
            .into_iter()
            .any(|(_, dep_id)| dep_id == module_id)
    }

    /// Returns `true` when `module_id` sits under a directory treated as a
    /// natural boundary: `/pages/`, `/routes/` or `/components/`.
    fn is_good_split_point(&self, module_id: &str) -> bool {
        const BOUNDARY_DIRS: [&str; 3] = ["/pages/", "/routes/", "/components/"];

        BOUNDARY_DIRS
            .iter()
            .any(|dir| module_id.contains(*dir))
    }

    /// Generates the JavaScript snippet that loads chunks on demand.
    ///
    /// The result is an immediately-invoked function that fills a table with
    /// one loader per chunk (each calling `import('./<file_name>')`) and
    /// publishes it as `window.__dumpling_chunks__`. The `main` chunk gets no
    /// loader. Chunk ids and file names are interpolated as-is, without any
    /// escaping.
    pub fn generate_chunk_loader(&self, chunks: &[Chunk]) -> String {
        // One table entry per chunk other than `main`.
        let entries: String = chunks
            .iter()
            .filter(|chunk| chunk.id != "main")
            .map(|chunk| {
                format!(
                    "  chunks['{}'] = function() {{\n    return import('./{}');\n  }};\n",
                    chunk.id, chunk.file_name
                )
            })
            .collect();

        [
            "(function() {\n  var chunks = {};\n",
            entries.as_str(),
            "  window.__dumpling_chunks__ = chunks;\n})();\n",
        ]
        .concat()
    }

    /// Rewrites `import('<path>')` calls in `module_source` to go through the
    /// chunk loader table.
    ///
    /// A call is rewritten to `__dumpling_chunks__['chunk-<path>']()` when
    /// `should_chunk_import` accepts its path; every other call is copied
    /// through unchanged.
    ///
    /// NOTE(review): the emitted key is `chunk-<import path>`, whereas the
    /// splitter names its chunks `chunk-<n>`, `vendor` and `main`. Confirm how
    /// these keys are meant to line up with the generated loader table.
    pub fn update_dynamic_imports(&self, module_source: &str, target_chunks: &[String]) -> String {
        self.dynamic_import_regex
            .replace_all(module_source, |caps: &regex::Captures| {
                // Group 1 is the quoted specifier; group 0 is the whole call.
                let import_path = &caps[1];

                if !self.should_chunk_import(import_path, target_chunks) {
                    return caps[0].to_string();
                }

                format!("__dumpling_chunks__['chunk-{}']()", import_path)
            })
            .into_owned()
    }

    /// Decides whether a dynamic import should be routed through the loader.
    ///
    /// Simple heuristic: the path contains `./pages/` or `./routes/`, or it
    /// contains one of the strings in `target_chunks`.
    fn should_chunk_import(&self, import_path: &str, target_chunks: &[String]) -> bool {
        let in_routed_dir = ["./pages/", "./routes/"]
            .iter()
            .any(|dir| import_path.contains(*dir));

        in_routed_dir
            || target_chunks
                .iter()
                .any(|chunk| import_path.contains(chunk.as_str()))
    }
}