rs_web/
build.rs

1//! Build orchestrator for static site generation
2
3use anyhow::{Context, Result};
4use log::{debug, info, trace};
5use rayon::prelude::*;
6use std::fs;
7use std::path::{Path, PathBuf};
8use std::sync::Arc;
9
10use crate::config::{Config, PageDef};
11use crate::templates::Templates;
12use crate::tracker::{
13    AssetRef, BuildTracker, CachedDeps, SharedTracker, extract_html_asset_refs,
14    extract_markdown_asset_refs, resolve_url_to_source,
15};
16
/// Location of the dependency cache file, relative to the project directory
const CACHE_FILE: &str = ".rs-web-cache/deps.bin";
19
/// Main build orchestrator
///
/// Owns the loaded configuration, the output/project locations, a handle to
/// the dependency tracker, and the data cached between builds.
pub struct Builder {
    /// Loaded site configuration; hooks, `data()` and `pages()` are invoked through it
    config: Config,
    /// Directory that generated files are written to
    output_dir: PathBuf,
    /// Project root; relative paths and the cache file are resolved against it
    project_dir: PathBuf,
    /// Build dependency tracker
    tracker: SharedTracker,
    /// Cached dependency info from previous build (loaded when the builder is created)
    cached_deps: Option<CachedDeps>,
    /// Cached global data from last build
    cached_global_data: Option<serde_json::Value>,
    /// Cached page definitions from last build
    cached_pages: Option<Vec<PageDef>>,
}
34
35impl Builder {
36    pub fn new(config: Config, output_dir: PathBuf, project_dir: PathBuf) -> Self {
37        // Load cached deps from previous build
38        let cache_path = project_dir.join(CACHE_FILE);
39        let cached_deps = CachedDeps::load(&cache_path);
40        if cached_deps.is_some() {
41            debug!("Loaded cached dependency info from {:?}", cache_path);
42        }
43
44        // Get the tracker from config (it was created during config loading)
45        let tracker = config.tracker().clone();
46
47        Self {
48            config,
49            output_dir,
50            project_dir,
51            tracker,
52            cached_deps,
53            cached_global_data: None,
54            cached_pages: None,
55        }
56    }
57
58    /// Create a new builder with a fresh tracker (for full rebuilds)
59    pub fn new_with_tracker(project_dir: PathBuf, output_dir: PathBuf) -> Result<Self> {
60        let tracker = Arc::new(BuildTracker::new());
61        let config = Config::load_with_tracker(&project_dir, tracker.clone())?;
62
63        // Load cached deps from previous build
64        let cache_path = project_dir.join(CACHE_FILE);
65        let cached_deps = CachedDeps::load(&cache_path);
66        if cached_deps.is_some() {
67            debug!("Loaded cached dependency info from {:?}", cache_path);
68        }
69
70        Ok(Self {
71            config,
72            output_dir,
73            project_dir,
74            tracker,
75            cached_deps,
76            cached_global_data: None,
77            cached_pages: None,
78        })
79    }
80
81    /// Resolve a path relative to the project directory
82    fn resolve_path(&self, path: &str) -> PathBuf {
83        let p = Path::new(path);
84        if p.is_absolute() {
85            p.to_path_buf()
86        } else {
87            self.project_dir.join(path)
88        }
89    }
90
91    pub fn build(&mut self) -> Result<()> {
92        info!("Starting build");
93        debug!("Output directory: {:?}", self.output_dir);
94        debug!("Project directory: {:?}", self.project_dir);
95
96        // Stage 1: Clean output directory
97        trace!("Stage 1: Cleaning output directory");
98        self.clean()?;
99
100        // Run before_build hook (after clean, so it can write to output_dir)
101        trace!("Running before_build hook");
102        self.config.call_before_build()?;
103
104        // Stage 2: Call data() to get global data
105        trace!("Stage 2: Calling data() function");
106        let global_data = self.config.call_data()?;
107        debug!("Global data loaded");
108
109        // Stage 3: Call pages(global) to get page definitions
110        trace!("Stage 3: Calling pages() function");
111        let pages = self.config.call_pages(&global_data)?;
112        debug!("Found {} pages to generate", pages.len());
113
114        // Cache for incremental builds
115        self.cached_global_data = Some(global_data.clone());
116        self.cached_pages = Some(pages.clone());
117
118        // Stage 4: Load templates
119        trace!("Stage 5: Loading templates");
120        let templates = Templates::new(
121            &self.resolve_path(&self.config.paths.templates),
122            Some(self.tracker.clone()),
123            Some(self.config.asset_manifest.clone()),
124        )?;
125
126        // Stage 6: Render all pages in parallel
127        trace!("Stage 6: Rendering {} pages", pages.len());
128        self.render_pages(&pages, &global_data, &templates)?;
129
130        info!("Build complete: {} pages generated", pages.len());
131        rs_print!("Generated {} pages", pages.len());
132
133        trace!("Running after_build hook");
134        self.config.call_after_build()?;
135
136        // Merge all thread-local tracking data and save
137        self.tracker.merge_all_threads();
138        self.save_cached_deps()?;
139
140        Ok(())
141    }
142
143    /// Save tracked dependencies to cache file
144    fn save_cached_deps(&self) -> Result<()> {
145        // Clean up stale files before saving
146        self.cleanup_stale_writes();
147
148        let cache_path = self.project_dir.join(CACHE_FILE);
149        let deps = CachedDeps::from_tracker(&self.tracker);
150        deps.save(&cache_path)
151            .with_context(|| format!("Failed to save dependency cache to {:?}", cache_path))?;
152        debug!(
153            "Saved dependency cache: {} reads, {} writes",
154            deps.reads.len(),
155            deps.writes.len()
156        );
157        Ok(())
158    }
159
160    /// Remove files that were written in previous build but not in current build
161    fn cleanup_stale_writes(&self) {
162        let Some(old_deps) = &self.cached_deps else {
163            return;
164        };
165
166        let new_writes = self.tracker.get_writes();
167        let mut removed = 0;
168
169        for old_path in old_deps.writes.keys() {
170            // Only clean up files in output directory
171            if !old_path.starts_with(&self.output_dir) {
172                continue;
173            }
174
175            // If file was written before but not now, it's stale
176            if !new_writes.contains_key(old_path) && old_path.exists() {
177                if let Err(e) = std::fs::remove_file(old_path) {
178                    debug!("Failed to remove stale file {:?}: {}", old_path, e);
179                } else {
180                    debug!("Removed stale file: {:?}", old_path);
181                    removed += 1;
182                }
183            }
184        }
185
186        if removed > 0 {
187            debug!("Cleaned up {} stale files", removed);
188        }
189    }
190
191    /// Remove files that exist in old_writes but not in new_writes
192    fn cleanup_stale_files(
193        &self,
194        old_writes: &std::collections::HashMap<PathBuf, crate::tracker::FileState>,
195        new_writes: &std::collections::HashMap<PathBuf, crate::tracker::FileState>,
196    ) {
197        let mut removed = 0;
198
199        for old_path in old_writes.keys() {
200            // Only clean up files in output directory
201            if !old_path.starts_with(&self.output_dir) {
202                continue;
203            }
204
205            // If file was written before but not now, it's stale
206            if !new_writes.contains_key(old_path) && old_path.exists() {
207                if let Err(e) = std::fs::remove_file(old_path) {
208                    debug!("Failed to remove stale file {:?}: {}", old_path, e);
209                } else {
210                    debug!("Removed stale file: {:?}", old_path);
211                    removed += 1;
212                }
213            }
214        }
215
216        if removed > 0 {
217            rs_print!("Cleaned up {} stale files", removed);
218        }
219    }
220
221    /// Get files that have changed since last build
222    pub fn get_changed_files(&self) -> Vec<PathBuf> {
223        match &self.cached_deps {
224            Some(cached) => self.tracker.get_changed_files(cached),
225            None => Vec::new(), // No cache means full rebuild needed
226        }
227    }
228
    /// Check if a full rebuild is needed
    ///
    /// This is true only when no dependency cache from a previous build was
    /// loaded; config changes are not detected by this method.
    pub fn needs_full_rebuild(&self) -> bool {
        self.cached_deps.is_none()
    }
233
234    /// Check if a file was tracked as a dependency in the last build
235    /// This includes both explicit reads (via Lua API) and implicit refs (via HTML/markdown)
236    pub fn is_tracked_file(&self, path: &Path) -> bool {
237        if let Some(ref cached) = self.cached_deps {
238            // Canonicalize path for comparison
239            let path = path.canonicalize().unwrap_or_else(|_| path.to_path_buf());
240            // Check if it was read via Lua API (copy_file, read_file, etc.)
241            if cached.reads.contains_key(&path) {
242                return true;
243            }
244            // Check if it's referenced in any page's HTML/markdown
245            if cached.asset_to_pages.contains_key(&path) {
246                return true;
247            }
248            false
249        } else {
250            // No cache, assume all files are relevant
251            true
252        }
253    }
254
255    /// Check if any tracked files have changed since last build
256    pub fn has_tracked_changes(&self) -> bool {
257        if let Some(ref cached) = self.cached_deps {
258            !self.tracker.get_changed_files(cached).is_empty()
259        } else {
260            true // No cache means we need to build
261        }
262    }
263
264    fn clean(&self) -> Result<()> {
265        if self.output_dir.exists() {
266            debug!("Removing existing output directory: {:?}", self.output_dir);
267            fs::remove_dir_all(&self.output_dir).with_context(|| {
268                format!("Failed to clean output directory: {:?}", self.output_dir)
269            })?;
270        }
271        trace!("Creating output directories");
272        fs::create_dir_all(&self.output_dir)?;
273        fs::create_dir_all(self.output_dir.join("static"))?;
274        Ok(())
275    }
276
277    /// Remove pages that existed in the old build but not in the new one
278    fn remove_stale_pages(&self, old_pages: &[PageDef], new_pages: &[PageDef]) -> Result<()> {
279        use std::collections::HashSet;
280
281        // Collect new page paths
282        let new_paths: HashSet<&str> = new_pages.iter().map(|p| p.path.as_str()).collect();
283
284        // Find and remove stale pages
285        for old_page in old_pages {
286            if !new_paths.contains(old_page.path.as_str()) {
287                let relative_path = old_page.path.trim_matches('/');
288
289                // Check if path has a file extension
290                let has_extension = relative_path.contains('.')
291                    && !relative_path.ends_with('/')
292                    && relative_path
293                        .rsplit('/')
294                        .next()
295                        .map(|s| s.contains('.'))
296                        .unwrap_or(false);
297
298                let file_path = if has_extension {
299                    self.output_dir.join(relative_path)
300                } else if relative_path.is_empty() {
301                    self.output_dir.join("index.html")
302                } else {
303                    self.output_dir.join(relative_path).join("index.html")
304                };
305
306                if file_path.exists() {
307                    rs_print!("  Removed: {}", old_page.path);
308                    fs::remove_file(&file_path)?;
309
310                    // Try to remove empty parent directory
311                    if let Some(parent) = file_path.parent()
312                        && parent != self.output_dir
313                        && parent.read_dir()?.next().is_none()
314                    {
315                        let _ = fs::remove_dir(parent);
316                    }
317                }
318            }
319        }
320
321        Ok(())
322    }
323
324    fn render_pages(
325        &self,
326        pages: &[PageDef],
327        global_data: &serde_json::Value,
328        templates: &Templates,
329    ) -> Result<()> {
330        // Render all pages in parallel
331        pages
332            .par_iter()
333            .try_for_each(|page| self.render_single_page(page, global_data, templates))?;
334
335        Ok(())
336    }
337
338    fn render_single_page(
339        &self,
340        page: &PageDef,
341        global_data: &serde_json::Value,
342        templates: &Templates,
343    ) -> Result<()> {
344        trace!("Rendering page: {}", page.path);
345
346        // Process content through markdown using Lua rs.markdown.render if provided
347        let html_content = if let Some(ref markdown) = page.content {
348            Some(self.config.render_markdown(markdown)?)
349        } else {
350            page.html.clone()
351        };
352
353        // If no template, output html directly (for raw text/xml files)
354        let html = if page.template.is_none() {
355            html_content.unwrap_or_default()
356        } else {
357            templates.render_page(&self.config, page, global_data, html_content.as_deref())?
358        };
359
360        // Extract asset references from the generated HTML and markdown content
361        self.extract_and_record_asset_refs(page, &html);
362
363        // Minify HTML if enabled (default: true)
364        let html = if page.minify {
365            minify_html(&html)
366        } else {
367            html
368        };
369
370        // Write output file
371        let relative_path = page.path.trim_matches('/');
372
373        // Check if path has a file extension (e.g., feed.xml, sitemap.json)
374        let has_extension = relative_path.contains('.')
375            && !relative_path.ends_with('/')
376            && relative_path
377                .rsplit('/')
378                .next()
379                .map(|s| s.contains('.'))
380                .unwrap_or(false);
381
382        if has_extension {
383            // Write directly to file path (e.g., /feed.xml -> dist/feed.xml)
384            let file_path = self.output_dir.join(relative_path);
385            if let Some(parent) = file_path.parent() {
386                fs::create_dir_all(parent)?;
387            }
388            fs::write(&file_path, &html)?;
389            // Track the write for CSS purging
390            self.tracker.record_write(file_path, html.as_bytes());
391        } else {
392            // Write to directory with index.html (e.g., /about/ -> dist/about/index.html)
393            let page_dir = if relative_path.is_empty() {
394                self.output_dir.clone()
395            } else {
396                self.output_dir.join(relative_path)
397            };
398            fs::create_dir_all(&page_dir)?;
399            let file_path = page_dir.join("index.html");
400            fs::write(&file_path, &html)?;
401            // Track the write for CSS purging
402            self.tracker.record_write(file_path, html.as_bytes());
403        }
404
405        Ok(())
406    }
407
408    /// Extract asset references from HTML and markdown, record them in tracker
409    fn extract_and_record_asset_refs(&self, page: &PageDef, html: &str) {
410        // Extract from HTML
411        let mut url_paths: Vec<String> = extract_html_asset_refs(html);
412
413        // Also extract from markdown content if present
414        if let Some(ref markdown) = page.content {
415            let md_refs = extract_markdown_asset_refs(markdown);
416            url_paths.extend(md_refs);
417        }
418
419        if url_paths.is_empty() {
420            return;
421        }
422
423        // Get writes to resolve URL paths to source files
424        let writes = self.tracker.get_writes();
425
426        // Convert URL paths to AssetRefs with source resolution
427        let asset_refs: Vec<AssetRef> = url_paths
428            .into_iter()
429            .map(|url_path| {
430                let source_path =
431                    resolve_url_to_source(&url_path, &self.output_dir, &writes, &self.project_dir);
432                AssetRef {
433                    url_path,
434                    source_path,
435                }
436            })
437            .collect();
438
439        // Record in tracker
440        let page_path = PathBuf::from(&page.path);
441        self.tracker.record_html_refs(page_path, asset_refs);
442    }
443
444    /// Perform an incremental build based on what changed
445    /// Uses tracker data to filter changes to only files that were actually used
446    pub fn incremental_build(&mut self, changes: &crate::watch::ChangeSet) -> Result<()> {
447        debug!("Starting incremental build");
448        trace!("Change set: {:?}", changes);
449
450        // Config changed - full rebuild needed (Lua functions may have changed)
451        if changes.full_rebuild {
452            return self.build();
453        }
454
455        // Filter content changes to only files that were tracked as dependencies
456        let relevant_content: Vec<PathBuf> = changes
457            .content_files
458            .iter()
459            .filter(|p| {
460                let full_path = self.project_dir.join(p);
461                let is_tracked = self.is_tracked_file(&full_path);
462                if !is_tracked {
463                    trace!("Skipping untracked content file: {:?}", p);
464                }
465                is_tracked
466            })
467            .map(|p| self.project_dir.join(p))
468            .collect();
469
470        // Filter asset changes to only files that were tracked as dependencies
471        let relevant_assets: Vec<PathBuf> = changes
472            .asset_files
473            .iter()
474            .filter(|p| {
475                let full_path = self.project_dir.join(p);
476                let is_tracked = self.is_tracked_file(&full_path);
477                if !is_tracked {
478                    trace!("Skipping untracked asset file: {:?}", p);
479                }
480                is_tracked
481            })
482            .map(|p| self.project_dir.join(p))
483            .collect();
484
485        // Log skipped files
486        if !changes.content_files.is_empty() && relevant_content.is_empty() {
487            debug!(
488                "All {} content files were untracked, skipping",
489                changes.content_files.len()
490            );
491        }
492        if !changes.asset_files.is_empty() && relevant_assets.is_empty() {
493            debug!(
494                "All {} asset files were untracked, skipping",
495                changes.asset_files.len()
496            );
497        }
498
499        // Handle asset changes first (before_build runs copy_file, etc.)
500        if !relevant_assets.is_empty() {
501            debug!(
502                "{} tracked assets changed (out of {} total)",
503                relevant_assets.len(),
504                changes.asset_files.len()
505            );
506            self.rebuild_assets_only(&relevant_assets)?;
507        }
508
509        // Handle CSS changes
510        if changes.rebuild_css {
511            self.rebuild_css_only()?;
512        }
513
514        // Content files changed - try incremental update
515        if !relevant_content.is_empty() {
516            debug!(
517                "{} tracked content files changed (out of {} total)",
518                relevant_content.len(),
519                changes.content_files.len()
520            );
521            return self.rebuild_content_only(&relevant_content);
522        }
523
524        // Template changes - re-render affected pages with cached data (skip Lua calls)
525        if changes.has_template_changes() {
526            for path in &changes.template_files {
527                rs_print!("  Changed: {}", path.display());
528            }
529            return self.rebuild_templates_only(&changes.template_files);
530        }
531
532        Ok(())
533    }
534
535    /// Rebuild content - use incremental update if available, otherwise full data reload
536    fn rebuild_content_only(&mut self, changed_paths: &[PathBuf]) -> Result<()> {
537        debug!(
538            "Content-only rebuild for {} changed files",
539            changed_paths.len()
540        );
541
542        // Print changed files
543        for path in changed_paths {
544            if let Ok(rel) = path.strip_prefix(&self.project_dir) {
545                rs_print!("  Changed: {}", rel.display());
546            } else {
547                rs_print!("  Changed: {}", path.display());
548            }
549        }
550
551        // Try incremental update if update_data function exists and we have cached data
552        let global_data = if self.config.has_update_data() && self.cached_global_data.is_some() {
553            debug!("Using incremental update_data()");
554            let cached = self.cached_global_data.as_ref().unwrap();
555            // Convert absolute paths to relative paths for Lua
556            let relative_paths: Vec<PathBuf> = changed_paths
557                .iter()
558                .filter_map(|p| {
559                    p.strip_prefix(&self.project_dir)
560                        .ok()
561                        .map(|r| r.to_path_buf())
562                })
563                .collect();
564            self.config.call_update_data(cached, &relative_paths)?
565        } else {
566            debug!("Using full data() reload");
567            self.config.call_data()?
568        };
569
570        let pages = self.config.call_pages(&global_data)?;
571
572        // Remove stale pages that no longer exist in the new page list
573        if let Some(ref old_pages) = self.cached_pages {
574            self.remove_stale_pages(old_pages, &pages)?;
575        }
576
577        // Update cache
578        self.cached_global_data = Some(global_data.clone());
579        self.cached_pages = Some(pages.clone());
580
581        // Reload templates and re-render
582        let templates = Templates::new(
583            &self.resolve_path(&self.config.paths.templates),
584            Some(self.tracker.clone()),
585            Some(self.config.asset_manifest.clone()),
586        )?;
587        self.render_pages(&pages, &global_data, &templates)?;
588
589        // Merge thread-local tracking data and save
590        self.tracker.merge_all_threads();
591        self.save_cached_deps()?;
592
593        self.config.call_after_build()?;
594
595        rs_print!("Re-rendered {} pages (content changed)", pages.len());
596        Ok(())
597    }
598
599    /// Rebuild only by re-rendering templates with cached data
600    fn rebuild_templates_only(
601        &mut self,
602        changed_template_files: &std::collections::HashSet<PathBuf>,
603    ) -> Result<()> {
604        let (global_data, all_pages) = match (&self.cached_global_data, &self.cached_pages) {
605            (Some(data), Some(pages)) => (data.clone(), pages.clone()),
606            _ => {
607                // No cache available, do a full build to populate it
608                log::info!("No cached data available, performing full build");
609                return self.build();
610            }
611        };
612
613        // Reload templates and get dependency graph
614        let template_dir = self.resolve_path(&self.config.paths.templates);
615        let templates = Templates::new(
616            &template_dir,
617            Some(self.tracker.clone()),
618            Some(self.config.asset_manifest.clone()),
619        )?;
620        let deps = templates.deps();
621
622        // Find all affected templates (transitively)
623        let mut affected_templates = std::collections::HashSet::new();
624        for changed_path in changed_template_files {
625            // Find template name from path
626            if let Some(template_name) = deps.find_template_by_path(changed_path) {
627                let transitive = deps.get_affected_templates(template_name);
628                affected_templates.extend(transitive);
629            } else if let Ok(rel_path) = changed_path.strip_prefix(&template_dir) {
630                // Try relative path as template name
631                let template_name = rel_path.to_string_lossy().to_string();
632                let transitive = deps.get_affected_templates(&template_name);
633                affected_templates.extend(transitive);
634            }
635        }
636
637        debug!("Affected templates: {:?}", affected_templates);
638
639        // Filter pages to only those using affected templates
640        let pages_to_rebuild: Vec<_> = all_pages
641            .iter()
642            .filter(|page| {
643                if let Some(ref template) = page.template {
644                    affected_templates.contains(template)
645                } else {
646                    false
647                }
648            })
649            .cloned()
650            .collect();
651
652        if pages_to_rebuild.is_empty() {
653            rs_print!("No pages affected by template changes");
654            return Ok(());
655        }
656
657        debug!(
658            "Template rebuild: {} of {} pages affected",
659            pages_to_rebuild.len(),
660            all_pages.len()
661        );
662
663        // Re-render only affected pages with cached data
664        self.render_pages(&pages_to_rebuild, &global_data, &templates)?;
665
666        // Merge thread-local tracking data and save (cleanup stale files)
667        self.tracker.merge_all_threads();
668        self.save_cached_deps()?;
669
670        self.config.call_after_build()?;
671
672        rs_print!(
673            "Re-rendered {} of {} pages (templates changed)",
674            pages_to_rebuild.len(),
675            all_pages.len()
676        );
677        Ok(())
678    }
679
    /// Rebuild CSS by calling before_build hook (CSS is now handled via Lua)
    ///
    /// After the hook ran, tracking data is merged, the dependency cache is
    /// saved and the `after_build` hook is called. Pages are not re-rendered.
    ///
    /// # Errors
    ///
    /// Propagates a failure of either hook or of saving the dependency cache.
    fn rebuild_css_only(&self) -> Result<()> {
        rs_print!("  Changed: styles");
        // The before_build hook is what regenerates the stylesheets.
        self.config.call_before_build()?;
        rs_print!("Rebuilt CSS");

        // Merge thread-local tracking data and save
        self.tracker.merge_all_threads();
        self.save_cached_deps()?;

        self.config.call_after_build()?;

        Ok(())
    }
694
695    /// Rebuild assets by calling before_build hook and re-rendering pages
696    /// Pages need to be re-rendered because asset hashes may have changed
697    fn rebuild_assets_only(&mut self, changed_paths: &[PathBuf]) -> Result<()> {
698        // Save old writes before clearing to find stale files later
699        let old_writes = self.tracker.get_writes();
700        self.tracker.clear_writes();
701
702        for path in changed_paths {
703            if let Ok(rel) = path.strip_prefix(&self.project_dir) {
704                rs_print!("  Changed: {}", rel.display());
705            } else {
706                rs_print!("  Changed: {}", path.display());
707            }
708        }
709        // Run before_build which rebuilds assets and updates the manifest
710        self.config.call_before_build()?;
711
712        // Re-render all pages since asset paths may have changed
713        // (the asset filter uses the manifest which was just updated)
714        if let (Some(global_data), Some(pages)) = (&self.cached_global_data, &self.cached_pages) {
715            let templates = Templates::new(
716                &self.resolve_path(&self.config.paths.templates),
717                Some(self.tracker.clone()),
718                Some(self.config.asset_manifest.clone()),
719            )?;
720            self.render_pages(pages, global_data, &templates)?;
721            rs_print!(
722                "Rebuilt {} assets, re-rendered {} pages",
723                changed_paths.len(),
724                pages.len()
725            );
726        } else {
727            rs_print!("Rebuilt {} assets", changed_paths.len());
728        }
729
730        // Merge thread-local tracking data
731        self.tracker.merge_all_threads();
732
733        // Find and delete stale files (in old_writes but not in new_writes)
734        let new_writes = self.tracker.get_writes();
735        self.cleanup_stale_files(&old_writes, &new_writes);
736
737        self.save_cached_deps()?;
738
739        self.config.call_after_build()?;
740
741        Ok(())
742    }
743
744    /// Reload config from disk
745    pub fn reload_config(&mut self) -> Result<()> {
746        debug!("Reloading config from {:?}", self.project_dir);
747        self.config = crate::config::Config::load(&self.project_dir)?;
748        // Clear cache since Lua functions might produce different output
749        self.cached_global_data = None;
750        self.cached_pages = None;
751        info!("Config reloaded successfully");
752        Ok(())
753    }
754
    /// Get a reference to the current config
    ///
    /// The reference reflects the most recently loaded config, i.e. the one
    /// installed by the constructor or by `reload_config`.
    pub fn config(&self) -> &Config {
        &self.config
    }
759}
760
761/// Minify HTML content with OXC-based inline JS minification
762fn minify_html(html: &str) -> String {
763    // First, minify inline JS with OXC (minify-js has bugs)
764    let html = minify_inline_js(html);
765
766    let cfg = minify_html::Cfg {
767        minify_js: false,
768        minify_css: true,
769        ..Default::default()
770    };
771    let minified = minify_html::minify(html.as_bytes(), &cfg);
772    String::from_utf8(minified).unwrap_or_else(|_| html.to_string())
773}
774
775/// Minify inline <script> tags using OXC
776fn minify_inline_js(html: &str) -> String {
777    use oxc_allocator::Allocator;
778    use oxc_codegen::{Codegen, CodegenOptions};
779    use oxc_minifier::{CompressOptions, MangleOptions, Minifier, MinifierOptions};
780    use oxc_parser::Parser;
781    use oxc_span::SourceType;
782    use regex::Regex;
783
784    let re = Regex::new(r"(?s)(<script(?:\s[^>]*)?>)(.*?)(</script>)").unwrap();
785
786    re.replace_all(html, |caps: &regex::Captures| {
787        let open_tag = &caps[1];
788        let content = &caps[2];
789        let close_tag = &caps[3];
790
791        // Skip external scripts (src=) or empty scripts
792        if open_tag.contains("src=") || content.trim().is_empty() {
793            return format!("{}{}{}", open_tag, content, close_tag);
794        }
795
796        // Try to minify with OXC
797        let allocator = Allocator::default();
798        let source_type = SourceType::mjs();
799        let ret = Parser::new(&allocator, content, source_type).parse();
800
801        if !ret.errors.is_empty() {
802            // Parse error - return original
803            return format!("{}{}{}", open_tag, content, close_tag);
804        }
805
806        let mut program = ret.program;
807        let options = MinifierOptions {
808            mangle: Some(MangleOptions::default()),
809            compress: Some(CompressOptions::default()),
810        };
811
812        Minifier::new(options).minify(&allocator, &mut program);
813        let minified = Codegen::new()
814            .with_options(CodegenOptions::minify())
815            .build(&program)
816            .code;
817
818        format!("{}{}{}", open_tag, minified, close_tag)
819    })
820    .to_string()
821}
822
#[cfg(test)]
mod tests {
    //! Unit tests for the HTML / inline-JS minification helpers.

    use super::*;

    /// Whitespace between tags is collapsed while the text survives.
    #[test]
    fn test_minify_html_basic() {
        let source = "<html>  <body>   <p>Hello</p>  </body>  </html>";
        let minified = minify_html(source);
        assert!(minified.len() <= source.len());
        assert!(minified.contains("Hello"));
    }

    /// Pre tags should preserve whitespace.
    #[test]
    fn test_minify_html_preserves_pre() {
        let minified = minify_html("<pre>  code  with  spaces  </pre>");
        assert!(minified.contains("code  with  spaces"));
    }

    /// A multi-line inline script loses line breaks but keeps its tags.
    #[test]
    fn test_minify_inline_js_basic() {
        let input = r#"<script>
            function hello() {
                console.log("hi");
            }
        </script>"#;
        let result = minify_inline_js(input);

        let newlines_before = input.matches('\n').count();
        let newlines_after = result.matches('\n').count();
        assert!(newlines_after == 0 || newlines_after < newlines_before);

        for tag in ["<script>", "</script>"] {
            assert!(result.contains(tag));
        }
    }

    /// Scripts with a `src` attribute are passed through untouched.
    #[test]
    fn test_minify_inline_js_skips_external() {
        let external = r#"<script src="/js/app.js"></script>"#;
        assert_eq!(minify_inline_js(external), external);
    }

    /// Empty scripts are passed through untouched.
    #[test]
    fn test_minify_inline_js_skips_empty() {
        let empty = "<script></script>";
        assert_eq!(minify_inline_js(empty), empty);
    }

    /// Each script in a document is handled independently.
    #[test]
    fn test_minify_inline_js_multiple_scripts() {
        // Use console.log to prevent DCE
        let result =
            minify_inline_js(r#"<script>console.log(1);</script><script>console.log(2);</script>"#);
        let has_first = result.contains("console.log(1)");
        let has_second = result.contains("console.log(2)");
        assert!(has_first && has_second, "Result: {}", result);
    }

    /// Unparseable JavaScript is kept as written.
    #[test]
    fn test_minify_inline_js_preserves_on_parse_error() {
        let result = minify_inline_js("<script>function { broken</script>");
        // Should preserve original on parse error
        assert!(result.contains("function { broken"));
    }

    /// Attributes on the opening tag survive minification.
    #[test]
    fn test_minify_inline_js_with_attributes() {
        // Use console.log to prevent DCE
        let result = minify_inline_js(r#"<script type="text/javascript">console.log(1);</script>"#);
        assert!(result.contains(r#"type="text/javascript""#));
    }

    /// Inline JS inside a full document goes through the JS minifier.
    #[test]
    fn test_minify_html_with_inline_js() {
        // Use console.log to prevent DCE
        let result =
            minify_html(r#"<html><head><script>console.log(true);</script></head></html>"#);
        // Should minify JS (true -> !0)
        let mentions_literal = result.contains("!0") || result.contains("true");
        assert!(mentions_literal, "Result: {}", result);
    }

    /// Inline CSS gets shorter.
    #[test]
    fn test_minify_html_css_minification() {
        let source = r#"<style>  body  {  color:  red;  }  </style>"#;
        assert!(minify_html(source).len() < source.len());
    }
}
919}