1use std::path::Path;
12use std::sync::LazyLock;
13
14use oxc_span::Span;
15
16use crate::asset_url::normalize_asset_url;
17use crate::sfc_template::angular::{self, ANGULAR_TPL_SENTINEL};
18use crate::{ImportInfo, ImportedName, MemberAccess, ModuleInfo};
19use fallow_types::discover::FileId;
20
21static HTML_COMMENT_RE: LazyLock<regex::Regex> =
23 LazyLock::new(|| crate::static_regex(r"(?s)<!--.*?-->"));
24
25static SCRIPT_SRC_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
29 crate::static_regex(r#"(?si)<script\b(?:[^>"']|"[^"]*"|'[^']*')*?\bsrc\s*=\s*["']([^"']+)["']"#)
30});
31
32static LINK_HREF_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
36 crate::static_regex(
37 r#"(?si)<link\b(?:[^>"']|"[^"]*"|'[^']*')*?\brel\s*=\s*["'](stylesheet|modulepreload)["'](?:[^>"']|"[^"]*"|'[^']*')*?\bhref\s*=\s*["']([^"']+)["']"#,
38 )
39});
40
41static LINK_HREF_REVERSE_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
43 crate::static_regex(
44 r#"(?si)<link\b(?:[^>"']|"[^"]*"|'[^']*')*?\bhref\s*=\s*["']([^"']+)["'](?:[^>"']|"[^"]*"|'[^']*')*?\brel\s*=\s*["'](stylesheet|modulepreload)["']"#,
45 )
46});
47
/// Returns `true` when `path` has the extension `html`.
///
/// The comparison is exact and case-sensitive: `index.htm` and `INDEX.HTML`
/// are both rejected, as is a path with no extension or a non-UTF-8 one.
pub(crate) fn is_html_file(path: &Path) -> bool {
    matches!(path.extension().and_then(|ext| ext.to_str()), Some("html"))
}
54
/// Returns `true` when `src` points outside the project: an `http://` or
/// `https://` URL, a protocol-relative `//host/...` URL, or a `data:` URI.
///
/// URI schemes are case-insensitive, so the scheme prefixes are compared
/// ignoring ASCII case (`HTTPS://cdn...` is remote just like `https://cdn...`).
pub(crate) fn is_remote_url(src: &str) -> bool {
    const REMOTE_SCHEME_PREFIXES: [&str; 3] = ["http://", "https://", "data:"];

    src.starts_with("//")
        || REMOTE_SCHEME_PREFIXES.iter().any(|prefix| {
            // `get` yields `None` when `src` is shorter than the prefix or the
            // cut would split a multi-byte character; neither can be a match.
            src.get(..prefix.len())
                .is_some_and(|head| head.eq_ignore_ascii_case(prefix))
        })
}
62
/// Returns `true` when `value` contains an unexpanded template marker.
///
/// Two markers are recognised anywhere in the string: `{{` (as in
/// `{{rootURL}}assets/app.js`) and `###` (as in `###APPNAME###/app.js`).
pub(crate) fn is_template_placeholder(value: &str) -> bool {
    const PLACEHOLDER_MARKERS: [&str; 2] = ["{{", "###"];

    PLACEHOLDER_MARKERS
        .iter()
        .any(|&marker| value.contains(marker))
}
80
81pub(crate) fn collect_asset_refs(source: &str) -> Vec<String> {
88 let stripped = HTML_COMMENT_RE.replace_all(source, "");
89 let mut refs: Vec<String> = Vec::new();
90
91 for cap in SCRIPT_SRC_RE.captures_iter(&stripped) {
92 if let Some(m) = cap.get(1) {
93 let src = m.as_str().trim();
94 if !src.is_empty() && !is_remote_url(src) && !is_template_placeholder(src) {
95 refs.push(src.to_string());
96 }
97 }
98 }
99
100 for cap in LINK_HREF_RE.captures_iter(&stripped) {
101 if let Some(m) = cap.get(2) {
102 let href = m.as_str().trim();
103 if !href.is_empty() && !is_remote_url(href) && !is_template_placeholder(href) {
104 refs.push(href.to_string());
105 }
106 }
107 }
108 for cap in LINK_HREF_REVERSE_RE.captures_iter(&stripped) {
109 if let Some(m) = cap.get(1) {
110 let href = m.as_str().trim();
111 if !href.is_empty() && !is_remote_url(href) && !is_template_placeholder(href) {
112 refs.push(href.to_string());
113 }
114 }
115 }
116
117 refs
118}
119
120static CUSTOM_ELEMENT_TAG_RE: std::sync::LazyLock<regex::Regex> =
125 std::sync::LazyLock::new(|| crate::static_regex(r"</?\s*([a-z][a-z0-9]*-[a-z0-9-]*)"));
126
127pub(crate) fn collect_custom_element_tags(source: &str) -> Vec<String> {
133 let stripped = HTML_COMMENT_RE.replace_all(source, "");
134 let mut tags: Vec<String> = Vec::new();
135 for cap in CUSTOM_ELEMENT_TAG_RE.captures_iter(&stripped) {
136 if let Some(m) = cap.get(1) {
137 let tag = m.as_str();
138 if !tags.iter().any(|t| t == tag) {
139 tags.push(tag.to_string());
140 }
141 }
142 }
143 tags
144}
145
/// Test-only convenience wrapper around
/// [`parse_html_to_module_with_complexity`] with complexity collection
/// switched off.
#[cfg(test)]
pub(crate) fn parse_html_to_module(file_id: FileId, source: &str, content_hash: u64) -> ModuleInfo {
    parse_html_to_module_with_complexity(file_id, source, content_hash, false)
}
151
152struct HtmlModuleParts {
155 imports: Vec<ImportInfo>,
156 member_accesses: Vec<MemberAccess>,
157 security_sinks: Vec<fallow_types::extract::SinkSite>,
158 angular_used_selectors: Vec<String>,
159 has_dynamic_component_render: bool,
160 complexity: Vec<fallow_types::extract::FunctionComplexity>,
161}
162
163fn collect_html_module_parts(source: &str, need_complexity: bool) -> HtmlModuleParts {
167 let mut imports: Vec<ImportInfo> = collect_asset_refs(source)
168 .into_iter()
169 .map(|raw| ImportInfo {
170 source: normalize_asset_url(&raw),
171 imported_name: ImportedName::SideEffect,
172 local_name: String::new(),
173 is_type_only: false,
174 from_style: false,
175 span: Span::default(),
176 source_span: Span::default(),
177 })
178 .collect();
179
180 imports.sort_unstable_by(|a, b| a.source.cmp(&b.source));
181 imports.dedup_by(|a, b| a.source == b.source);
182
183 let angular::AngularTemplateRefs {
184 identifiers,
185 member_accesses: template_member_accesses,
186 security_sinks,
187 } = angular::collect_angular_template_refs(source);
188 let mut member_accesses: Vec<MemberAccess> = identifiers
189 .into_iter()
190 .map(|name| MemberAccess {
191 object: ANGULAR_TPL_SENTINEL.to_string(),
192 member: name,
193 })
194 .collect();
195 member_accesses.extend(template_member_accesses);
196
197 let angular_used_selectors = angular::collect_angular_used_selectors(source);
202 let has_dynamic_component_render = source.contains("ngComponentOutlet");
203
204 let complexity = if need_complexity {
205 crate::template_complexity::compute_angular_template_complexity(source)
206 .into_iter()
207 .collect()
208 } else {
209 Vec::new()
210 };
211
212 HtmlModuleParts {
213 imports,
214 member_accesses,
215 security_sinks,
216 angular_used_selectors,
217 has_dynamic_component_render,
218 complexity,
219 }
220}
221
222pub(crate) fn parse_html_to_module_with_complexity(
224 file_id: FileId,
225 source: &str,
226 content_hash: u64,
227 need_complexity: bool,
228) -> ModuleInfo {
229 let parsed_suppressions = crate::suppress::parse_suppressions_from_source(source);
230 let parts = collect_html_module_parts(source, need_complexity);
231 html_module_info(file_id, content_hash, source, parsed_suppressions, parts)
232}
233
234fn html_module_info(
238 file_id: FileId,
239 content_hash: u64,
240 source: &str,
241 parsed_suppressions: crate::suppress::ParsedSuppressions,
242 parts: HtmlModuleParts,
243) -> ModuleInfo {
244 let HtmlModuleParts {
245 imports,
246 member_accesses,
247 security_sinks,
248 angular_used_selectors,
249 has_dynamic_component_render,
250 complexity,
251 } = parts;
252
253 ModuleInfo {
254 file_id,
255 exports: Vec::new(),
256 imports,
257 re_exports: Vec::new(),
258 dynamic_imports: Vec::new(),
259 dynamic_import_patterns: Vec::new(),
260 require_calls: Vec::new(),
261 package_path_references: Vec::new(),
262 member_accesses,
263 whole_object_uses: Vec::new(),
264 has_cjs_exports: false,
265 has_angular_component_template_url: false,
266 content_hash,
267 suppressions: parsed_suppressions.suppressions,
268 unknown_suppression_kinds: parsed_suppressions.unknown_kinds,
269 unused_import_bindings: Vec::new(),
270 type_referenced_import_bindings: Vec::new(),
271 value_referenced_import_bindings: Vec::new(),
272 line_offsets: fallow_types::extract::compute_line_offsets(source),
273 complexity,
274 flag_uses: Vec::new(),
275 class_heritage: vec![],
276 injection_tokens: vec![],
277 local_type_declarations: Vec::new(),
278 public_signature_type_references: Vec::new(),
279 namespace_object_aliases: Vec::new(),
280 iconify_prefixes: Vec::new(),
281 iconify_icon_names: Vec::new(),
282 auto_import_candidates: Vec::new(),
283 directives: Vec::new(),
284 client_only_dynamic_import_spans: Vec::new(),
285 security_sinks,
286 security_sinks_skipped: 0,
287 security_unresolved_callee_sites: Vec::new(),
288 tainted_bindings: Vec::new(),
289 sanitized_sink_args: Vec::new(),
290 security_control_sites: Vec::new(),
291 callee_uses: Vec::new(),
292 misplaced_directives: Vec::new(),
293 inline_server_action_exports: Vec::new(),
294 di_key_sites: Vec::new(),
295 has_dynamic_provide: false,
296 referenced_import_bindings: Vec::new(),
297 component_props: Vec::new(),
298 has_props_attrs_fallthrough: false,
299 has_define_expose: false,
300 has_define_model: false,
301 has_unharvestable_props: false,
302 component_emits: Vec::new(),
303 angular_inputs: Vec::new(),
304 angular_outputs: Vec::new(),
305 angular_component_selectors: Vec::new(),
306 registered_custom_elements: Vec::new(),
307 used_custom_element_tags: collect_custom_element_tags(source),
312 angular_used_selectors,
313 angular_entry_component_refs: Vec::new(),
314 has_dynamic_component_render,
315 has_unharvestable_emits: false,
316 has_dynamic_emit: false,
317 has_emit_whole_object_use: false,
318 load_return_keys: Vec::new(),
319 has_unharvestable_load: false,
320 has_load_data_whole_use: false,
321 has_page_data_store_whole_use: false,
322 component_functions: Vec::new(),
323 react_props: Vec::new(),
324 hook_uses: Vec::new(),
325 render_edges: Vec::new(),
326 svelte_dispatched_events: Vec::new(),
327 svelte_listened_events: Vec::new(),
328 has_dynamic_dispatch: false,
329 }
330}
331
/// Unit tests for HTML file detection, remote-URL detection, asset-reference
/// extraction and Angular template reference extraction.
#[cfg(test)]
mod tests {
    use super::*;

    // --- is_html_file -----------------------------------------------------

    #[test]
    fn is_html_file_html() {
        assert!(is_html_file(Path::new("index.html")));
    }

    #[test]
    fn is_html_file_nested() {
        assert!(is_html_file(Path::new("pages/about.html")));
    }

    #[test]
    fn is_html_file_rejects_htm() {
        assert!(!is_html_file(Path::new("index.htm")));
    }

    #[test]
    fn is_html_file_rejects_js() {
        assert!(!is_html_file(Path::new("app.js")));
    }

    #[test]
    fn is_html_file_rejects_ts() {
        assert!(!is_html_file(Path::new("app.ts")));
    }

    #[test]
    fn is_html_file_rejects_vue() {
        assert!(!is_html_file(Path::new("App.vue")));
    }

    // --- is_remote_url ----------------------------------------------------

    #[test]
    fn remote_url_http() {
        assert!(is_remote_url("http://example.com/script.js"));
    }

    #[test]
    fn remote_url_https() {
        assert!(is_remote_url("https://cdn.example.com/style.css"));
    }

    #[test]
    fn remote_url_protocol_relative() {
        assert!(is_remote_url("//cdn.example.com/lib.js"));
    }

    #[test]
    fn remote_url_data() {
        assert!(is_remote_url("data:text/javascript;base64,abc"));
    }

    #[test]
    fn local_relative_not_remote() {
        assert!(!is_remote_url("./src/entry.js"));
    }

    #[test]
    fn local_root_relative_not_remote() {
        assert!(!is_remote_url("/src/entry.js"));
    }

    // --- <script src> extraction ------------------------------------------

    #[test]
    fn extracts_module_script_src() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script type="module" src="./src/entry.js"></script>"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/entry.js");
    }

    #[test]
    fn extracts_plain_script_src() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script src="./src/polyfills.js"></script>"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/polyfills.js");
    }

    #[test]
    fn extracts_multiple_scripts() {
        let info = parse_html_to_module(
            FileId(0),
            r#"
            <script type="module" src="./src/entry.js"></script>
            <script src="./src/polyfills.js"></script>
            "#,
            0,
        );
        assert_eq!(info.imports.len(), 2);
    }

    #[test]
    fn skips_inline_script() {
        let info = parse_html_to_module(FileId(0), r#"<script>console.log("hello");</script>"#, 0);
        assert!(info.imports.is_empty());
    }

    // --- template placeholders --------------------------------------------

    #[test]
    fn skips_handlebars_placeholder_in_script_src() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script src="{{rootURL}}assets/app.js"></script>
            <script src="{{config.assetsPath}}vendor.js"></script>"#,
            0,
        );
        assert!(
            info.imports.is_empty(),
            "Handlebars-placeholder script srcs should not enter the import graph; got {:?}",
            info.imports
        );
    }

    #[test]
    fn skips_handlebars_placeholder_in_link_href() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link rel="stylesheet" href="{{rootURL}}assets/app.css">"#,
            0,
        );
        assert!(info.imports.is_empty());
    }

    #[test]
    fn skips_ember_cli_blueprint_placeholder() {
        let info = parse_html_to_module(
            FileId(0),
            r####"<script src="###APPNAME###/app.js"></script>"####,
            0,
        );
        assert!(info.imports.is_empty());
    }

    #[test]
    fn extracts_normal_specifier_alongside_placeholders() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script src="{{rootURL}}assets/app.js"></script>
            <script src="./src/main.ts"></script>"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/main.ts");
    }

    // --- remote scripts ---------------------------------------------------

    #[test]
    fn skips_remote_script() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script src="https://cdn.example.com/lib.js"></script>"#,
            0,
        );
        assert!(info.imports.is_empty());
    }

    #[test]
    fn skips_protocol_relative_script() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script src="//cdn.example.com/lib.js"></script>"#,
            0,
        );
        assert!(info.imports.is_empty());
    }

    // --- <link href> extraction -------------------------------------------

    #[test]
    fn extracts_stylesheet_link() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link rel="stylesheet" href="./src/global.css" />"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/global.css");
    }

    #[test]
    fn extracts_modulepreload_link() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link rel="modulepreload" href="./src/vendor.js" />"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/vendor.js");
    }

    #[test]
    fn extracts_link_with_reversed_attrs() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link href="./src/global.css" rel="stylesheet" />"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/global.css");
    }

    // --- URL normalisation ------------------------------------------------

    #[test]
    fn bare_script_src_normalized_to_relative() {
        let info = parse_html_to_module(FileId(0), r#"<script src="app.js"></script>"#, 0);
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./app.js");
    }

    #[test]
    fn bare_module_script_src_normalized_to_relative() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script type="module" src="main.ts"></script>"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./main.ts");
    }

    #[test]
    fn bare_stylesheet_link_href_normalized_to_relative() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link rel="stylesheet" href="styles.css" />"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./styles.css");
    }

    #[test]
    fn bare_link_href_reversed_attrs_normalized_to_relative() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link href="styles.css" rel="stylesheet" />"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./styles.css");
    }

    #[test]
    fn bare_modulepreload_link_href_normalized_to_relative() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link rel="modulepreload" href="vendor.js" />"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./vendor.js");
    }

    #[test]
    fn bare_asset_with_subdir_normalized_to_relative() {
        let info = parse_html_to_module(FileId(0), r#"<script src="assets/app.js"></script>"#, 0);
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./assets/app.js");
    }

    #[test]
    fn root_absolute_script_src_unchanged() {
        let info = parse_html_to_module(FileId(0), r#"<script src="/src/main.ts"></script>"#, 0);
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "/src/main.ts");
    }

    #[test]
    fn parent_relative_script_src_unchanged() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script src="../shared/vendor.js"></script>"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "../shared/vendor.js");
    }

    // --- links that must be ignored ---------------------------------------

    #[test]
    fn skips_preload_link() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link rel="preload" href="./src/font.woff2" as="font" />"#,
            0,
        );
        assert!(info.imports.is_empty());
    }

    #[test]
    fn skips_icon_link() {
        let info =
            parse_html_to_module(FileId(0), r#"<link rel="icon" href="./favicon.ico" />"#, 0);
        assert!(info.imports.is_empty());
    }

    #[test]
    fn skips_remote_stylesheet() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link rel="stylesheet" href="https://fonts.googleapis.com/css" />"#,
            0,
        );
        assert!(info.imports.is_empty());
    }

    // --- HTML comments ----------------------------------------------------

    #[test]
    fn skips_commented_out_script() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<!-- <script src="./old.js"></script> -->
            <script src="./new.js"></script>"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./new.js");
    }

    #[test]
    fn skips_commented_out_link() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<!-- <link rel="stylesheet" href="./old.css" /> -->
            <link rel="stylesheet" href="./new.css" />"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./new.css");
    }

    // --- multi-line tags --------------------------------------------------

    #[test]
    fn handles_multiline_script_tag() {
        let info = parse_html_to_module(
            FileId(0),
            "<script\n  type=\"module\"\n  src=\"./src/entry.js\"\n></script>",
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/entry.js");
    }

    #[test]
    fn handles_multiline_link_tag() {
        let info = parse_html_to_module(
            FileId(0),
            "<link\n  rel=\"stylesheet\"\n  href=\"./src/global.css\"\n/>",
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/global.css");
    }

    // --- whole documents --------------------------------------------------

    #[test]
    fn full_vite_html() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<!doctype html>
<html>
  <head>
    <link rel="stylesheet" href="./src/global.css" />
    <link rel="icon" href="/favicon.ico" />
  </head>
  <body>
    <div id="app"></div>
    <script type="module" src="./src/entry.js"></script>
  </body>
</html>"#,
            0,
        );
        assert_eq!(info.imports.len(), 2);
        let sources: Vec<&str> = info.imports.iter().map(|i| i.source.as_str()).collect();
        assert!(sources.contains(&"./src/global.css"));
        assert!(sources.contains(&"./src/entry.js"));
    }

    #[test]
    fn empty_html() {
        let info = parse_html_to_module(FileId(0), "", 0);
        assert!(info.imports.is_empty());
    }

    #[test]
    fn html_with_no_assets() {
        let info = parse_html_to_module(
            FileId(0),
            r"<!doctype html><html><body><h1>Hello</h1></body></html>",
            0,
        );
        assert!(info.imports.is_empty());
    }

    #[test]
    fn single_quoted_attributes() {
        let info = parse_html_to_module(FileId(0), r"<script src='./src/entry.js'></script>", 0);
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/entry.js");
    }

    #[test]
    fn all_imports_are_side_effect() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script src="./entry.js"></script>
            <link rel="stylesheet" href="./style.css" />"#,
            0,
        );
        for imp in &info.imports {
            assert!(matches!(imp.imported_name, ImportedName::SideEffect));
            assert!(imp.local_name.is_empty());
            assert!(!imp.is_type_only);
        }
    }

    #[test]
    fn suppression_comments_extracted() {
        let info = parse_html_to_module(
            FileId(0),
            "<!-- fallow-ignore-file -->\n<script src=\"./entry.js\"></script>",
            0,
        );
        assert_eq!(info.imports.len(), 1);
    }

    // --- Angular template references --------------------------------------

    #[test]
    fn angular_template_extracts_member_refs() {
        let info = parse_html_to_module(
            FileId(0),
            "<h1>{{ title() }}</h1>\n\
             <p [class.highlighted]=\"isHighlighted\">{{ greeting() }}</p>\n\
             <button (click)=\"onButtonClick()\">Toggle</button>",
            0,
        );
        let names: rustc_hash::FxHashSet<&str> = info
            .member_accesses
            .iter()
            .filter(|a| a.object == ANGULAR_TPL_SENTINEL)
            .map(|a| a.member.as_str())
            .collect();
        assert!(names.contains("title"), "should contain 'title'");
        assert!(
            names.contains("isHighlighted"),
            "should contain 'isHighlighted'"
        );
        assert!(names.contains("greeting"), "should contain 'greeting'");
        assert!(
            names.contains("onButtonClick"),
            "should contain 'onButtonClick'"
        );
    }

    #[test]
    fn plain_html_no_angular_refs() {
        let info = parse_html_to_module(
            FileId(0),
            "<!doctype html><html><body><h1>Hello</h1></body></html>",
            0,
        );
        assert!(info.member_accesses.is_empty());
    }
}