1use std::path::Path;
12use std::sync::LazyLock;
13
14use oxc_span::Span;
15
16use crate::asset_url::normalize_asset_url;
17use crate::sfc_template::angular::{self, ANGULAR_TPL_SENTINEL};
18use crate::{ImportInfo, ImportedName, MemberAccess, ModuleInfo};
19use fallow_types::discover::FileId;
20
/// Matches one complete HTML comment (`<!-- ... -->`).
///
/// `(?s)` lets `.` cross newlines so multi-line comments match, and the lazy
/// `.*?` stops at the first `-->` so adjacent comments are matched separately.
/// `collect_asset_refs` uses it to blank out commented-out markup before scanning.
static HTML_COMMENT_RE: LazyLock<regex::Regex> =
    LazyLock::new(|| crate::static_regex(r"(?s)<!--.*?-->"));
24
/// Matches a `<script ...>` opening tag that carries a quoted `src` attribute.
///
/// Flags: `s` (dot matches newline) and `i` (case-insensitive). Attributes
/// before `src` are skipped lazily; quoted attribute values are consumed as a
/// unit, so a `>` inside a quoted value does not end the tag early.
///
/// Capture group 1 is the `src` value (no surrounding quotes). The value may
/// not contain either quote character.
static SCRIPT_SRC_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
    crate::static_regex(r#"(?si)<script\b(?:[^>"']|"[^"]*"|'[^']*')*?\bsrc\s*=\s*["']([^"']+)["']"#)
});
31
/// Matches a `<link ...>` tag whose `rel` attribute appears BEFORE `href`.
///
/// Only `rel` values that are exactly `stylesheet` or `modulepreload`
/// (case-insensitive via the `i` flag) are accepted; other `rel` values such
/// as `icon` or `preload` do not match.
///
/// Capture group 1 is the `rel` value, capture group 2 is the `href` value.
static LINK_HREF_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
    crate::static_regex(
        r#"(?si)<link\b(?:[^>"']|"[^"]*"|'[^']*')*?\brel\s*=\s*["'](stylesheet|modulepreload)["'](?:[^>"']|"[^"]*"|'[^']*')*?\bhref\s*=\s*["']([^"']+)["']"#,
    )
});
40
/// Matches a `<link ...>` tag whose `href` attribute appears BEFORE `rel`.
///
/// Counterpart of `LINK_HREF_RE` for the opposite attribute order; the same
/// two `rel` values (`stylesheet`, `modulepreload`) are accepted.
///
/// Note the swapped groups: capture group 1 is the `href` value, capture
/// group 2 is the `rel` value.
static LINK_HREF_REVERSE_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
    crate::static_regex(
        r#"(?si)<link\b(?:[^>"']|"[^"]*"|'[^']*')*?\bhref\s*=\s*["']([^"']+)["'](?:[^>"']|"[^"]*"|'[^']*')*?\brel\s*=\s*["'](stylesheet|modulepreload)["']"#,
    )
});
47
/// Returns `true` when `path` has the extension `html`.
///
/// The comparison is exact and case-sensitive: `.htm`, `.HTML`, and
/// extensions that are not valid UTF-8 all yield `false`.
pub(crate) fn is_html_file(path: &Path) -> bool {
    let extension = path.extension().and_then(|os_ext| os_ext.to_str());
    matches!(extension, Some("html"))
}
54
/// Returns `true` when `src` points at a non-local resource that must not be
/// treated as a file in the project.
///
/// Recognised prefixes are `http://`, `https://`, protocol-relative `//`, and
/// `data:` URIs. URI schemes are case-insensitive, so the prefixes are
/// compared ignoring ASCII case (`HTTPS://cdn...` is remote just like
/// `https://cdn...`).
pub(crate) fn is_remote_url(src: &str) -> bool {
    const REMOTE_PREFIXES: [&str; 4] = ["http://", "https://", "//", "data:"];

    // Compare on bytes: slicing the `str` at `prefix.len()` could land inside
    // a multi-byte character, whereas a byte-slice `get` simply returns `None`
    // when the input is shorter than the prefix.
    let bytes = src.as_bytes();
    REMOTE_PREFIXES.iter().any(|prefix| {
        bytes
            .get(..prefix.len())
            .is_some_and(|head| head.eq_ignore_ascii_case(prefix.as_bytes()))
    })
}
62
/// Returns `true` when `value` still contains an unexpanded template marker.
///
/// Two markers are recognised anywhere in the string: `{{` (Handlebars-style
/// interpolation such as `{{rootURL}}`) and `###` (blueprint placeholders such
/// as `###APPNAME###`). Such values are not real paths yet.
pub(crate) fn is_template_placeholder(value: &str) -> bool {
    const MARKERS: [&str; 2] = ["{{", "###"];
    MARKERS.iter().any(|marker| value.contains(*marker))
}
80
81pub(crate) fn collect_asset_refs(source: &str) -> Vec<String> {
88 let stripped = HTML_COMMENT_RE.replace_all(source, "");
89 let mut refs: Vec<String> = Vec::new();
90
91 for cap in SCRIPT_SRC_RE.captures_iter(&stripped) {
92 if let Some(m) = cap.get(1) {
93 let src = m.as_str().trim();
94 if !src.is_empty() && !is_remote_url(src) && !is_template_placeholder(src) {
95 refs.push(src.to_string());
96 }
97 }
98 }
99
100 for cap in LINK_HREF_RE.captures_iter(&stripped) {
101 if let Some(m) = cap.get(2) {
102 let href = m.as_str().trim();
103 if !href.is_empty() && !is_remote_url(href) && !is_template_placeholder(href) {
104 refs.push(href.to_string());
105 }
106 }
107 }
108 for cap in LINK_HREF_REVERSE_RE.captures_iter(&stripped) {
109 if let Some(m) = cap.get(1) {
110 let href = m.as_str().trim();
111 if !href.is_empty() && !is_remote_url(href) && !is_template_placeholder(href) {
112 refs.push(href.to_string());
113 }
114 }
115 }
116
117 refs
118}
119
/// Test-only convenience wrapper around
/// `parse_html_to_module_with_complexity` that never requests template
/// complexity (`need_complexity` is fixed to `false`).
#[cfg(test)]
pub(crate) fn parse_html_to_module(file_id: FileId, source: &str, content_hash: u64) -> ModuleInfo {
    parse_html_to_module_with_complexity(file_id, source, content_hash, false)
}
125
/// Everything extracted from one HTML source, bundled so it can be handed from
/// `collect_html_module_parts` to `html_module_info` as a single value.
struct HtmlModuleParts {
    /// Side-effect imports built from the document's local asset references,
    /// sorted and de-duplicated by `source`.
    imports: Vec<ImportInfo>,
    /// Angular template references: bare identifiers recorded against
    /// `ANGULAR_TPL_SENTINEL`, followed by the template's own member accesses.
    member_accesses: Vec<MemberAccess>,
    /// Sink sites reported by `angular::collect_angular_template_refs`.
    security_sinks: Vec<fallow_types::extract::SinkSite>,
    /// Result of `angular::collect_angular_used_selectors` for the source.
    angular_used_selectors: Vec<String>,
    /// `true` when the source text contains `ngComponentOutlet`.
    has_dynamic_component_render: bool,
    /// Template complexity entries; empty when complexity was not requested.
    complexity: Vec<fallow_types::extract::FunctionComplexity>,
}
136
137fn collect_html_module_parts(source: &str, need_complexity: bool) -> HtmlModuleParts {
141 let mut imports: Vec<ImportInfo> = collect_asset_refs(source)
142 .into_iter()
143 .map(|raw| ImportInfo {
144 source: normalize_asset_url(&raw),
145 imported_name: ImportedName::SideEffect,
146 local_name: String::new(),
147 is_type_only: false,
148 from_style: false,
149 span: Span::default(),
150 source_span: Span::default(),
151 })
152 .collect();
153
154 imports.sort_unstable_by(|a, b| a.source.cmp(&b.source));
155 imports.dedup_by(|a, b| a.source == b.source);
156
157 let angular::AngularTemplateRefs {
158 identifiers,
159 member_accesses: template_member_accesses,
160 security_sinks,
161 } = angular::collect_angular_template_refs(source);
162 let mut member_accesses: Vec<MemberAccess> = identifiers
163 .into_iter()
164 .map(|name| MemberAccess {
165 object: ANGULAR_TPL_SENTINEL.to_string(),
166 member: name,
167 })
168 .collect();
169 member_accesses.extend(template_member_accesses);
170
171 let angular_used_selectors = angular::collect_angular_used_selectors(source);
176 let has_dynamic_component_render = source.contains("ngComponentOutlet");
177
178 let complexity = if need_complexity {
179 crate::template_complexity::compute_angular_template_complexity(source)
180 .into_iter()
181 .collect()
182 } else {
183 Vec::new()
184 };
185
186 HtmlModuleParts {
187 imports,
188 member_accesses,
189 security_sinks,
190 angular_used_selectors,
191 has_dynamic_component_render,
192 complexity,
193 }
194}
195
196pub(crate) fn parse_html_to_module_with_complexity(
198 file_id: FileId,
199 source: &str,
200 content_hash: u64,
201 need_complexity: bool,
202) -> ModuleInfo {
203 let parsed_suppressions = crate::suppress::parse_suppressions_from_source(source);
204 let parts = collect_html_module_parts(source, need_complexity);
205 html_module_info(file_id, content_hash, source, parsed_suppressions, parts)
206}
207
/// Assembles the final [`ModuleInfo`] for an HTML file.
///
/// Only a handful of fields carry data for HTML: the identity fields
/// (`file_id`, `content_hash`), the extracted `parts`, the parsed
/// suppressions, and `line_offsets` computed from `source`. Every other field
/// is set to its empty value (`Vec::new()`, `vec![]`, `false`, or `0`).
fn html_module_info(
    file_id: FileId,
    content_hash: u64,
    source: &str,
    parsed_suppressions: crate::suppress::ParsedSuppressions,
    parts: HtmlModuleParts,
) -> ModuleInfo {
    // Destructure exhaustively so a new `HtmlModuleParts` field cannot be
    // silently forgotten here.
    let HtmlModuleParts {
        imports,
        member_accesses,
        security_sinks,
        angular_used_selectors,
        has_dynamic_component_render,
        complexity,
    } = parts;

    ModuleInfo {
        file_id,
        exports: Vec::new(),
        // Populated: side-effect imports from <script>/<link> references.
        imports,
        re_exports: Vec::new(),
        dynamic_imports: Vec::new(),
        dynamic_import_patterns: Vec::new(),
        require_calls: Vec::new(),
        package_path_references: Vec::new(),
        // Populated: Angular template references.
        member_accesses,
        whole_object_uses: Vec::new(),
        has_cjs_exports: false,
        has_angular_component_template_url: false,
        content_hash,
        // Populated: suppression comments parsed from the HTML source.
        suppressions: parsed_suppressions.suppressions,
        unknown_suppression_kinds: parsed_suppressions.unknown_kinds,
        unused_import_bindings: Vec::new(),
        type_referenced_import_bindings: Vec::new(),
        value_referenced_import_bindings: Vec::new(),
        // Populated: computed from the raw source text.
        line_offsets: fallow_types::extract::compute_line_offsets(source),
        // Populated only when complexity was requested; otherwise empty.
        complexity,
        flag_uses: Vec::new(),
        class_heritage: vec![],
        injection_tokens: vec![],
        local_type_declarations: Vec::new(),
        public_signature_type_references: Vec::new(),
        namespace_object_aliases: Vec::new(),
        iconify_prefixes: Vec::new(),
        iconify_icon_names: Vec::new(),
        auto_import_candidates: Vec::new(),
        directives: Vec::new(),
        client_only_dynamic_import_spans: Vec::new(),
        // Populated: sink sites found in the Angular template.
        security_sinks,
        security_sinks_skipped: 0,
        security_unresolved_callee_sites: Vec::new(),
        tainted_bindings: Vec::new(),
        sanitized_sink_args: Vec::new(),
        security_control_sites: Vec::new(),
        callee_uses: Vec::new(),
        misplaced_directives: Vec::new(),
        inline_server_action_exports: Vec::new(),
        di_key_sites: Vec::new(),
        has_dynamic_provide: false,
        referenced_import_bindings: Vec::new(),
        component_props: Vec::new(),
        has_props_attrs_fallthrough: false,
        has_define_expose: false,
        has_define_model: false,
        has_unharvestable_props: false,
        component_emits: Vec::new(),
        angular_inputs: Vec::new(),
        angular_outputs: Vec::new(),
        angular_component_selectors: Vec::new(),
        // Populated: selectors used by the template.
        angular_used_selectors,
        angular_entry_component_refs: Vec::new(),
        // Populated: whether the template mentions `ngComponentOutlet`.
        has_dynamic_component_render,
        has_unharvestable_emits: false,
        has_dynamic_emit: false,
        has_emit_whole_object_use: false,
        load_return_keys: Vec::new(),
        has_unharvestable_load: false,
        has_load_data_whole_use: false,
        has_page_data_store_whole_use: false,
        component_functions: Vec::new(),
        react_props: Vec::new(),
        hook_uses: Vec::new(),
        render_edges: Vec::new(),
        svelte_dispatched_events: Vec::new(),
        svelte_listened_events: Vec::new(),
        has_dynamic_dispatch: false,
    }
}
299
/// Unit tests for HTML file detection, remote-URL filtering, asset-reference
/// extraction, and Angular template reference extraction.
#[cfg(test)]
mod tests {
    use super::*;

    // --- is_html_file: only the exact `html` extension is accepted ---

    #[test]
    fn is_html_file_html() {
        assert!(is_html_file(Path::new("index.html")));
    }

    #[test]
    fn is_html_file_nested() {
        assert!(is_html_file(Path::new("pages/about.html")));
    }

    #[test]
    fn is_html_file_rejects_htm() {
        assert!(!is_html_file(Path::new("index.htm")));
    }

    #[test]
    fn is_html_file_rejects_js() {
        assert!(!is_html_file(Path::new("app.js")));
    }

    #[test]
    fn is_html_file_rejects_ts() {
        assert!(!is_html_file(Path::new("app.ts")));
    }

    #[test]
    fn is_html_file_rejects_vue() {
        assert!(!is_html_file(Path::new("App.vue")));
    }

    // --- is_remote_url: remote prefixes vs. local paths ---

    #[test]
    fn remote_url_http() {
        assert!(is_remote_url("http://example.com/script.js"));
    }

    #[test]
    fn remote_url_https() {
        assert!(is_remote_url("https://cdn.example.com/style.css"));
    }

    #[test]
    fn remote_url_protocol_relative() {
        assert!(is_remote_url("//cdn.example.com/lib.js"));
    }

    #[test]
    fn remote_url_data() {
        assert!(is_remote_url("data:text/javascript;base64,abc"));
    }

    #[test]
    fn local_relative_not_remote() {
        assert!(!is_remote_url("./src/entry.js"));
    }

    #[test]
    fn local_root_relative_not_remote() {
        assert!(!is_remote_url("/src/entry.js"));
    }

    // --- <script src> extraction ---

    #[test]
    fn extracts_module_script_src() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script type="module" src="./src/entry.js"></script>"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/entry.js");
    }

    #[test]
    fn extracts_plain_script_src() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script src="./src/polyfills.js"></script>"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/polyfills.js");
    }

    #[test]
    fn extracts_multiple_scripts() {
        let info = parse_html_to_module(
            FileId(0),
            r#"
            <script type="module" src="./src/entry.js"></script>
            <script src="./src/polyfills.js"></script>
            "#,
            0,
        );
        assert_eq!(info.imports.len(), 2);
    }

    #[test]
    fn skips_inline_script() {
        let info = parse_html_to_module(FileId(0), r#"<script>console.log("hello");</script>"#, 0);
        assert!(info.imports.is_empty());
    }

    // --- template placeholders must not enter the import graph ---

    #[test]
    fn skips_handlebars_placeholder_in_script_src() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script src="{{rootURL}}assets/app.js"></script>
            <script src="{{config.assetsPath}}vendor.js"></script>"#,
            0,
        );
        assert!(
            info.imports.is_empty(),
            "Handlebars-placeholder script srcs should not enter the import graph; got {:?}",
            info.imports
        );
    }

    #[test]
    fn skips_handlebars_placeholder_in_link_href() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link rel="stylesheet" href="{{rootURL}}assets/app.css">"#,
            0,
        );
        assert!(info.imports.is_empty());
    }

    #[test]
    fn skips_ember_cli_blueprint_placeholder() {
        let info = parse_html_to_module(
            FileId(0),
            r####"<script src="###APPNAME###/app.js"></script>"####,
            0,
        );
        assert!(info.imports.is_empty());
    }

    #[test]
    fn extracts_normal_specifier_alongside_placeholders() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script src="{{rootURL}}assets/app.js"></script>
            <script src="./src/main.ts"></script>"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/main.ts");
    }

    // --- remote scripts are skipped ---

    #[test]
    fn skips_remote_script() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script src="https://cdn.example.com/lib.js"></script>"#,
            0,
        );
        assert!(info.imports.is_empty());
    }

    #[test]
    fn skips_protocol_relative_script() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script src="//cdn.example.com/lib.js"></script>"#,
            0,
        );
        assert!(info.imports.is_empty());
    }

    // --- <link rel="stylesheet|modulepreload"> extraction ---

    #[test]
    fn extracts_stylesheet_link() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link rel="stylesheet" href="./src/global.css" />"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/global.css");
    }

    #[test]
    fn extracts_modulepreload_link() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link rel="modulepreload" href="./src/vendor.js" />"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/vendor.js");
    }

    #[test]
    fn extracts_link_with_reversed_attrs() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link href="./src/global.css" rel="stylesheet" />"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/global.css");
    }

    // --- normalisation: bare specifiers gain a `./` prefix, others are unchanged ---

    #[test]
    fn bare_script_src_normalized_to_relative() {
        let info = parse_html_to_module(FileId(0), r#"<script src="app.js"></script>"#, 0);
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./app.js");
    }

    #[test]
    fn bare_module_script_src_normalized_to_relative() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script type="module" src="main.ts"></script>"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./main.ts");
    }

    #[test]
    fn bare_stylesheet_link_href_normalized_to_relative() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link rel="stylesheet" href="styles.css" />"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./styles.css");
    }

    #[test]
    fn bare_link_href_reversed_attrs_normalized_to_relative() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link href="styles.css" rel="stylesheet" />"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./styles.css");
    }

    #[test]
    fn bare_modulepreload_link_href_normalized_to_relative() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link rel="modulepreload" href="vendor.js" />"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./vendor.js");
    }

    #[test]
    fn bare_asset_with_subdir_normalized_to_relative() {
        let info = parse_html_to_module(FileId(0), r#"<script src="assets/app.js"></script>"#, 0);
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./assets/app.js");
    }

    #[test]
    fn root_absolute_script_src_unchanged() {
        let info = parse_html_to_module(FileId(0), r#"<script src="/src/main.ts"></script>"#, 0);
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "/src/main.ts");
    }

    #[test]
    fn parent_relative_script_src_unchanged() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script src="../shared/vendor.js"></script>"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "../shared/vendor.js");
    }

    // --- <link> tags with other `rel` values or remote hrefs are skipped ---

    #[test]
    fn skips_preload_link() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link rel="preload" href="./src/font.woff2" as="font" />"#,
            0,
        );
        assert!(info.imports.is_empty());
    }

    #[test]
    fn skips_icon_link() {
        let info =
            parse_html_to_module(FileId(0), r#"<link rel="icon" href="./favicon.ico" />"#, 0);
        assert!(info.imports.is_empty());
    }

    #[test]
    fn skips_remote_stylesheet() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link rel="stylesheet" href="https://fonts.googleapis.com/css" />"#,
            0,
        );
        assert!(info.imports.is_empty());
    }

    // --- commented-out markup is ignored ---

    #[test]
    fn skips_commented_out_script() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<!-- <script src="./old.js"></script> -->
            <script src="./new.js"></script>"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./new.js");
    }

    #[test]
    fn skips_commented_out_link() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<!-- <link rel="stylesheet" href="./old.css" /> -->
            <link rel="stylesheet" href="./new.css" />"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./new.css");
    }

    // --- tags spanning several lines ---

    #[test]
    fn handles_multiline_script_tag() {
        let info = parse_html_to_module(
            FileId(0),
            "<script\n type=\"module\"\n src=\"./src/entry.js\"\n></script>",
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/entry.js");
    }

    #[test]
    fn handles_multiline_link_tag() {
        let info = parse_html_to_module(
            FileId(0),
            "<link\n rel=\"stylesheet\"\n href=\"./src/global.css\"\n/>",
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/global.css");
    }

    // --- whole-document and degenerate inputs ---

    #[test]
    fn full_vite_html() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<!doctype html>
<html>
  <head>
    <link rel="stylesheet" href="./src/global.css" />
    <link rel="icon" href="/favicon.ico" />
  </head>
  <body>
    <div id="app"></div>
    <script type="module" src="./src/entry.js"></script>
  </body>
</html>"#,
            0,
        );
        // The icon link is not an import; only the stylesheet and the script count.
        assert_eq!(info.imports.len(), 2);
        let sources: Vec<&str> = info.imports.iter().map(|i| i.source.as_str()).collect();
        assert!(sources.contains(&"./src/global.css"));
        assert!(sources.contains(&"./src/entry.js"));
    }

    #[test]
    fn empty_html() {
        let info = parse_html_to_module(FileId(0), "", 0);
        assert!(info.imports.is_empty());
    }

    #[test]
    fn html_with_no_assets() {
        let info = parse_html_to_module(
            FileId(0),
            r"<!doctype html><html><body><h1>Hello</h1></body></html>",
            0,
        );
        assert!(info.imports.is_empty());
    }

    #[test]
    fn single_quoted_attributes() {
        let info = parse_html_to_module(FileId(0), r"<script src='./src/entry.js'></script>", 0);
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/entry.js");
    }

    #[test]
    fn all_imports_are_side_effect() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script src="./entry.js"></script>
            <link rel="stylesheet" href="./style.css" />"#,
            0,
        );
        for imp in &info.imports {
            assert!(matches!(imp.imported_name, ImportedName::SideEffect));
            assert!(imp.local_name.is_empty());
            assert!(!imp.is_type_only);
        }
    }

    // NOTE(review): despite its name this test only checks that the script
    // import survives next to a suppression comment; it never inspects
    // `info.suppressions`.
    #[test]
    fn suppression_comments_extracted() {
        let info = parse_html_to_module(
            FileId(0),
            "<!-- fallow-ignore-file -->\n<script src=\"./entry.js\"></script>",
            0,
        );
        assert_eq!(info.imports.len(), 1);
    }

    // --- Angular template references ---

    #[test]
    fn angular_template_extracts_member_refs() {
        let info = parse_html_to_module(
            FileId(0),
            "<h1>{{ title() }}</h1>\n\
             <p [class.highlighted]=\"isHighlighted\">{{ greeting() }}</p>\n\
             <button (click)=\"onButtonClick()\">Toggle</button>",
            0,
        );
        // Only accesses recorded against the template sentinel are of interest here.
        let names: rustc_hash::FxHashSet<&str> = info
            .member_accesses
            .iter()
            .filter(|a| a.object == ANGULAR_TPL_SENTINEL)
            .map(|a| a.member.as_str())
            .collect();
        assert!(names.contains("title"), "should contain 'title'");
        assert!(
            names.contains("isHighlighted"),
            "should contain 'isHighlighted'"
        );
        assert!(names.contains("greeting"), "should contain 'greeting'");
        assert!(
            names.contains("onButtonClick"),
            "should contain 'onButtonClick'"
        );
    }

    #[test]
    fn plain_html_no_angular_refs() {
        let info = parse_html_to_module(
            FileId(0),
            "<!doctype html><html><body><h1>Hello</h1></body></html>",
            0,
        );
        assert!(info.member_accesses.is_empty());
    }
}