1use std::path::Path;
12use std::sync::LazyLock;
13
14use oxc_span::Span;
15
16use crate::asset_url::normalize_asset_url;
17use crate::sfc_template::angular::{self, ANGULAR_TPL_SENTINEL};
18use crate::{ImportInfo, ImportedName, MemberAccess, ModuleInfo};
19use fallow_types::discover::FileId;
20
/// Matches one HTML comment, `<!--` through the nearest following `-->`.
///
/// The `(?s)` flag lets `.` match newlines, so comments spanning several
/// lines are matched as a single unit; the lazy `.*?` stops at the first
/// closing marker rather than the last one in the document.
///
/// NOTE(review): `crate::static_regex` is defined elsewhere; presumably it
/// compiles the pattern and treats failure as a programming error — confirm.
static HTML_COMMENT_RE: LazyLock<regex::Regex> =
    LazyLock::new(|| crate::static_regex(r"(?s)<!--.*?-->"));
24
/// Matches a `<script …>` opening tag up to and including its quoted `src`
/// attribute. Capture group 1 is the attribute value (without the quotes).
///
/// Flags: `s` lets the tag span multiple lines, `i` makes tag and attribute
/// names case-insensitive. Attributes preceding `src` are skipped one
/// character at a time, except that quoted values are consumed whole, so a
/// `>` inside an earlier quoted value does not terminate the scan.
///
/// Only quoted values are recognised, and the value may contain neither `"`
/// nor `'`.
///
/// NOTE(review): `\bsrc` is also satisfied by the tail of a hyphenated
/// attribute such as `data-src` (`-` is a non-word character) — confirm
/// whether matching those is intended.
static SCRIPT_SRC_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
    crate::static_regex(r#"(?si)<script\b(?:[^>"']|"[^"]*"|'[^']*')*?\bsrc\s*=\s*["']([^"']+)["']"#)
});
31
/// Matches a `<link …>` tag whose `rel` attribute appears *before* `href`.
///
/// Capture group 1 is the `rel` value, capture group 2 is the `href` value.
/// The `rel` value must be exactly `stylesheet` or `modulepreload`
/// (case-insensitively, because of the `i` flag); multi-token values such as
/// `rel="alternate stylesheet"` do not match. The `s` flag lets the tag span
/// multiple lines.
///
/// The opposite attribute order is handled by `LINK_HREF_REVERSE_RE`.
static LINK_HREF_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
    crate::static_regex(
        r#"(?si)<link\b(?:[^>"']|"[^"]*"|'[^']*')*?\brel\s*=\s*["'](stylesheet|modulepreload)["'](?:[^>"']|"[^"]*"|'[^']*')*?\bhref\s*=\s*["']([^"']+)["']"#,
    )
});
40
/// Matches a `<link …>` tag whose `href` attribute appears *before* `rel`.
///
/// Mirror image of `LINK_HREF_RE` with the capture groups swapped: group 1 is
/// the `href` value, group 2 is the `rel` value (exactly `stylesheet` or
/// `modulepreload`, case-insensitively). The `s` flag lets the tag span
/// multiple lines.
static LINK_HREF_REVERSE_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
    crate::static_regex(
        r#"(?si)<link\b(?:[^>"']|"[^"]*"|'[^']*')*?\bhref\s*=\s*["']([^"']+)["'](?:[^>"']|"[^"]*"|'[^']*')*?\brel\s*=\s*["'](stylesheet|modulepreload)["']"#,
    )
});
47
/// Returns `true` when `path` has the extension `html`.
///
/// The comparison is exact: `.htm`, upper-case variants, and extensions that
/// are not valid UTF-8 all yield `false`, as does a path with no extension.
pub(crate) fn is_html_file(path: &Path) -> bool {
    matches!(
        path.extension().and_then(|ext| ext.to_str()),
        Some("html")
    )
}
54
/// Returns `true` when `src` points outside the project and therefore must
/// not be treated as a local asset.
///
/// A reference counts as remote when it is:
/// - an `http://` or `https://` URL,
/// - a protocol-relative URL (`//host/…`), or
/// - an inline `data:` URI.
///
/// URI schemes are case-insensitive, so the scheme prefixes are compared
/// ignoring ASCII case (`HTTPS://cdn…` is just as remote as `https://cdn…`).
/// Everything else — relative paths, root-relative paths, bare names — is
/// reported as local.
pub(crate) fn is_remote_url(src: &str) -> bool {
    const REMOTE_SCHEME_PREFIXES: [&str; 3] = ["http://", "https://", "data:"];

    if src.starts_with("//") {
        return true;
    }

    // Compare on bytes: slicing the `str` at `prefix.len()` could land inside
    // a multi-byte character, whereas a byte-slice `get` simply yields `None`
    // when `src` is shorter than the prefix.
    REMOTE_SCHEME_PREFIXES.iter().any(|prefix| {
        src.as_bytes()
            .get(..prefix.len())
            .is_some_and(|head| head.eq_ignore_ascii_case(prefix.as_bytes()))
    })
}
62
/// Returns `true` when `value` contains an unexpanded template placeholder
/// marker and so is not a literal path.
///
/// Two markers are recognised anywhere in the string:
/// - `{{` — the opening of a Handlebars-style expression such as
///   `{{rootURL}}assets/app.js`;
/// - `###` — the delimiter of blueprint tokens such as `###APPNAME###/app.js`.
pub(crate) fn is_template_placeholder(value: &str) -> bool {
    const PLACEHOLDER_MARKERS: [&str; 2] = ["{{", "###"];
    PLACEHOLDER_MARKERS
        .iter()
        .any(|marker| value.contains(marker))
}
80
81pub(crate) fn collect_asset_refs(source: &str) -> Vec<String> {
88 let stripped = HTML_COMMENT_RE.replace_all(source, "");
89 let mut refs: Vec<String> = Vec::new();
90
91 for cap in SCRIPT_SRC_RE.captures_iter(&stripped) {
92 if let Some(m) = cap.get(1) {
93 let src = m.as_str().trim();
94 if !src.is_empty() && !is_remote_url(src) && !is_template_placeholder(src) {
95 refs.push(src.to_string());
96 }
97 }
98 }
99
100 for cap in LINK_HREF_RE.captures_iter(&stripped) {
101 if let Some(m) = cap.get(2) {
102 let href = m.as_str().trim();
103 if !href.is_empty() && !is_remote_url(href) && !is_template_placeholder(href) {
104 refs.push(href.to_string());
105 }
106 }
107 }
108 for cap in LINK_HREF_REVERSE_RE.captures_iter(&stripped) {
109 if let Some(m) = cap.get(1) {
110 let href = m.as_str().trim();
111 if !href.is_empty() && !is_remote_url(href) && !is_template_placeholder(href) {
112 refs.push(href.to_string());
113 }
114 }
115 }
116
117 refs
118}
119
/// Test-only shorthand for [`parse_html_to_module_with_complexity`] with
/// complexity collection switched off.
#[cfg(test)]
pub(crate) fn parse_html_to_module(file_id: FileId, source: &str, content_hash: u64) -> ModuleInfo {
    let need_complexity = false;
    parse_html_to_module_with_complexity(file_id, source, content_hash, need_complexity)
}
125
126pub(crate) fn parse_html_to_module_with_complexity(
128 file_id: FileId,
129 source: &str,
130 content_hash: u64,
131 need_complexity: bool,
132) -> ModuleInfo {
133 let parsed_suppressions = crate::suppress::parse_suppressions_from_source(source);
134
135 let mut imports: Vec<ImportInfo> = collect_asset_refs(source)
136 .into_iter()
137 .map(|raw| ImportInfo {
138 source: normalize_asset_url(&raw),
139 imported_name: ImportedName::SideEffect,
140 local_name: String::new(),
141 is_type_only: false,
142 from_style: false,
143 span: Span::default(),
144 source_span: Span::default(),
145 })
146 .collect();
147
148 imports.sort_unstable_by(|a, b| a.source.cmp(&b.source));
149 imports.dedup_by(|a, b| a.source == b.source);
150
151 let angular::AngularTemplateRefs {
152 identifiers,
153 member_accesses: template_member_accesses,
154 security_sinks,
155 } = angular::collect_angular_template_refs(source);
156 let mut member_accesses: Vec<MemberAccess> = identifiers
157 .into_iter()
158 .map(|name| MemberAccess {
159 object: ANGULAR_TPL_SENTINEL.to_string(),
160 member: name,
161 })
162 .collect();
163 member_accesses.extend(template_member_accesses);
164
165 let angular_used_selectors = angular::collect_angular_used_selectors(source);
170 let has_dynamic_component_render = source.contains("ngComponentOutlet");
171
172 let complexity = if need_complexity {
173 crate::template_complexity::compute_angular_template_complexity(source)
174 .into_iter()
175 .collect()
176 } else {
177 Vec::new()
178 };
179
180 ModuleInfo {
181 file_id,
182 exports: Vec::new(),
183 imports,
184 re_exports: Vec::new(),
185 dynamic_imports: Vec::new(),
186 dynamic_import_patterns: Vec::new(),
187 require_calls: Vec::new(),
188 package_path_references: Vec::new(),
189 member_accesses,
190 whole_object_uses: Vec::new(),
191 has_cjs_exports: false,
192 has_angular_component_template_url: false,
193 content_hash,
194 suppressions: parsed_suppressions.suppressions,
195 unknown_suppression_kinds: parsed_suppressions.unknown_kinds,
196 unused_import_bindings: Vec::new(),
197 type_referenced_import_bindings: Vec::new(),
198 value_referenced_import_bindings: Vec::new(),
199 line_offsets: fallow_types::extract::compute_line_offsets(source),
200 complexity,
201 flag_uses: Vec::new(),
202 class_heritage: vec![],
203 injection_tokens: vec![],
204 local_type_declarations: Vec::new(),
205 public_signature_type_references: Vec::new(),
206 namespace_object_aliases: Vec::new(),
207 iconify_prefixes: Vec::new(),
208 iconify_icon_names: Vec::new(),
209 auto_import_candidates: Vec::new(),
210 directives: Vec::new(),
211 client_only_dynamic_import_spans: Vec::new(),
212 security_sinks,
213 security_sinks_skipped: 0,
214 security_unresolved_callee_sites: Vec::new(),
215 tainted_bindings: Vec::new(),
216 sanitized_sink_args: Vec::new(),
217 security_control_sites: Vec::new(),
218 callee_uses: Vec::new(),
219 misplaced_directives: Vec::new(),
220 inline_server_action_exports: Vec::new(),
221 di_key_sites: Vec::new(),
222 has_dynamic_provide: false,
223 referenced_import_bindings: Vec::new(),
224 component_props: Vec::new(),
225 has_props_attrs_fallthrough: false,
226 has_define_expose: false,
227 has_define_model: false,
228 has_unharvestable_props: false,
229 component_emits: Vec::new(),
230 angular_inputs: Vec::new(),
231 angular_outputs: Vec::new(),
232 angular_component_selectors: Vec::new(),
233 angular_used_selectors,
234 angular_entry_component_refs: Vec::new(),
235 has_dynamic_component_render,
236 has_unharvestable_emits: false,
237 has_dynamic_emit: false,
238 has_emit_whole_object_use: false,
239 load_return_keys: Vec::new(),
240 has_unharvestable_load: false,
241 has_load_data_whole_use: false,
242 has_page_data_store_whole_use: false,
243 component_functions: Vec::new(),
244 react_props: Vec::new(),
245 hook_uses: Vec::new(),
246 render_edges: Vec::new(),
247 svelte_dispatched_events: Vec::new(),
248 svelte_listened_events: Vec::new(),
249 has_dynamic_dispatch: false,
250 }
251}
252
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `html` as file 0 with a zero content hash.
    fn parse(html: &str) -> ModuleInfo {
        parse_html_to_module(FileId(0), html, 0)
    }

    /// Import sources extracted from `html`, in the order the parser stores them.
    fn import_sources(html: &str) -> Vec<String> {
        parse(html)
            .imports
            .into_iter()
            .map(|import| import.source)
            .collect()
    }

    // --- is_html_file -----------------------------------------------------

    #[test]
    fn is_html_file_html() {
        assert!(is_html_file(Path::new("index.html")));
    }

    #[test]
    fn is_html_file_nested() {
        assert!(is_html_file(Path::new("pages/about.html")));
    }

    #[test]
    fn is_html_file_rejects_htm() {
        assert!(!is_html_file(Path::new("index.htm")));
    }

    #[test]
    fn is_html_file_rejects_js() {
        assert!(!is_html_file(Path::new("app.js")));
    }

    #[test]
    fn is_html_file_rejects_ts() {
        assert!(!is_html_file(Path::new("app.ts")));
    }

    #[test]
    fn is_html_file_rejects_vue() {
        assert!(!is_html_file(Path::new("App.vue")));
    }

    // --- is_remote_url ----------------------------------------------------

    #[test]
    fn remote_url_http() {
        assert!(is_remote_url("http://example.com/script.js"));
    }

    #[test]
    fn remote_url_https() {
        assert!(is_remote_url("https://cdn.example.com/style.css"));
    }

    #[test]
    fn remote_url_protocol_relative() {
        assert!(is_remote_url("//cdn.example.com/lib.js"));
    }

    #[test]
    fn remote_url_data() {
        assert!(is_remote_url("data:text/javascript;base64,abc"));
    }

    #[test]
    fn local_relative_not_remote() {
        assert!(!is_remote_url("./src/entry.js"));
    }

    #[test]
    fn local_root_relative_not_remote() {
        assert!(!is_remote_url("/src/entry.js"));
    }

    // --- <script src> extraction -------------------------------------------

    #[test]
    fn extracts_module_script_src() {
        assert_eq!(
            import_sources(r#"<script type="module" src="./src/entry.js"></script>"#),
            ["./src/entry.js"]
        );
    }

    #[test]
    fn extracts_plain_script_src() {
        assert_eq!(
            import_sources(r#"<script src="./src/polyfills.js"></script>"#),
            ["./src/polyfills.js"]
        );
    }

    #[test]
    fn extracts_multiple_scripts() {
        let sources = import_sources(
            r#"
            <script type="module" src="./src/entry.js"></script>
            <script src="./src/polyfills.js"></script>
        "#,
        );
        assert_eq!(sources.len(), 2);
    }

    #[test]
    fn skips_inline_script() {
        assert!(import_sources(r#"<script>console.log("hello");</script>"#).is_empty());
    }

    // --- template placeholders ----------------------------------------------

    #[test]
    fn skips_handlebars_placeholder_in_script_src() {
        // Uses the full module so the failure message can show the imports.
        let info = parse(
            r#"<script src="{{rootURL}}assets/app.js"></script>
            <script src="{{config.assetsPath}}vendor.js"></script>"#,
        );
        assert!(
            info.imports.is_empty(),
            "Handlebars-placeholder script srcs should not enter the import graph; got {:?}",
            info.imports
        );
    }

    #[test]
    fn skips_handlebars_placeholder_in_link_href() {
        assert!(
            import_sources(r#"<link rel="stylesheet" href="{{rootURL}}assets/app.css">"#)
                .is_empty()
        );
    }

    #[test]
    fn skips_ember_cli_blueprint_placeholder() {
        assert!(import_sources(r####"<script src="###APPNAME###/app.js"></script>"####).is_empty());
    }

    #[test]
    fn extracts_normal_specifier_alongside_placeholders() {
        assert_eq!(
            import_sources(
                r#"<script src="{{rootURL}}assets/app.js"></script>
            <script src="./src/main.ts"></script>"#,
            ),
            ["./src/main.ts"]
        );
    }

    // --- remote references ---------------------------------------------------

    #[test]
    fn skips_remote_script() {
        assert!(
            import_sources(r#"<script src="https://cdn.example.com/lib.js"></script>"#).is_empty()
        );
    }

    #[test]
    fn skips_protocol_relative_script() {
        assert!(import_sources(r#"<script src="//cdn.example.com/lib.js"></script>"#).is_empty());
    }

    // --- <link href> extraction ----------------------------------------------

    #[test]
    fn extracts_stylesheet_link() {
        assert_eq!(
            import_sources(r#"<link rel="stylesheet" href="./src/global.css" />"#),
            ["./src/global.css"]
        );
    }

    #[test]
    fn extracts_modulepreload_link() {
        assert_eq!(
            import_sources(r#"<link rel="modulepreload" href="./src/vendor.js" />"#),
            ["./src/vendor.js"]
        );
    }

    #[test]
    fn extracts_link_with_reversed_attrs() {
        assert_eq!(
            import_sources(r#"<link href="./src/global.css" rel="stylesheet" />"#),
            ["./src/global.css"]
        );
    }

    // --- URL normalization ---------------------------------------------------

    #[test]
    fn bare_script_src_normalized_to_relative() {
        assert_eq!(
            import_sources(r#"<script src="app.js"></script>"#),
            ["./app.js"]
        );
    }

    #[test]
    fn bare_module_script_src_normalized_to_relative() {
        assert_eq!(
            import_sources(r#"<script type="module" src="main.ts"></script>"#),
            ["./main.ts"]
        );
    }

    #[test]
    fn bare_stylesheet_link_href_normalized_to_relative() {
        assert_eq!(
            import_sources(r#"<link rel="stylesheet" href="styles.css" />"#),
            ["./styles.css"]
        );
    }

    #[test]
    fn bare_link_href_reversed_attrs_normalized_to_relative() {
        assert_eq!(
            import_sources(r#"<link href="styles.css" rel="stylesheet" />"#),
            ["./styles.css"]
        );
    }

    #[test]
    fn bare_modulepreload_link_href_normalized_to_relative() {
        assert_eq!(
            import_sources(r#"<link rel="modulepreload" href="vendor.js" />"#),
            ["./vendor.js"]
        );
    }

    #[test]
    fn bare_asset_with_subdir_normalized_to_relative() {
        assert_eq!(
            import_sources(r#"<script src="assets/app.js"></script>"#),
            ["./assets/app.js"]
        );
    }

    #[test]
    fn root_absolute_script_src_unchanged() {
        assert_eq!(
            import_sources(r#"<script src="/src/main.ts"></script>"#),
            ["/src/main.ts"]
        );
    }

    #[test]
    fn parent_relative_script_src_unchanged() {
        assert_eq!(
            import_sources(r#"<script src="../shared/vendor.js"></script>"#),
            ["../shared/vendor.js"]
        );
    }

    // --- ignored <link> kinds --------------------------------------------------

    #[test]
    fn skips_preload_link() {
        assert!(
            import_sources(r#"<link rel="preload" href="./src/font.woff2" as="font" />"#)
                .is_empty()
        );
    }

    #[test]
    fn skips_icon_link() {
        assert!(import_sources(r#"<link rel="icon" href="./favicon.ico" />"#).is_empty());
    }

    #[test]
    fn skips_remote_stylesheet() {
        assert!(
            import_sources(r#"<link rel="stylesheet" href="https://fonts.googleapis.com/css" />"#)
                .is_empty()
        );
    }

    // --- HTML comments -----------------------------------------------------------

    #[test]
    fn skips_commented_out_script() {
        assert_eq!(
            import_sources(
                r#"<!-- <script src="./old.js"></script> -->
            <script src="./new.js"></script>"#,
            ),
            ["./new.js"]
        );
    }

    #[test]
    fn skips_commented_out_link() {
        assert_eq!(
            import_sources(
                r#"<!-- <link rel="stylesheet" href="./old.css" /> -->
            <link rel="stylesheet" href="./new.css" />"#,
            ),
            ["./new.css"]
        );
    }

    // --- multi-line tags ---------------------------------------------------------

    #[test]
    fn handles_multiline_script_tag() {
        assert_eq!(
            import_sources("<script\n type=\"module\"\n src=\"./src/entry.js\"\n></script>"),
            ["./src/entry.js"]
        );
    }

    #[test]
    fn handles_multiline_link_tag() {
        assert_eq!(
            import_sources("<link\n rel=\"stylesheet\"\n href=\"./src/global.css\"\n/>"),
            ["./src/global.css"]
        );
    }

    // --- whole documents -----------------------------------------------------------

    #[test]
    fn full_vite_html() {
        let sources = import_sources(
            r#"<!doctype html>
<html>
  <head>
    <link rel="stylesheet" href="./src/global.css" />
    <link rel="icon" href="/favicon.ico" />
  </head>
  <body>
    <div id="app"></div>
    <script type="module" src="./src/entry.js"></script>
  </body>
</html>"#,
        );
        assert_eq!(sources.len(), 2);
        assert!(sources.iter().any(|source| source == "./src/global.css"));
        assert!(sources.iter().any(|source| source == "./src/entry.js"));
    }

    #[test]
    fn empty_html() {
        assert!(import_sources("").is_empty());
    }

    #[test]
    fn html_with_no_assets() {
        assert!(
            import_sources(r"<!doctype html><html><body><h1>Hello</h1></body></html>").is_empty()
        );
    }

    #[test]
    fn single_quoted_attributes() {
        assert_eq!(
            import_sources(r"<script src='./src/entry.js'></script>"),
            ["./src/entry.js"]
        );
    }

    // --- import shape ----------------------------------------------------------------

    #[test]
    fn all_imports_are_side_effect() {
        let info = parse(
            r#"<script src="./entry.js"></script>
            <link rel="stylesheet" href="./style.css" />"#,
        );
        for imp in &info.imports {
            assert!(matches!(imp.imported_name, ImportedName::SideEffect));
            assert!(imp.local_name.is_empty());
            assert!(!imp.is_type_only);
        }
    }

    #[test]
    fn suppression_comments_extracted() {
        // The suppression comment must not interfere with asset extraction.
        let sources =
            import_sources("<!-- fallow-ignore-file -->\n<script src=\"./entry.js\"></script>");
        assert_eq!(sources.len(), 1);
    }

    // --- Angular template references ---------------------------------------------------

    #[test]
    fn angular_template_extracts_member_refs() {
        let info = parse(
            "<h1>{{ title() }}</h1>\n\
             <p [class.highlighted]=\"isHighlighted\">{{ greeting() }}</p>\n\
             <button (click)=\"onButtonClick()\">Toggle</button>",
        );
        let names: rustc_hash::FxHashSet<&str> = info
            .member_accesses
            .iter()
            .filter(|access| access.object == ANGULAR_TPL_SENTINEL)
            .map(|access| access.member.as_str())
            .collect();
        assert!(names.contains("title"), "should contain 'title'");
        assert!(
            names.contains("isHighlighted"),
            "should contain 'isHighlighted'"
        );
        assert!(names.contains("greeting"), "should contain 'greeting'");
        assert!(
            names.contains("onButtonClick"),
            "should contain 'onButtonClick'"
        );
    }

    #[test]
    fn plain_html_no_angular_refs() {
        let info = parse("<!doctype html><html><body><h1>Hello</h1></body></html>");
        assert!(info.member_accesses.is_empty());
    }
}