1use std::path::Path;
12use std::sync::LazyLock;
13
14use oxc_span::Span;
15
16use crate::asset_url::normalize_asset_url;
17use crate::sfc_template::angular::{self, ANGULAR_TPL_SENTINEL};
18use crate::{ImportInfo, ImportedName, MemberAccess, ModuleInfo};
19use fallow_types::discover::FileId;
20
/// Matches one complete HTML comment (`<!-- ... -->`).
///
/// The `s` flag lets `.` cross newlines so multi-line comments match, and the
/// lazy `.*?` stops at the first `-->` so adjacent comments are matched
/// separately. `collect_asset_refs` uses this to strip comments before it
/// scans for asset references.
static HTML_COMMENT_RE: LazyLock<regex::Regex> =
    LazyLock::new(|| regex::Regex::new(r"(?s)<!--.*?-->").expect("valid regex"));
24
/// Matches a `<script ...` opening tag that carries a quoted `src` attribute.
///
/// Flags: `s` (dot matches newline) and `i` (case-insensitive).
///
/// The `(?:[^>"']|"[^"]*"|'[^']*')*?` prefix lazily skips over earlier
/// attributes without leaving the tag: it consumes either a single character
/// that is not `>` or a quote, or one whole quoted value (which may itself
/// contain `>`).
///
/// Capture group 1 is the `src` value. It is non-empty and contains no quote
/// characters; the opening and closing quotes are not required to be the same
/// kind.
static SCRIPT_SRC_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
    regex::Regex::new(r#"(?si)<script\b(?:[^>"']|"[^"]*"|'[^']*')*?\bsrc\s*=\s*["']([^"']+)["']"#)
        .expect("valid regex")
});
32
/// Matches a `<link ...` tag whose `rel` attribute appears *before* its `href`
/// attribute and whose `rel` value is exactly `stylesheet` or `modulepreload`.
///
/// Flags: `s` (dot matches newline) and `i` (case-insensitive). Attributes
/// before `rel` and between `rel` and `href` are skipped with the same
/// "non-quote character or whole quoted value" sub-pattern, so the match never
/// crosses the tag's closing `>`.
///
/// Capture groups: 1 is the `rel` value, 2 is the `href` value.
static LINK_HREF_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
    regex::Regex::new(
        r#"(?si)<link\b(?:[^>"']|"[^"]*"|'[^']*')*?\brel\s*=\s*["'](stylesheet|modulepreload)["'](?:[^>"']|"[^"]*"|'[^']*')*?\bhref\s*=\s*["']([^"']+)["']"#,
    )
    .expect("valid regex")
});
42
/// Matches a `<link ...` tag whose `href` attribute appears *before* its `rel`
/// attribute, for the attribute order that `LINK_HREF_RE` does not cover.
///
/// Flags and attribute-skipping behave as in `LINK_HREF_RE`; only the order of
/// the two attributes (and therefore the capture numbering) differs.
///
/// Capture groups: 1 is the `href` value, 2 is the `rel` value
/// (`stylesheet` or `modulepreload`).
static LINK_HREF_REVERSE_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
    regex::Regex::new(
        r#"(?si)<link\b(?:[^>"']|"[^"]*"|'[^']*')*?\bhref\s*=\s*["']([^"']+)["'](?:[^>"']|"[^"]*"|'[^']*')*?\brel\s*=\s*["'](stylesheet|modulepreload)["']"#,
    )
    .expect("valid regex")
});
50
/// Returns `true` when `path` has the extension `html`.
///
/// The comparison is exact and case-sensitive: `.htm` and `.HTML` are not
/// accepted, and a path without an extension (or with a non-UTF-8 extension)
/// yields `false`.
pub(crate) fn is_html_file(path: &Path) -> bool {
    let extension = path.extension().and_then(|raw| raw.to_str());
    matches!(extension, Some("html"))
}
58
/// Returns `true` when `src` points at a remote or inline resource rather than
/// a file in the project.
///
/// Recognized prefixes are `http://`, `https://`, protocol-relative `//`, and
/// `data:`. URI schemes are case-insensitive, so the prefix comparison ignores
/// ASCII case: `HTTPS://cdn.example.com/x.js` is remote just like its
/// lowercase spelling, instead of being mistaken for a local path.
///
/// No trimming is performed; callers are expected to pass an already-trimmed
/// attribute value.
pub(crate) fn is_remote_url(src: &str) -> bool {
    const REMOTE_PREFIXES: [&str; 4] = ["http://", "https://", "//", "data:"];

    // Compare on bytes so slicing can never land inside a multi-byte
    // character; every prefix is pure ASCII, so a byte-wise ASCII
    // case-insensitive comparison is exact.
    let bytes = src.as_bytes();
    REMOTE_PREFIXES.iter().any(|prefix| {
        bytes
            .get(..prefix.len())
            .is_some_and(|head| head.eq_ignore_ascii_case(prefix.as_bytes()))
    })
}
66
/// Returns `true` when `value` contains an unrendered template marker.
///
/// Two markers are recognized anywhere in the string: `{{` (as used by
/// Handlebars-style `{{rootURL}}` expressions) and `###` (as used by
/// `###APPNAME###`-style blueprint placeholders). Such values are not real
/// paths and cannot be resolved to files.
pub(crate) fn is_template_placeholder(value: &str) -> bool {
    const MARKERS: [&str; 2] = ["{{", "###"];
    MARKERS.iter().any(|&marker| value.contains(marker))
}
84
85pub(crate) fn collect_asset_refs(source: &str) -> Vec<String> {
92 let stripped = HTML_COMMENT_RE.replace_all(source, "");
93 let mut refs: Vec<String> = Vec::new();
94
95 for cap in SCRIPT_SRC_RE.captures_iter(&stripped) {
96 if let Some(m) = cap.get(1) {
97 let src = m.as_str().trim();
98 if !src.is_empty() && !is_remote_url(src) && !is_template_placeholder(src) {
99 refs.push(src.to_string());
100 }
101 }
102 }
103
104 for cap in LINK_HREF_RE.captures_iter(&stripped) {
105 if let Some(m) = cap.get(2) {
106 let href = m.as_str().trim();
107 if !href.is_empty() && !is_remote_url(href) && !is_template_placeholder(href) {
108 refs.push(href.to_string());
109 }
110 }
111 }
112 for cap in LINK_HREF_REVERSE_RE.captures_iter(&stripped) {
113 if let Some(m) = cap.get(1) {
114 let href = m.as_str().trim();
115 if !href.is_empty() && !is_remote_url(href) && !is_template_placeholder(href) {
116 refs.push(href.to_string());
117 }
118 }
119 }
120
121 refs
122}
123
/// Test-only shorthand for [`parse_html_to_module_with_complexity`] with
/// complexity collection switched off.
#[cfg(test)]
pub(crate) fn parse_html_to_module(file_id: FileId, source: &str, content_hash: u64) -> ModuleInfo {
    let need_complexity = false;
    parse_html_to_module_with_complexity(file_id, source, content_hash, need_complexity)
}
129
130pub(crate) fn parse_html_to_module_with_complexity(
132 file_id: FileId,
133 source: &str,
134 content_hash: u64,
135 need_complexity: bool,
136) -> ModuleInfo {
137 let parsed_suppressions = crate::suppress::parse_suppressions_from_source(source);
138
139 let mut imports: Vec<ImportInfo> = collect_asset_refs(source)
142 .into_iter()
143 .map(|raw| ImportInfo {
144 source: normalize_asset_url(&raw),
145 imported_name: ImportedName::SideEffect,
146 local_name: String::new(),
147 is_type_only: false,
148 from_style: false,
149 span: Span::default(),
150 source_span: Span::default(),
151 })
152 .collect();
153
154 imports.sort_unstable_by(|a, b| a.source.cmp(&b.source));
157 imports.dedup_by(|a, b| a.source == b.source);
158
159 let template_refs = angular::collect_angular_template_refs(source);
172 let mut member_accesses: Vec<MemberAccess> = template_refs
173 .identifiers
174 .into_iter()
175 .map(|name| MemberAccess {
176 object: ANGULAR_TPL_SENTINEL.to_string(),
177 member: name,
178 })
179 .collect();
180 member_accesses.extend(template_refs.member_accesses);
181
182 let complexity = if need_complexity {
183 crate::template_complexity::compute_angular_template_complexity(source)
184 .into_iter()
185 .collect()
186 } else {
187 Vec::new()
188 };
189
190 ModuleInfo {
191 file_id,
192 exports: Vec::new(),
193 imports,
194 re_exports: Vec::new(),
195 dynamic_imports: Vec::new(),
196 dynamic_import_patterns: Vec::new(),
197 require_calls: Vec::new(),
198 member_accesses,
199 whole_object_uses: Vec::new(),
200 has_cjs_exports: false,
201 has_angular_component_template_url: false,
202 content_hash,
203 suppressions: parsed_suppressions.suppressions,
204 unknown_suppression_kinds: parsed_suppressions.unknown_kinds,
205 unused_import_bindings: Vec::new(),
206 type_referenced_import_bindings: Vec::new(),
207 value_referenced_import_bindings: Vec::new(),
208 line_offsets: fallow_types::extract::compute_line_offsets(source),
209 complexity,
210 flag_uses: Vec::new(),
211 class_heritage: vec![],
212 local_type_declarations: Vec::new(),
213 public_signature_type_references: Vec::new(),
214 namespace_object_aliases: Vec::new(),
215 }
216}
217
/// Unit tests for HTML asset-reference extraction and the Angular template
/// reference pass-through. All tests go through `parse_html_to_module`, so
/// they observe normalized, sorted, de-duplicated imports.
#[cfg(test)]
mod tests {
    use super::*;

    // ── is_html_file: only the exact `html` extension is accepted ──

    #[test]
    fn is_html_file_html() {
        assert!(is_html_file(Path::new("index.html")));
    }

    #[test]
    fn is_html_file_nested() {
        assert!(is_html_file(Path::new("pages/about.html")));
    }

    #[test]
    fn is_html_file_rejects_htm() {
        assert!(!is_html_file(Path::new("index.htm")));
    }

    #[test]
    fn is_html_file_rejects_js() {
        assert!(!is_html_file(Path::new("app.js")));
    }

    #[test]
    fn is_html_file_rejects_ts() {
        assert!(!is_html_file(Path::new("app.ts")));
    }

    #[test]
    fn is_html_file_rejects_vue() {
        assert!(!is_html_file(Path::new("App.vue")));
    }

    // ── is_remote_url: http(s), protocol-relative and data URLs are remote ──

    #[test]
    fn remote_url_http() {
        assert!(is_remote_url("http://example.com/script.js"));
    }

    #[test]
    fn remote_url_https() {
        assert!(is_remote_url("https://cdn.example.com/style.css"));
    }

    #[test]
    fn remote_url_protocol_relative() {
        assert!(is_remote_url("//cdn.example.com/lib.js"));
    }

    #[test]
    fn remote_url_data() {
        assert!(is_remote_url("data:text/javascript;base64,abc"));
    }

    #[test]
    fn local_relative_not_remote() {
        assert!(!is_remote_url("./src/entry.js"));
    }

    #[test]
    fn local_root_relative_not_remote() {
        assert!(!is_remote_url("/src/entry.js"));
    }

    // ── <script src> extraction ──

    #[test]
    fn extracts_module_script_src() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script type="module" src="./src/entry.js"></script>"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/entry.js");
    }

    #[test]
    fn extracts_plain_script_src() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script src="./src/polyfills.js"></script>"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/polyfills.js");
    }

    #[test]
    fn extracts_multiple_scripts() {
        let info = parse_html_to_module(
            FileId(0),
            r#"
            <script type="module" src="./src/entry.js"></script>
            <script src="./src/polyfills.js"></script>
        "#,
            0,
        );
        assert_eq!(info.imports.len(), 2);
    }

    #[test]
    fn skips_inline_script() {
        let info = parse_html_to_module(FileId(0), r#"<script>console.log("hello");</script>"#, 0);
        assert!(info.imports.is_empty());
    }

    // ── Template placeholders (`{{…}}`, `###…###`) are not real paths ──

    #[test]
    fn skips_handlebars_placeholder_in_script_src() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script src="{{rootURL}}assets/app.js"></script>
               <script src="{{config.assetsPath}}vendor.js"></script>"#,
            0,
        );
        assert!(
            info.imports.is_empty(),
            "Handlebars-placeholder script srcs should not enter the import graph; got {:?}",
            info.imports
        );
    }

    #[test]
    fn skips_handlebars_placeholder_in_link_href() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link rel="stylesheet" href="{{rootURL}}assets/app.css">"#,
            0,
        );
        assert!(info.imports.is_empty());
    }

    #[test]
    fn skips_ember_cli_blueprint_placeholder() {
        let info = parse_html_to_module(
            FileId(0),
            r####"<script src="###APPNAME###/app.js"></script>"####,
            0,
        );
        assert!(info.imports.is_empty());
    }

    #[test]
    fn extracts_normal_specifier_alongside_placeholders() {
        // A placeholder on one tag must not suppress a real path on another.
        let info = parse_html_to_module(
            FileId(0),
            r#"<script src="{{rootURL}}assets/app.js"></script>
               <script src="./src/main.ts"></script>"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/main.ts");
    }

    #[test]
    fn skips_remote_script() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script src="https://cdn.example.com/lib.js"></script>"#,
            0,
        );
        assert!(info.imports.is_empty());
    }

    #[test]
    fn skips_protocol_relative_script() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script src="//cdn.example.com/lib.js"></script>"#,
            0,
        );
        assert!(info.imports.is_empty());
    }

    // ── <link rel="stylesheet|modulepreload" href> extraction ──

    #[test]
    fn extracts_stylesheet_link() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link rel="stylesheet" href="./src/global.css" />"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/global.css");
    }

    #[test]
    fn extracts_modulepreload_link() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link rel="modulepreload" href="./src/vendor.js" />"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/vendor.js");
    }

    #[test]
    fn extracts_link_with_reversed_attrs() {
        // `href` before `rel` is handled by the reversed-order pattern.
        let info = parse_html_to_module(
            FileId(0),
            r#"<link href="./src/global.css" rel="stylesheet" />"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/global.css");
    }

    // ── Normalization: bare references gain a `./` prefix; `/` and `../` are kept ──

    #[test]
    fn bare_script_src_normalized_to_relative() {
        let info = parse_html_to_module(FileId(0), r#"<script src="app.js"></script>"#, 0);
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./app.js");
    }

    #[test]
    fn bare_module_script_src_normalized_to_relative() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script type="module" src="main.ts"></script>"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./main.ts");
    }

    #[test]
    fn bare_stylesheet_link_href_normalized_to_relative() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link rel="stylesheet" href="styles.css" />"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./styles.css");
    }

    #[test]
    fn bare_link_href_reversed_attrs_normalized_to_relative() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link href="styles.css" rel="stylesheet" />"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./styles.css");
    }

    #[test]
    fn bare_modulepreload_link_href_normalized_to_relative() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link rel="modulepreload" href="vendor.js" />"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./vendor.js");
    }

    #[test]
    fn bare_asset_with_subdir_normalized_to_relative() {
        let info = parse_html_to_module(FileId(0), r#"<script src="assets/app.js"></script>"#, 0);
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./assets/app.js");
    }

    #[test]
    fn root_absolute_script_src_unchanged() {
        let info = parse_html_to_module(FileId(0), r#"<script src="/src/main.ts"></script>"#, 0);
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "/src/main.ts");
    }

    #[test]
    fn parent_relative_script_src_unchanged() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script src="../shared/vendor.js"></script>"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "../shared/vendor.js");
    }

    // ── <link> tags with other `rel` values, or remote hrefs, are ignored ──

    #[test]
    fn skips_preload_link() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link rel="preload" href="./src/font.woff2" as="font" />"#,
            0,
        );
        assert!(info.imports.is_empty());
    }

    #[test]
    fn skips_icon_link() {
        let info =
            parse_html_to_module(FileId(0), r#"<link rel="icon" href="./favicon.ico" />"#, 0);
        assert!(info.imports.is_empty());
    }

    #[test]
    fn skips_remote_stylesheet() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link rel="stylesheet" href="https://fonts.googleapis.com/css" />"#,
            0,
        );
        assert!(info.imports.is_empty());
    }

    // ── HTML comments are stripped before scanning ──

    #[test]
    fn skips_commented_out_script() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<!-- <script src="./old.js"></script> -->
            <script src="./new.js"></script>"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./new.js");
    }

    #[test]
    fn skips_commented_out_link() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<!-- <link rel="stylesheet" href="./old.css" /> -->
            <link rel="stylesheet" href="./new.css" />"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./new.css");
    }

    // ── Tags whose attributes span several lines ──

    #[test]
    fn handles_multiline_script_tag() {
        let info = parse_html_to_module(
            FileId(0),
            "<script\n type=\"module\"\n src=\"./src/entry.js\"\n></script>",
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/entry.js");
    }

    #[test]
    fn handles_multiline_link_tag() {
        let info = parse_html_to_module(
            FileId(0),
            "<link\n rel=\"stylesheet\"\n href=\"./src/global.css\"\n/>",
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/global.css");
    }

    // ── Whole-document scenario ──

    #[test]
    fn full_vite_html() {
        // The icon link is present but must not be counted: only the
        // stylesheet and the module script are imports.
        let info = parse_html_to_module(
            FileId(0),
            r#"<!doctype html>
<html>
  <head>
    <link rel="stylesheet" href="./src/global.css" />
    <link rel="icon" href="/favicon.ico" />
  </head>
  <body>
    <div id="app"></div>
    <script type="module" src="./src/entry.js"></script>
  </body>
</html>"#,
            0,
        );
        assert_eq!(info.imports.len(), 2);
        let sources: Vec<&str> = info.imports.iter().map(|i| i.source.as_str()).collect();
        assert!(sources.contains(&"./src/global.css"));
        assert!(sources.contains(&"./src/entry.js"));
    }

    // ── Edge cases ──

    #[test]
    fn empty_html() {
        let info = parse_html_to_module(FileId(0), "", 0);
        assert!(info.imports.is_empty());
    }

    #[test]
    fn html_with_no_assets() {
        let info = parse_html_to_module(
            FileId(0),
            r"<!doctype html><html><body><h1>Hello</h1></body></html>",
            0,
        );
        assert!(info.imports.is_empty());
    }

    #[test]
    fn single_quoted_attributes() {
        let info = parse_html_to_module(FileId(0), r"<script src='./src/entry.js'></script>", 0);
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/entry.js");
    }

    #[test]
    fn all_imports_are_side_effect() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script src="./entry.js"></script>
            <link rel="stylesheet" href="./style.css" />"#,
            0,
        );
        for imp in &info.imports {
            assert!(matches!(imp.imported_name, ImportedName::SideEffect));
            assert!(imp.local_name.is_empty());
            assert!(!imp.is_type_only);
        }
    }

    #[test]
    fn suppression_comments_extracted() {
        // Only checks that a leading suppression comment does not interfere
        // with asset extraction; the parsed suppressions themselves are not
        // asserted here.
        let info = parse_html_to_module(
            FileId(0),
            "<!-- fallow-ignore-file -->\n<script src=\"./entry.js\"></script>",
            0,
        );
        assert_eq!(info.imports.len(), 1);
    }

    // ── Angular template references surface as sentinel member accesses ──

    #[test]
    fn angular_template_extracts_member_refs() {
        let info = parse_html_to_module(
            FileId(0),
            "<h1>{{ title() }}</h1>\n\
             <p [class.highlighted]=\"isHighlighted\">{{ greeting() }}</p>\n\
             <button (click)=\"onButtonClick()\">Toggle</button>",
            0,
        );
        // Keep only accesses recorded against the Angular template sentinel.
        let names: rustc_hash::FxHashSet<&str> = info
            .member_accesses
            .iter()
            .filter(|a| a.object == ANGULAR_TPL_SENTINEL)
            .map(|a| a.member.as_str())
            .collect();
        assert!(names.contains("title"), "should contain 'title'");
        assert!(
            names.contains("isHighlighted"),
            "should contain 'isHighlighted'"
        );
        assert!(names.contains("greeting"), "should contain 'greeting'");
        assert!(
            names.contains("onButtonClick"),
            "should contain 'onButtonClick'"
        );
    }

    #[test]
    fn plain_html_no_angular_refs() {
        let info = parse_html_to_module(
            FileId(0),
            "<!doctype html><html><body><h1>Hello</h1></body></html>",
            0,
        );
        assert!(info.member_accesses.is_empty());
    }
}