1use std::path::Path;
12use std::sync::LazyLock;
13
14use oxc_span::Span;
15
16use crate::asset_url::normalize_asset_url;
17use crate::sfc_template::angular::{self, ANGULAR_TPL_SENTINEL};
18use crate::{ImportInfo, ImportedName, MemberAccess, ModuleInfo};
19use fallow_types::discover::FileId;
20
21static HTML_COMMENT_RE: LazyLock<regex::Regex> =
23 LazyLock::new(|| regex::Regex::new(r"(?s)<!--.*?-->").expect("valid regex"));
24
25static SCRIPT_SRC_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
29 regex::Regex::new(r#"(?si)<script\b(?:[^>"']|"[^"]*"|'[^']*')*?\bsrc\s*=\s*["']([^"']+)["']"#)
30 .expect("valid regex")
31});
32
33static LINK_HREF_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
37 regex::Regex::new(
38 r#"(?si)<link\b(?:[^>"']|"[^"]*"|'[^']*')*?\brel\s*=\s*["'](stylesheet|modulepreload)["'](?:[^>"']|"[^"]*"|'[^']*')*?\bhref\s*=\s*["']([^"']+)["']"#,
39 )
40 .expect("valid regex")
41});
42
43static LINK_HREF_REVERSE_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
45 regex::Regex::new(
46 r#"(?si)<link\b(?:[^>"']|"[^"]*"|'[^']*')*?\bhref\s*=\s*["']([^"']+)["'](?:[^>"']|"[^"]*"|'[^']*')*?\brel\s*=\s*["'](stylesheet|modulepreload)["']"#,
47 )
48 .expect("valid regex")
49});
50
/// Returns `true` when `path` has the exact extension `html`.
///
/// The comparison is case-sensitive, and a path without a UTF-8 extension
/// (or without any extension) is not considered an HTML file.
pub(crate) fn is_html_file(path: &Path) -> bool {
    let extension = path.extension().and_then(|raw| raw.to_str());
    matches!(extension, Some("html"))
}
58
/// Returns `true` when `src` starts with `http://`, `https://`, `//`
/// (protocol-relative) or `data:`.
///
/// The prefix comparison ignores ASCII case because URI schemes are
/// case-insensitive, so `HTTPS://cdn.example.com/x.js` is treated as remote too.
/// Leading whitespace is not skipped.
pub(crate) fn is_remote_url(src: &str) -> bool {
    const REMOTE_PREFIXES: [&str; 4] = ["http://", "https://", "//", "data:"];

    REMOTE_PREFIXES.iter().any(|prefix| {
        // `get` yields `None` when `src` is shorter than the prefix or when the cut
        // would land inside a multi-byte character; neither case can match an
        // all-ASCII prefix, and neither can panic.
        src.get(..prefix.len())
            .is_some_and(|head| head.eq_ignore_ascii_case(prefix))
    })
}
66
/// Returns `true` when `value` contains a `{{` or `###` marker anywhere.
///
/// The tests in this file exercise these markers with Handlebars-style
/// (`{{rootURL}}`) and `###APPNAME###`-style placeholders.
pub(crate) fn is_template_placeholder(value: &str) -> bool {
    const PLACEHOLDER_MARKERS: [&str; 2] = ["{{", "###"];
    PLACEHOLDER_MARKERS
        .iter()
        .any(|&marker| value.contains(marker))
}
84
85pub(crate) fn collect_asset_refs(source: &str) -> Vec<String> {
92 let stripped = HTML_COMMENT_RE.replace_all(source, "");
93 let mut refs: Vec<String> = Vec::new();
94
95 for cap in SCRIPT_SRC_RE.captures_iter(&stripped) {
96 if let Some(m) = cap.get(1) {
97 let src = m.as_str().trim();
98 if !src.is_empty() && !is_remote_url(src) && !is_template_placeholder(src) {
99 refs.push(src.to_string());
100 }
101 }
102 }
103
104 for cap in LINK_HREF_RE.captures_iter(&stripped) {
105 if let Some(m) = cap.get(2) {
106 let href = m.as_str().trim();
107 if !href.is_empty() && !is_remote_url(href) && !is_template_placeholder(href) {
108 refs.push(href.to_string());
109 }
110 }
111 }
112 for cap in LINK_HREF_REVERSE_RE.captures_iter(&stripped) {
113 if let Some(m) = cap.get(1) {
114 let href = m.as_str().trim();
115 if !href.is_empty() && !is_remote_url(href) && !is_template_placeholder(href) {
116 refs.push(href.to_string());
117 }
118 }
119 }
120
121 refs
122}
123
/// Test-only shorthand for [`parse_html_to_module_with_complexity`] with
/// complexity computation switched off.
#[cfg(test)]
pub(crate) fn parse_html_to_module(file_id: FileId, source: &str, content_hash: u64) -> ModuleInfo {
    let need_complexity = false;
    parse_html_to_module_with_complexity(file_id, source, content_hash, need_complexity)
}
129
130pub(crate) fn parse_html_to_module_with_complexity(
132 file_id: FileId,
133 source: &str,
134 content_hash: u64,
135 need_complexity: bool,
136) -> ModuleInfo {
137 let parsed_suppressions = crate::suppress::parse_suppressions_from_source(source);
138
139 let mut imports: Vec<ImportInfo> = collect_asset_refs(source)
142 .into_iter()
143 .map(|raw| ImportInfo {
144 source: normalize_asset_url(&raw),
145 imported_name: ImportedName::SideEffect,
146 local_name: String::new(),
147 is_type_only: false,
148 from_style: false,
149 span: Span::default(),
150 source_span: Span::default(),
151 })
152 .collect();
153
154 imports.sort_unstable_by(|a, b| a.source.cmp(&b.source));
157 imports.dedup_by(|a, b| a.source == b.source);
158
159 let template_refs = angular::collect_angular_template_refs(source);
172 let mut member_accesses: Vec<MemberAccess> = template_refs
173 .identifiers
174 .into_iter()
175 .map(|name| MemberAccess {
176 object: ANGULAR_TPL_SENTINEL.to_string(),
177 member: name,
178 })
179 .collect();
180 member_accesses.extend(template_refs.member_accesses);
181
182 let complexity = if need_complexity {
183 crate::template_complexity::compute_angular_template_complexity(source)
184 .into_iter()
185 .collect()
186 } else {
187 Vec::new()
188 };
189
190 ModuleInfo {
191 file_id,
192 exports: Vec::new(),
193 imports,
194 re_exports: Vec::new(),
195 dynamic_imports: Vec::new(),
196 dynamic_import_patterns: Vec::new(),
197 require_calls: Vec::new(),
198 member_accesses,
199 whole_object_uses: Vec::new(),
200 has_cjs_exports: false,
201 has_angular_component_template_url: false,
202 content_hash,
203 suppressions: parsed_suppressions.suppressions,
204 unknown_suppression_kinds: parsed_suppressions.unknown_kinds,
205 unused_import_bindings: Vec::new(),
206 type_referenced_import_bindings: Vec::new(),
207 value_referenced_import_bindings: Vec::new(),
208 line_offsets: fallow_types::extract::compute_line_offsets(source),
209 complexity,
210 flag_uses: Vec::new(),
211 class_heritage: vec![],
212 local_type_declarations: Vec::new(),
213 public_signature_type_references: Vec::new(),
214 namespace_object_aliases: Vec::new(),
215 iconify_prefixes: Vec::new(),
216 auto_import_candidates: Vec::new(),
217 }
218}
219
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `html` as file 0 with a zero content hash.
    fn parse(html: &str) -> ModuleInfo {
        parse_html_to_module(FileId(0), html, 0)
    }

    /// Import specifiers produced for `html`, in the order stored on the module.
    fn import_sources(html: &str) -> Vec<String> {
        parse(html)
            .imports
            .iter()
            .map(|import| import.source.as_str().to_owned())
            .collect()
    }

    /// Runs `is_html_file` on a path given as a string.
    fn html_path(path: &str) -> bool {
        is_html_file(Path::new(path))
    }

    // is_html_file

    #[test]
    fn is_html_file_html() {
        assert!(html_path("index.html"));
    }

    #[test]
    fn is_html_file_nested() {
        assert!(html_path("pages/about.html"));
    }

    #[test]
    fn is_html_file_rejects_htm() {
        assert!(!html_path("index.htm"));
    }

    #[test]
    fn is_html_file_rejects_js() {
        assert!(!html_path("app.js"));
    }

    #[test]
    fn is_html_file_rejects_ts() {
        assert!(!html_path("app.ts"));
    }

    #[test]
    fn is_html_file_rejects_vue() {
        assert!(!html_path("App.vue"));
    }

    // is_remote_url

    #[test]
    fn remote_url_http() {
        let url = "http://example.com/script.js";
        assert!(is_remote_url(url));
    }

    #[test]
    fn remote_url_https() {
        let url = "https://cdn.example.com/style.css";
        assert!(is_remote_url(url));
    }

    #[test]
    fn remote_url_protocol_relative() {
        let url = "//cdn.example.com/lib.js";
        assert!(is_remote_url(url));
    }

    #[test]
    fn remote_url_data() {
        let url = "data:text/javascript;base64,abc";
        assert!(is_remote_url(url));
    }

    #[test]
    fn local_relative_not_remote() {
        let url = "./src/entry.js";
        assert!(!is_remote_url(url));
    }

    #[test]
    fn local_root_relative_not_remote() {
        let url = "/src/entry.js";
        assert!(!is_remote_url(url));
    }

    // <script src>

    #[test]
    fn extracts_module_script_src() {
        let sources = import_sources(r#"<script type="module" src="./src/entry.js"></script>"#);
        assert_eq!(sources, ["./src/entry.js"]);
    }

    #[test]
    fn extracts_plain_script_src() {
        let sources = import_sources(r#"<script src="./src/polyfills.js"></script>"#);
        assert_eq!(sources, ["./src/polyfills.js"]);
    }

    #[test]
    fn extracts_multiple_scripts() {
        let html = r#"
            <script type="module" src="./src/entry.js"></script>
            <script src="./src/polyfills.js"></script>
        "#;
        assert_eq!(import_sources(html).len(), 2);
    }

    #[test]
    fn skips_inline_script() {
        let sources = import_sources(r#"<script>console.log("hello");</script>"#);
        assert!(sources.is_empty());
    }

    #[test]
    fn skips_handlebars_placeholder_in_script_src() {
        let html = r#"<script src="{{rootURL}}assets/app.js"></script>
            <script src="{{config.assetsPath}}vendor.js"></script>"#;
        let info = parse(html);
        assert!(
            info.imports.is_empty(),
            "Handlebars-placeholder script srcs should not enter the import graph; got {:?}",
            info.imports
        );
    }

    #[test]
    fn skips_handlebars_placeholder_in_link_href() {
        let sources = import_sources(r#"<link rel="stylesheet" href="{{rootURL}}assets/app.css">"#);
        assert!(sources.is_empty());
    }

    #[test]
    fn skips_ember_cli_blueprint_placeholder() {
        let sources = import_sources(r####"<script src="###APPNAME###/app.js"></script>"####);
        assert!(sources.is_empty());
    }

    #[test]
    fn extracts_normal_specifier_alongside_placeholders() {
        let html = r#"<script src="{{rootURL}}assets/app.js"></script>
            <script src="./src/main.ts"></script>"#;
        assert_eq!(import_sources(html), ["./src/main.ts"]);
    }

    #[test]
    fn skips_remote_script() {
        let sources = import_sources(r#"<script src="https://cdn.example.com/lib.js"></script>"#);
        assert!(sources.is_empty());
    }

    #[test]
    fn skips_protocol_relative_script() {
        let sources = import_sources(r#"<script src="//cdn.example.com/lib.js"></script>"#);
        assert!(sources.is_empty());
    }

    // <link href>

    #[test]
    fn extracts_stylesheet_link() {
        let sources = import_sources(r#"<link rel="stylesheet" href="./src/global.css" />"#);
        assert_eq!(sources, ["./src/global.css"]);
    }

    #[test]
    fn extracts_modulepreload_link() {
        let sources = import_sources(r#"<link rel="modulepreload" href="./src/vendor.js" />"#);
        assert_eq!(sources, ["./src/vendor.js"]);
    }

    #[test]
    fn extracts_link_with_reversed_attrs() {
        let sources = import_sources(r#"<link href="./src/global.css" rel="stylesheet" />"#);
        assert_eq!(sources, ["./src/global.css"]);
    }

    // Specifier normalization

    #[test]
    fn bare_script_src_normalized_to_relative() {
        let sources = import_sources(r#"<script src="app.js"></script>"#);
        assert_eq!(sources, ["./app.js"]);
    }

    #[test]
    fn bare_module_script_src_normalized_to_relative() {
        let sources = import_sources(r#"<script type="module" src="main.ts"></script>"#);
        assert_eq!(sources, ["./main.ts"]);
    }

    #[test]
    fn bare_stylesheet_link_href_normalized_to_relative() {
        let sources = import_sources(r#"<link rel="stylesheet" href="styles.css" />"#);
        assert_eq!(sources, ["./styles.css"]);
    }

    #[test]
    fn bare_link_href_reversed_attrs_normalized_to_relative() {
        let sources = import_sources(r#"<link href="styles.css" rel="stylesheet" />"#);
        assert_eq!(sources, ["./styles.css"]);
    }

    #[test]
    fn bare_modulepreload_link_href_normalized_to_relative() {
        let sources = import_sources(r#"<link rel="modulepreload" href="vendor.js" />"#);
        assert_eq!(sources, ["./vendor.js"]);
    }

    #[test]
    fn bare_asset_with_subdir_normalized_to_relative() {
        let sources = import_sources(r#"<script src="assets/app.js"></script>"#);
        assert_eq!(sources, ["./assets/app.js"]);
    }

    #[test]
    fn root_absolute_script_src_unchanged() {
        let sources = import_sources(r#"<script src="/src/main.ts"></script>"#);
        assert_eq!(sources, ["/src/main.ts"]);
    }

    #[test]
    fn parent_relative_script_src_unchanged() {
        let sources = import_sources(r#"<script src="../shared/vendor.js"></script>"#);
        assert_eq!(sources, ["../shared/vendor.js"]);
    }

    // Links that must be ignored

    #[test]
    fn skips_preload_link() {
        let sources = import_sources(r#"<link rel="preload" href="./src/font.woff2" as="font" />"#);
        assert!(sources.is_empty());
    }

    #[test]
    fn skips_icon_link() {
        let sources = import_sources(r#"<link rel="icon" href="./favicon.ico" />"#);
        assert!(sources.is_empty());
    }

    #[test]
    fn skips_remote_stylesheet() {
        let sources =
            import_sources(r#"<link rel="stylesheet" href="https://fonts.googleapis.com/css" />"#);
        assert!(sources.is_empty());
    }

    // HTML comments

    #[test]
    fn skips_commented_out_script() {
        let html = r#"<!-- <script src="./old.js"></script> -->
            <script src="./new.js"></script>"#;
        assert_eq!(import_sources(html), ["./new.js"]);
    }

    #[test]
    fn skips_commented_out_link() {
        let html = r#"<!-- <link rel="stylesheet" href="./old.css" /> -->
            <link rel="stylesheet" href="./new.css" />"#;
        assert_eq!(import_sources(html), ["./new.css"]);
    }

    // Tags spread over several lines

    #[test]
    fn handles_multiline_script_tag() {
        let html = "<script\n type=\"module\"\n src=\"./src/entry.js\"\n></script>";
        assert_eq!(import_sources(html), ["./src/entry.js"]);
    }

    #[test]
    fn handles_multiline_link_tag() {
        let html = "<link\n rel=\"stylesheet\"\n href=\"./src/global.css\"\n/>";
        assert_eq!(import_sources(html), ["./src/global.css"]);
    }

    // Whole documents

    #[test]
    fn full_vite_html() {
        let html = r#"<!doctype html>
<html>
  <head>
    <link rel="stylesheet" href="./src/global.css" />
    <link rel="icon" href="/favicon.ico" />
  </head>
  <body>
    <div id="app"></div>
    <script type="module" src="./src/entry.js"></script>
  </body>
</html>"#;
        let sources = import_sources(html);
        assert_eq!(sources.len(), 2);
        assert!(sources.iter().any(|source| source == "./src/global.css"));
        assert!(sources.iter().any(|source| source == "./src/entry.js"));
    }

    // Edge cases

    #[test]
    fn empty_html() {
        assert!(import_sources("").is_empty());
    }

    #[test]
    fn html_with_no_assets() {
        let html = r"<!doctype html><html><body><h1>Hello</h1></body></html>";
        assert!(import_sources(html).is_empty());
    }

    #[test]
    fn single_quoted_attributes() {
        let sources = import_sources(r"<script src='./src/entry.js'></script>");
        assert_eq!(sources, ["./src/entry.js"]);
    }

    #[test]
    fn all_imports_are_side_effect() {
        let html = r#"<script src="./entry.js"></script>
            <link rel="stylesheet" href="./style.css" />"#;
        let info = parse(html);
        for import in &info.imports {
            assert!(matches!(import.imported_name, ImportedName::SideEffect));
            assert!(import.local_name.is_empty());
            assert!(!import.is_type_only);
        }
    }

    #[test]
    fn suppression_comments_extracted() {
        let html = "<!-- fallow-ignore-file -->\n<script src=\"./entry.js\"></script>";
        assert_eq!(parse(html).imports.len(), 1);
    }

    // Angular templates

    #[test]
    fn angular_template_extracts_member_refs() {
        let info = parse(
            "<h1>{{ title() }}</h1>\n\
             <p [class.highlighted]=\"isHighlighted\">{{ greeting() }}</p>\n\
             <button (click)=\"onButtonClick()\">Toggle</button>",
        );
        let names: rustc_hash::FxHashSet<&str> = info
            .member_accesses
            .iter()
            .filter(|access| access.object == ANGULAR_TPL_SENTINEL)
            .map(|access| access.member.as_str())
            .collect();
        for expected in ["title", "isHighlighted", "greeting", "onButtonClick"] {
            assert!(names.contains(expected), "should contain '{expected}'");
        }
    }

    #[test]
    fn plain_html_no_angular_refs() {
        let info = parse("<!doctype html><html><body><h1>Hello</h1></body></html>");
        assert!(info.member_accesses.is_empty());
    }
}