1use std::path::Path;
12use std::sync::LazyLock;
13
14use oxc_span::Span;
15
16use crate::asset_url::normalize_asset_url;
17use crate::sfc_template::angular::{self, ANGULAR_TPL_SENTINEL};
18use crate::{ImportInfo, ImportedName, MemberAccess, ModuleInfo};
19use fallow_types::discover::FileId;
20
/// Matches a single HTML comment (`<!-- ... -->`).
///
/// `(?s)` lets `.` cross newlines so multi-line comments are covered, and the
/// lazy `.*?` stops at the first `-->` so adjacent comments are matched
/// separately instead of swallowing the markup between them.
static HTML_COMMENT_RE: LazyLock<regex::Regex> =
    LazyLock::new(|| regex::Regex::new(r"(?s)<!--.*?-->").expect("valid regex"));
24
/// Matches a `<script ...>` opening tag that carries a quoted `src` attribute.
///
/// Capture group 1 is the attribute value. `(?s)` allows the tag to span
/// several lines and `(?i)` makes tag and attribute names case-insensitive.
/// The `(?:[^>"']|"[^"]*"|'[^']*')*?` run skips over preceding attributes,
/// treating quoted values as opaque so a `>` inside a quoted value does not
/// end the tag early.
///
/// NOTE(review): `\bsrc` also matches the tail of hyphenated attributes such
/// as `data-src`, and unquoted attribute values are not matched at all.
static SCRIPT_SRC_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
    regex::Regex::new(r#"(?si)<script\b(?:[^>"']|"[^"]*"|'[^']*')*?\bsrc\s*=\s*["']([^"']+)["']"#)
        .expect("valid regex")
});
32
/// Matches a `<link ...>` tag whose `rel` attribute appears *before* `href`.
///
/// Capture group 1 is the `rel` value and group 2 is the `href` value. Only a
/// `rel` that is exactly `stylesheet` or `modulepreload` (case-insensitive via
/// `(?i)`) matches; other relations such as `icon` or `preload` are ignored,
/// as are multi-token `rel` values. `(?s)` allows the tag to span lines.
static LINK_HREF_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
    regex::Regex::new(
        r#"(?si)<link\b(?:[^>"']|"[^"]*"|'[^']*')*?\brel\s*=\s*["'](stylesheet|modulepreload)["'](?:[^>"']|"[^"]*"|'[^']*')*?\bhref\s*=\s*["']([^"']+)["']"#,
    )
    .expect("valid regex")
});
42
/// Matches a `<link ...>` tag whose `href` attribute appears *before* `rel`.
///
/// This is the attribute-order mirror of the rel-first pattern: here capture
/// group 1 is the `href` value and group 2 is the `rel` value. The same
/// restriction applies: `rel` must be exactly `stylesheet` or
/// `modulepreload` (case-insensitive), and the tag may span several lines.
static LINK_HREF_REVERSE_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
    regex::Regex::new(
        r#"(?si)<link\b(?:[^>"']|"[^"]*"|'[^']*')*?\bhref\s*=\s*["']([^"']+)["'](?:[^>"']|"[^"]*"|'[^']*')*?\brel\s*=\s*["'](stylesheet|modulepreload)["']"#,
    )
    .expect("valid regex")
});
50
/// Reports whether `path` names an HTML file.
///
/// Only the exact, lowercase extension `html` qualifies: `.htm`, upper-case
/// variants, and paths without a (UTF-8) extension all yield `false`.
pub(crate) fn is_html_file(path: &Path) -> bool {
    let extension = path.extension().and_then(|os_ext| os_ext.to_str());
    matches!(extension, Some("html"))
}
58
/// Reports whether `src` points at a resource that does not live in the
/// project: an `http://` / `https://` URL, a protocol-relative `//host/...`
/// URL, or an inline `data:` URI.
///
/// URL schemes are case-insensitive (RFC 3986 §3.1), so `HTTPS://cdn/...` and
/// `Data:...` are recognised as remote as well; a case-sensitive check would
/// let them through as local asset specifiers.
///
/// Callers are expected to pass an already-trimmed value; leading whitespace
/// is not skipped here.
pub(crate) fn is_remote_url(src: &str) -> bool {
    const REMOTE_SCHEME_PREFIXES: [&str; 3] = ["http://", "https://", "data:"];

    // Protocol-relative URLs have no scheme, so no case folding is needed.
    if src.starts_with("//") {
        return true;
    }

    // Compare on bytes: slicing the `str` at `prefix.len()` could land inside
    // a multi-byte character, whereas a byte-slice `get` simply returns `None`
    // for inputs shorter than the prefix.
    let bytes = src.as_bytes();
    REMOTE_SCHEME_PREFIXES.iter().any(|prefix| {
        bytes
            .get(..prefix.len())
            .is_some_and(|head| head.eq_ignore_ascii_case(prefix.as_bytes()))
    })
}
66
/// Reports whether `value` still contains an unexpanded template marker and
/// therefore is not a real path yet.
///
/// Two markers are recognised anywhere in the string: a double opening brace
/// (`{{`) and a triple hash (`###`).
pub(crate) fn is_template_placeholder(value: &str) -> bool {
    const PLACEHOLDER_MARKERS: [&str; 2] = ["{{", "###"];
    PLACEHOLDER_MARKERS
        .iter()
        .any(|marker| value.contains(*marker))
}
84
85pub(crate) fn collect_asset_refs(source: &str) -> Vec<String> {
92 let stripped = HTML_COMMENT_RE.replace_all(source, "");
93 let mut refs: Vec<String> = Vec::new();
94
95 for cap in SCRIPT_SRC_RE.captures_iter(&stripped) {
96 if let Some(m) = cap.get(1) {
97 let src = m.as_str().trim();
98 if !src.is_empty() && !is_remote_url(src) && !is_template_placeholder(src) {
99 refs.push(src.to_string());
100 }
101 }
102 }
103
104 for cap in LINK_HREF_RE.captures_iter(&stripped) {
105 if let Some(m) = cap.get(2) {
106 let href = m.as_str().trim();
107 if !href.is_empty() && !is_remote_url(href) && !is_template_placeholder(href) {
108 refs.push(href.to_string());
109 }
110 }
111 }
112 for cap in LINK_HREF_REVERSE_RE.captures_iter(&stripped) {
113 if let Some(m) = cap.get(1) {
114 let href = m.as_str().trim();
115 if !href.is_empty() && !is_remote_url(href) && !is_template_placeholder(href) {
116 refs.push(href.to_string());
117 }
118 }
119 }
120
121 refs
122}
123
/// Test-only convenience wrapper around
/// `parse_html_to_module_with_complexity` that always passes
/// `need_complexity = false`, so the returned module has no complexity data.
#[cfg(test)]
pub(crate) fn parse_html_to_module(file_id: FileId, source: &str, content_hash: u64) -> ModuleInfo {
    parse_html_to_module_with_complexity(file_id, source, content_hash, false)
}
129
130pub(crate) fn parse_html_to_module_with_complexity(
132 file_id: FileId,
133 source: &str,
134 content_hash: u64,
135 need_complexity: bool,
136) -> ModuleInfo {
137 let parsed_suppressions = crate::suppress::parse_suppressions_from_source(source);
138
139 let mut imports: Vec<ImportInfo> = collect_asset_refs(source)
142 .into_iter()
143 .map(|raw| ImportInfo {
144 source: normalize_asset_url(&raw),
145 imported_name: ImportedName::SideEffect,
146 local_name: String::new(),
147 is_type_only: false,
148 from_style: false,
149 span: Span::default(),
150 source_span: Span::default(),
151 })
152 .collect();
153
154 imports.sort_unstable_by(|a, b| a.source.cmp(&b.source));
157 imports.dedup_by(|a, b| a.source == b.source);
158
159 let template_refs = angular::collect_angular_template_refs(source);
172 let mut member_accesses: Vec<MemberAccess> = template_refs
173 .identifiers
174 .into_iter()
175 .map(|name| MemberAccess {
176 object: ANGULAR_TPL_SENTINEL.to_string(),
177 member: name,
178 })
179 .collect();
180 member_accesses.extend(template_refs.member_accesses);
181
182 let complexity = if need_complexity {
183 crate::template_complexity::compute_angular_template_complexity(source)
184 .into_iter()
185 .collect()
186 } else {
187 Vec::new()
188 };
189
190 ModuleInfo {
191 file_id,
192 exports: Vec::new(),
193 imports,
194 re_exports: Vec::new(),
195 dynamic_imports: Vec::new(),
196 dynamic_import_patterns: Vec::new(),
197 require_calls: Vec::new(),
198 member_accesses,
199 whole_object_uses: Vec::new(),
200 has_cjs_exports: false,
201 has_angular_component_template_url: false,
202 content_hash,
203 suppressions: parsed_suppressions.suppressions,
204 unknown_suppression_kinds: parsed_suppressions.unknown_kinds,
205 unused_import_bindings: Vec::new(),
206 type_referenced_import_bindings: Vec::new(),
207 value_referenced_import_bindings: Vec::new(),
208 line_offsets: fallow_types::extract::compute_line_offsets(source),
209 complexity,
210 flag_uses: Vec::new(),
211 class_heritage: vec![],
212 local_type_declarations: Vec::new(),
213 public_signature_type_references: Vec::new(),
214 namespace_object_aliases: Vec::new(),
215 iconify_prefixes: Vec::new(),
216 }
217}
218
/// Unit tests for HTML file detection, remote-URL classification, asset
/// reference extraction, and Angular template reference collection.
#[cfg(test)]
mod tests {
    use super::*;

    // ── is_html_file ─────────────────────────────────────────────

    #[test]
    fn is_html_file_html() {
        assert!(is_html_file(Path::new("index.html")));
    }

    #[test]
    fn is_html_file_nested() {
        assert!(is_html_file(Path::new("pages/about.html")));
    }

    // Only the exact `html` extension is accepted; `.htm` is not.
    #[test]
    fn is_html_file_rejects_htm() {
        assert!(!is_html_file(Path::new("index.htm")));
    }

    #[test]
    fn is_html_file_rejects_js() {
        assert!(!is_html_file(Path::new("app.js")));
    }

    #[test]
    fn is_html_file_rejects_ts() {
        assert!(!is_html_file(Path::new("app.ts")));
    }

    #[test]
    fn is_html_file_rejects_vue() {
        assert!(!is_html_file(Path::new("App.vue")));
    }

    // ── is_remote_url ────────────────────────────────────────────

    #[test]
    fn remote_url_http() {
        assert!(is_remote_url("http://example.com/script.js"));
    }

    #[test]
    fn remote_url_https() {
        assert!(is_remote_url("https://cdn.example.com/style.css"));
    }

    #[test]
    fn remote_url_protocol_relative() {
        assert!(is_remote_url("//cdn.example.com/lib.js"));
    }

    #[test]
    fn remote_url_data() {
        assert!(is_remote_url("data:text/javascript;base64,abc"));
    }

    #[test]
    fn local_relative_not_remote() {
        assert!(!is_remote_url("./src/entry.js"));
    }

    // A single leading slash is root-relative, not protocol-relative.
    #[test]
    fn local_root_relative_not_remote() {
        assert!(!is_remote_url("/src/entry.js"));
    }

    // ── <script src> extraction ──────────────────────────────────

    #[test]
    fn extracts_module_script_src() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script type="module" src="./src/entry.js"></script>"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/entry.js");
    }

    #[test]
    fn extracts_plain_script_src() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script src="./src/polyfills.js"></script>"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/polyfills.js");
    }

    #[test]
    fn extracts_multiple_scripts() {
        let info = parse_html_to_module(
            FileId(0),
            r#"
            <script type="module" src="./src/entry.js"></script>
            <script src="./src/polyfills.js"></script>
        "#,
            0,
        );
        assert_eq!(info.imports.len(), 2);
    }

    // A script without a `src` attribute references no file.
    #[test]
    fn skips_inline_script() {
        let info = parse_html_to_module(FileId(0), r#"<script>console.log("hello");</script>"#, 0);
        assert!(info.imports.is_empty());
    }

    // ── Template placeholders ────────────────────────────────────

    // Values containing `{{` are unexpanded template expressions, not paths.
    #[test]
    fn skips_handlebars_placeholder_in_script_src() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script src="{{rootURL}}assets/app.js"></script>
               <script src="{{config.assetsPath}}vendor.js"></script>"#,
            0,
        );
        assert!(
            info.imports.is_empty(),
            "Handlebars-placeholder script srcs should not enter the import graph; got {:?}",
            info.imports
        );
    }

    #[test]
    fn skips_handlebars_placeholder_in_link_href() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link rel="stylesheet" href="{{rootURL}}assets/app.css">"#,
            0,
        );
        assert!(info.imports.is_empty());
    }

    // Values containing `###` are likewise treated as placeholders.
    #[test]
    fn skips_ember_cli_blueprint_placeholder() {
        let info = parse_html_to_module(
            FileId(0),
            r####"<script src="###APPNAME###/app.js"></script>"####,
            0,
        );
        assert!(info.imports.is_empty());
    }

    // Skipping a placeholder must not suppress real specifiers in the same file.
    #[test]
    fn extracts_normal_specifier_alongside_placeholders() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script src="{{rootURL}}assets/app.js"></script>
               <script src="./src/main.ts"></script>"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/main.ts");
    }

    // ── Remote scripts ───────────────────────────────────────────

    #[test]
    fn skips_remote_script() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script src="https://cdn.example.com/lib.js"></script>"#,
            0,
        );
        assert!(info.imports.is_empty());
    }

    #[test]
    fn skips_protocol_relative_script() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script src="//cdn.example.com/lib.js"></script>"#,
            0,
        );
        assert!(info.imports.is_empty());
    }

    // ── <link href> extraction ───────────────────────────────────

    #[test]
    fn extracts_stylesheet_link() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link rel="stylesheet" href="./src/global.css" />"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/global.css");
    }

    #[test]
    fn extracts_modulepreload_link() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link rel="modulepreload" href="./src/vendor.js" />"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/vendor.js");
    }

    // `href` before `rel` is handled by the reverse-order pattern.
    #[test]
    fn extracts_link_with_reversed_attrs() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link href="./src/global.css" rel="stylesheet" />"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/global.css");
    }

    // ── Bare specifier normalization ─────────────────────────────
    // Specifiers without a leading `./`, `../`, or `/` come out of parsing
    // with a `./` prefix, as the assertions below pin.

    #[test]
    fn bare_script_src_normalized_to_relative() {
        let info = parse_html_to_module(FileId(0), r#"<script src="app.js"></script>"#, 0);
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./app.js");
    }

    #[test]
    fn bare_module_script_src_normalized_to_relative() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script type="module" src="main.ts"></script>"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./main.ts");
    }

    #[test]
    fn bare_stylesheet_link_href_normalized_to_relative() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link rel="stylesheet" href="styles.css" />"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./styles.css");
    }

    #[test]
    fn bare_link_href_reversed_attrs_normalized_to_relative() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link href="styles.css" rel="stylesheet" />"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./styles.css");
    }

    #[test]
    fn bare_modulepreload_link_href_normalized_to_relative() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link rel="modulepreload" href="vendor.js" />"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./vendor.js");
    }

    #[test]
    fn bare_asset_with_subdir_normalized_to_relative() {
        let info = parse_html_to_module(FileId(0), r#"<script src="assets/app.js"></script>"#, 0);
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./assets/app.js");
    }

    // Root-absolute and parent-relative specifiers pass through untouched.
    #[test]
    fn root_absolute_script_src_unchanged() {
        let info = parse_html_to_module(FileId(0), r#"<script src="/src/main.ts"></script>"#, 0);
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "/src/main.ts");
    }

    #[test]
    fn parent_relative_script_src_unchanged() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script src="../shared/vendor.js"></script>"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "../shared/vendor.js");
    }

    // ── Ignored <link> relations ─────────────────────────────────

    #[test]
    fn skips_preload_link() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link rel="preload" href="./src/font.woff2" as="font" />"#,
            0,
        );
        assert!(info.imports.is_empty());
    }

    #[test]
    fn skips_icon_link() {
        let info =
            parse_html_to_module(FileId(0), r#"<link rel="icon" href="./favicon.ico" />"#, 0);
        assert!(info.imports.is_empty());
    }

    #[test]
    fn skips_remote_stylesheet() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link rel="stylesheet" href="https://fonts.googleapis.com/css" />"#,
            0,
        );
        assert!(info.imports.is_empty());
    }

    // ── HTML comment stripping ───────────────────────────────────

    #[test]
    fn skips_commented_out_script() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<!-- <script src="./old.js"></script> -->
               <script src="./new.js"></script>"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./new.js");
    }

    #[test]
    fn skips_commented_out_link() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<!-- <link rel="stylesheet" href="./old.css" /> -->
               <link rel="stylesheet" href="./new.css" />"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./new.css");
    }

    // ── Multi-line tags ──────────────────────────────────────────

    #[test]
    fn handles_multiline_script_tag() {
        let info = parse_html_to_module(
            FileId(0),
            "<script\n  type=\"module\"\n  src=\"./src/entry.js\"\n></script>",
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/entry.js");
    }

    #[test]
    fn handles_multiline_link_tag() {
        let info = parse_html_to_module(
            FileId(0),
            "<link\n  rel=\"stylesheet\"\n  href=\"./src/global.css\"\n/>",
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/global.css");
    }

    // ── Full document ────────────────────────────────────────────

    // A Vite-style index.html: the stylesheet and module entry are imports,
    // the favicon link is not.
    #[test]
    fn full_vite_html() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<!doctype html>
<html>
  <head>
    <link rel="stylesheet" href="./src/global.css" />
    <link rel="icon" href="/favicon.ico" />
  </head>
  <body>
    <div id="app"></div>
    <script type="module" src="./src/entry.js"></script>
  </body>
</html>"#,
            0,
        );
        assert_eq!(info.imports.len(), 2);
        let sources: Vec<&str> = info.imports.iter().map(|i| i.source.as_str()).collect();
        assert!(sources.contains(&"./src/global.css"));
        assert!(sources.contains(&"./src/entry.js"));
    }

    // ── Edge cases ───────────────────────────────────────────────

    #[test]
    fn empty_html() {
        let info = parse_html_to_module(FileId(0), "", 0);
        assert!(info.imports.is_empty());
    }

    #[test]
    fn html_with_no_assets() {
        let info = parse_html_to_module(
            FileId(0),
            r"<!doctype html><html><body><h1>Hello</h1></body></html>",
            0,
        );
        assert!(info.imports.is_empty());
    }

    #[test]
    fn single_quoted_attributes() {
        let info = parse_html_to_module(FileId(0), r"<script src='./src/entry.js'></script>", 0);
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/entry.js");
    }

    // Every HTML-derived import is a plain side-effect import with no binding.
    #[test]
    fn all_imports_are_side_effect() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script src="./entry.js"></script>
               <link rel="stylesheet" href="./style.css" />"#,
            0,
        );
        for imp in &info.imports {
            assert!(matches!(imp.imported_name, ImportedName::SideEffect));
            assert!(imp.local_name.is_empty());
            assert!(!imp.is_type_only);
        }
    }

    // NOTE(review): despite the name, this only asserts that the import after
    // the suppression comment is still extracted; the parsed suppression
    // itself is not checked here.
    #[test]
    fn suppression_comments_extracted() {
        let info = parse_html_to_module(
            FileId(0),
            "<!-- fallow-ignore-file -->\n<script src=\"./entry.js\"></script>",
            0,
        );
        assert_eq!(info.imports.len(), 1);
    }

    // ── Angular template references ──────────────────────────────

    // Identifiers used in interpolations, property bindings, and event
    // bindings are recorded as member accesses on the sentinel object.
    #[test]
    fn angular_template_extracts_member_refs() {
        let info = parse_html_to_module(
            FileId(0),
            "<h1>{{ title() }}</h1>\n\
             <p [class.highlighted]=\"isHighlighted\">{{ greeting() }}</p>\n\
             <button (click)=\"onButtonClick()\">Toggle</button>",
            0,
        );
        let names: rustc_hash::FxHashSet<&str> = info
            .member_accesses
            .iter()
            .filter(|a| a.object == ANGULAR_TPL_SENTINEL)
            .map(|a| a.member.as_str())
            .collect();
        assert!(names.contains("title"), "should contain 'title'");
        assert!(
            names.contains("isHighlighted"),
            "should contain 'isHighlighted'"
        );
        assert!(names.contains("greeting"), "should contain 'greeting'");
        assert!(
            names.contains("onButtonClick"),
            "should contain 'onButtonClick'"
        );
    }

    // Markup without Angular syntax yields no member accesses at all.
    #[test]
    fn plain_html_no_angular_refs() {
        let info = parse_html_to_module(
            FileId(0),
            "<!doctype html><html><body><h1>Hello</h1></body></html>",
            0,
        );
        assert!(info.member_accesses.is_empty());
    }
}