1use std::path::Path;
12use std::sync::LazyLock;
13
14use oxc_span::Span;
15
16use crate::asset_url::normalize_asset_url;
17use crate::sfc_template::angular::{self, ANGULAR_TPL_SENTINEL};
18use crate::{ImportInfo, ImportedName, MemberAccess, ModuleInfo};
19use fallow_types::discover::FileId;
20
21static HTML_COMMENT_RE: LazyLock<regex::Regex> =
23 LazyLock::new(|| crate::static_regex(r"(?s)<!--.*?-->"));
24
25static SCRIPT_SRC_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
29 crate::static_regex(r#"(?si)<script\b(?:[^>"']|"[^"]*"|'[^']*')*?\bsrc\s*=\s*["']([^"']+)["']"#)
30});
31
32static LINK_HREF_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
36 crate::static_regex(
37 r#"(?si)<link\b(?:[^>"']|"[^"]*"|'[^']*')*?\brel\s*=\s*["'](stylesheet|modulepreload)["'](?:[^>"']|"[^"]*"|'[^']*')*?\bhref\s*=\s*["']([^"']+)["']"#,
38 )
39});
40
41static LINK_HREF_REVERSE_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
43 crate::static_regex(
44 r#"(?si)<link\b(?:[^>"']|"[^"]*"|'[^']*')*?\bhref\s*=\s*["']([^"']+)["'](?:[^>"']|"[^"]*"|'[^']*')*?\brel\s*=\s*["'](stylesheet|modulepreload)["']"#,
45 )
46});
47
/// Returns `true` when `path` has the exact extension `html`.
///
/// The comparison is case-sensitive, and other spellings such as `htm` are
/// rejected. A missing or non-UTF-8 extension yields `false`.
pub(crate) fn is_html_file(path: &Path) -> bool {
    matches!(
        path.extension().and_then(std::ffi::OsStr::to_str),
        Some("html")
    )
}
54
/// Returns `true` when `src` points outside the project and must not be
/// resolved as a local file.
///
/// Recognized forms are `http://` and `https://` URLs, protocol-relative URLs
/// (`//host/...`), and `data:` URIs. URI schemes are case-insensitive, so
/// `HTTPS://...` and `Data:...` are treated the same as their lowercase forms.
///
/// The value is inspected as given; callers are expected to trim surrounding
/// whitespace first.
pub(crate) fn is_remote_url(src: &str) -> bool {
    const REMOTE_SCHEME_PREFIXES: [&str; 3] = ["http://", "https://", "data:"];

    src.starts_with("//")
        || REMOTE_SCHEME_PREFIXES.iter().any(|prefix| {
            // `get` returns `None` when `src` is shorter than the prefix or the
            // cut would split a multi-byte character, so this never panics.
            src.get(..prefix.len())
                .is_some_and(|head| head.eq_ignore_ascii_case(prefix))
        })
}
62
/// Returns `true` when `value` contains an unexpanded template marker.
///
/// Two markers are recognized: `{{` (as in Handlebars-style `{{rootURL}}`) and
/// `###` (as in `###APPNAME###`). Such a value is not a usable file specifier
/// until the template has been rendered.
pub(crate) fn is_template_placeholder(value: &str) -> bool {
    const PLACEHOLDER_MARKERS: [&str; 2] = ["{{", "###"];
    PLACEHOLDER_MARKERS
        .iter()
        .any(|&marker| value.contains(marker))
}
80
81pub(crate) fn collect_asset_refs(source: &str) -> Vec<String> {
88 let stripped = HTML_COMMENT_RE.replace_all(source, "");
89 let mut refs: Vec<String> = Vec::new();
90
91 for cap in SCRIPT_SRC_RE.captures_iter(&stripped) {
92 if let Some(m) = cap.get(1) {
93 let src = m.as_str().trim();
94 if !src.is_empty() && !is_remote_url(src) && !is_template_placeholder(src) {
95 refs.push(src.to_string());
96 }
97 }
98 }
99
100 for cap in LINK_HREF_RE.captures_iter(&stripped) {
101 if let Some(m) = cap.get(2) {
102 let href = m.as_str().trim();
103 if !href.is_empty() && !is_remote_url(href) && !is_template_placeholder(href) {
104 refs.push(href.to_string());
105 }
106 }
107 }
108 for cap in LINK_HREF_REVERSE_RE.captures_iter(&stripped) {
109 if let Some(m) = cap.get(1) {
110 let href = m.as_str().trim();
111 if !href.is_empty() && !is_remote_url(href) && !is_template_placeholder(href) {
112 refs.push(href.to_string());
113 }
114 }
115 }
116
117 refs
118}
119
/// Test-only convenience wrapper around
/// [`parse_html_to_module_with_complexity`] that skips complexity computation.
#[cfg(test)]
pub(crate) fn parse_html_to_module(file_id: FileId, source: &str, content_hash: u64) -> ModuleInfo {
    const NEED_COMPLEXITY: bool = false;
    parse_html_to_module_with_complexity(file_id, source, content_hash, NEED_COMPLEXITY)
}
125
126pub(crate) fn parse_html_to_module_with_complexity(
128 file_id: FileId,
129 source: &str,
130 content_hash: u64,
131 need_complexity: bool,
132) -> ModuleInfo {
133 let parsed_suppressions = crate::suppress::parse_suppressions_from_source(source);
134
135 let mut imports: Vec<ImportInfo> = collect_asset_refs(source)
136 .into_iter()
137 .map(|raw| ImportInfo {
138 source: normalize_asset_url(&raw),
139 imported_name: ImportedName::SideEffect,
140 local_name: String::new(),
141 is_type_only: false,
142 from_style: false,
143 span: Span::default(),
144 source_span: Span::default(),
145 })
146 .collect();
147
148 imports.sort_unstable_by(|a, b| a.source.cmp(&b.source));
149 imports.dedup_by(|a, b| a.source == b.source);
150
151 let template_refs = angular::collect_angular_template_refs(source);
152 let mut member_accesses: Vec<MemberAccess> = template_refs
153 .identifiers
154 .into_iter()
155 .map(|name| MemberAccess {
156 object: ANGULAR_TPL_SENTINEL.to_string(),
157 member: name,
158 })
159 .collect();
160 member_accesses.extend(template_refs.member_accesses);
161
162 let complexity = if need_complexity {
163 crate::template_complexity::compute_angular_template_complexity(source)
164 .into_iter()
165 .collect()
166 } else {
167 Vec::new()
168 };
169
170 ModuleInfo {
171 file_id,
172 exports: Vec::new(),
173 imports,
174 re_exports: Vec::new(),
175 dynamic_imports: Vec::new(),
176 dynamic_import_patterns: Vec::new(),
177 require_calls: Vec::new(),
178 member_accesses,
179 whole_object_uses: Vec::new(),
180 has_cjs_exports: false,
181 has_angular_component_template_url: false,
182 content_hash,
183 suppressions: parsed_suppressions.suppressions,
184 unknown_suppression_kinds: parsed_suppressions.unknown_kinds,
185 unused_import_bindings: Vec::new(),
186 type_referenced_import_bindings: Vec::new(),
187 value_referenced_import_bindings: Vec::new(),
188 line_offsets: fallow_types::extract::compute_line_offsets(source),
189 complexity,
190 flag_uses: Vec::new(),
191 class_heritage: vec![],
192 local_type_declarations: Vec::new(),
193 public_signature_type_references: Vec::new(),
194 namespace_object_aliases: Vec::new(),
195 iconify_prefixes: Vec::new(),
196 auto_import_candidates: Vec::new(),
197 directives: Vec::new(),
198 security_sinks: Vec::new(),
199 security_sinks_skipped: 0,
200 tainted_bindings: Vec::new(),
201 sanitized_sink_args: Vec::new(),
202 }
203}
204
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `html` as file 0 with a zero content hash.
    fn parse(html: &str) -> ModuleInfo {
        parse_html_to_module(FileId(0), html, 0)
    }

    /// Asserts that `html` yields exactly one import whose specifier is `expected`.
    #[track_caller]
    fn assert_single_import(html: &str, expected: &str) {
        let module = parse(html);
        assert_eq!(module.imports.len(), 1);
        assert_eq!(module.imports[0].source, expected);
    }

    /// Asserts that `html` yields no imports.
    #[track_caller]
    fn assert_no_imports(html: &str) {
        assert!(parse(html).imports.is_empty());
    }

    // --- is_html_file ---

    #[test]
    fn is_html_file_html() {
        assert!(is_html_file(Path::new("index.html")));
    }

    #[test]
    fn is_html_file_nested() {
        assert!(is_html_file(Path::new("pages/about.html")));
    }

    #[test]
    fn is_html_file_rejects_htm() {
        assert!(!is_html_file(Path::new("index.htm")));
    }

    #[test]
    fn is_html_file_rejects_js() {
        assert!(!is_html_file(Path::new("app.js")));
    }

    #[test]
    fn is_html_file_rejects_ts() {
        assert!(!is_html_file(Path::new("app.ts")));
    }

    #[test]
    fn is_html_file_rejects_vue() {
        assert!(!is_html_file(Path::new("App.vue")));
    }

    // --- is_remote_url ---

    #[test]
    fn remote_url_http() {
        assert!(is_remote_url("http://example.com/script.js"));
    }

    #[test]
    fn remote_url_https() {
        assert!(is_remote_url("https://cdn.example.com/style.css"));
    }

    #[test]
    fn remote_url_protocol_relative() {
        assert!(is_remote_url("//cdn.example.com/lib.js"));
    }

    #[test]
    fn remote_url_data() {
        assert!(is_remote_url("data:text/javascript;base64,abc"));
    }

    #[test]
    fn local_relative_not_remote() {
        assert!(!is_remote_url("./src/entry.js"));
    }

    #[test]
    fn local_root_relative_not_remote() {
        assert!(!is_remote_url("/src/entry.js"));
    }

    // --- script extraction ---

    #[test]
    fn extracts_module_script_src() {
        assert_single_import(
            r#"<script type="module" src="./src/entry.js"></script>"#,
            "./src/entry.js",
        );
    }

    #[test]
    fn extracts_plain_script_src() {
        assert_single_import(
            r#"<script src="./src/polyfills.js"></script>"#,
            "./src/polyfills.js",
        );
    }

    #[test]
    fn extracts_multiple_scripts() {
        let module = parse(
            r#"
            <script type="module" src="./src/entry.js"></script>
            <script src="./src/polyfills.js"></script>
            "#,
        );
        assert_eq!(module.imports.len(), 2);
    }

    #[test]
    fn skips_inline_script() {
        assert_no_imports(r#"<script>console.log("hello");</script>"#);
    }

    // --- template placeholders ---

    #[test]
    fn skips_handlebars_placeholder_in_script_src() {
        let module = parse(
            r#"<script src="{{rootURL}}assets/app.js"></script>
            <script src="{{config.assetsPath}}vendor.js"></script>"#,
        );
        assert!(
            module.imports.is_empty(),
            "Handlebars-placeholder script srcs should not enter the import graph; got {:?}",
            module.imports
        );
    }

    #[test]
    fn skips_handlebars_placeholder_in_link_href() {
        assert_no_imports(r#"<link rel="stylesheet" href="{{rootURL}}assets/app.css">"#);
    }

    #[test]
    fn skips_ember_cli_blueprint_placeholder() {
        assert_no_imports(r####"<script src="###APPNAME###/app.js"></script>"####);
    }

    #[test]
    fn extracts_normal_specifier_alongside_placeholders() {
        assert_single_import(
            r#"<script src="{{rootURL}}assets/app.js"></script>
            <script src="./src/main.ts"></script>"#,
            "./src/main.ts",
        );
    }

    // --- remote scripts ---

    #[test]
    fn skips_remote_script() {
        assert_no_imports(r#"<script src="https://cdn.example.com/lib.js"></script>"#);
    }

    #[test]
    fn skips_protocol_relative_script() {
        assert_no_imports(r#"<script src="//cdn.example.com/lib.js"></script>"#);
    }

    // --- link extraction ---

    #[test]
    fn extracts_stylesheet_link() {
        assert_single_import(
            r#"<link rel="stylesheet" href="./src/global.css" />"#,
            "./src/global.css",
        );
    }

    #[test]
    fn extracts_modulepreload_link() {
        assert_single_import(
            r#"<link rel="modulepreload" href="./src/vendor.js" />"#,
            "./src/vendor.js",
        );
    }

    #[test]
    fn extracts_link_with_reversed_attrs() {
        assert_single_import(
            r#"<link href="./src/global.css" rel="stylesheet" />"#,
            "./src/global.css",
        );
    }

    // --- specifier normalization ---

    #[test]
    fn bare_script_src_normalized_to_relative() {
        assert_single_import(r#"<script src="app.js"></script>"#, "./app.js");
    }

    #[test]
    fn bare_module_script_src_normalized_to_relative() {
        assert_single_import(
            r#"<script type="module" src="main.ts"></script>"#,
            "./main.ts",
        );
    }

    #[test]
    fn bare_stylesheet_link_href_normalized_to_relative() {
        assert_single_import(
            r#"<link rel="stylesheet" href="styles.css" />"#,
            "./styles.css",
        );
    }

    #[test]
    fn bare_link_href_reversed_attrs_normalized_to_relative() {
        assert_single_import(
            r#"<link href="styles.css" rel="stylesheet" />"#,
            "./styles.css",
        );
    }

    #[test]
    fn bare_modulepreload_link_href_normalized_to_relative() {
        assert_single_import(
            r#"<link rel="modulepreload" href="vendor.js" />"#,
            "./vendor.js",
        );
    }

    #[test]
    fn bare_asset_with_subdir_normalized_to_relative() {
        assert_single_import(
            r#"<script src="assets/app.js"></script>"#,
            "./assets/app.js",
        );
    }

    #[test]
    fn root_absolute_script_src_unchanged() {
        assert_single_import(r#"<script src="/src/main.ts"></script>"#, "/src/main.ts");
    }

    #[test]
    fn parent_relative_script_src_unchanged() {
        assert_single_import(
            r#"<script src="../shared/vendor.js"></script>"#,
            "../shared/vendor.js",
        );
    }

    // --- links that are ignored ---

    #[test]
    fn skips_preload_link() {
        assert_no_imports(r#"<link rel="preload" href="./src/font.woff2" as="font" />"#);
    }

    #[test]
    fn skips_icon_link() {
        assert_no_imports(r#"<link rel="icon" href="./favicon.ico" />"#);
    }

    #[test]
    fn skips_remote_stylesheet() {
        assert_no_imports(r#"<link rel="stylesheet" href="https://fonts.googleapis.com/css" />"#);
    }

    // --- comments ---

    #[test]
    fn skips_commented_out_script() {
        assert_single_import(
            r#"<!-- <script src="./old.js"></script> -->
            <script src="./new.js"></script>"#,
            "./new.js",
        );
    }

    #[test]
    fn skips_commented_out_link() {
        assert_single_import(
            r#"<!-- <link rel="stylesheet" href="./old.css" /> -->
            <link rel="stylesheet" href="./new.css" />"#,
            "./new.css",
        );
    }

    // --- multi-line tags ---

    #[test]
    fn handles_multiline_script_tag() {
        assert_single_import(
            "<script\n  type=\"module\"\n  src=\"./src/entry.js\"\n></script>",
            "./src/entry.js",
        );
    }

    #[test]
    fn handles_multiline_link_tag() {
        assert_single_import(
            "<link\n  rel=\"stylesheet\"\n  href=\"./src/global.css\"\n/>",
            "./src/global.css",
        );
    }

    // --- whole documents ---

    #[test]
    fn full_vite_html() {
        let module = parse(
            r#"<!doctype html>
<html>
  <head>
    <link rel="stylesheet" href="./src/global.css" />
    <link rel="icon" href="/favicon.ico" />
  </head>
  <body>
    <div id="app"></div>
    <script type="module" src="./src/entry.js"></script>
  </body>
</html>"#,
        );
        assert_eq!(module.imports.len(), 2);
        let specifiers: Vec<&str> = module
            .imports
            .iter()
            .map(|import| import.source.as_str())
            .collect();
        assert!(specifiers.contains(&"./src/global.css"));
        assert!(specifiers.contains(&"./src/entry.js"));
    }

    #[test]
    fn empty_html() {
        assert_no_imports("");
    }

    #[test]
    fn html_with_no_assets() {
        assert_no_imports(r"<!doctype html><html><body><h1>Hello</h1></body></html>");
    }

    #[test]
    fn single_quoted_attributes() {
        assert_single_import(
            r"<script src='./src/entry.js'></script>",
            "./src/entry.js",
        );
    }

    // --- import shape ---

    #[test]
    fn all_imports_are_side_effect() {
        let module = parse(
            r#"<script src="./entry.js"></script>
            <link rel="stylesheet" href="./style.css" />"#,
        );
        for import in &module.imports {
            assert!(matches!(import.imported_name, ImportedName::SideEffect));
            assert!(import.local_name.is_empty());
            assert!(!import.is_type_only);
        }
    }

    #[test]
    fn suppression_comments_extracted() {
        let module = parse("<!-- fallow-ignore-file -->\n<script src=\"./entry.js\"></script>");
        assert_eq!(module.imports.len(), 1);
    }

    // --- Angular template references ---

    #[test]
    fn angular_template_extracts_member_refs() {
        let module = parse(
            "<h1>{{ title() }}</h1>\n\
             <p [class.highlighted]=\"isHighlighted\">{{ greeting() }}</p>\n\
             <button (click)=\"onButtonClick()\">Toggle</button>",
        );
        let names: rustc_hash::FxHashSet<&str> = module
            .member_accesses
            .iter()
            .filter(|access| access.object == ANGULAR_TPL_SENTINEL)
            .map(|access| access.member.as_str())
            .collect();
        for expected in ["title", "isHighlighted", "greeting", "onButtonClick"] {
            assert!(names.contains(expected), "should contain '{expected}'");
        }
    }

    #[test]
    fn plain_html_no_angular_refs() {
        let module = parse("<!doctype html><html><body><h1>Hello</h1></body></html>");
        assert!(module.member_accesses.is_empty());
    }
}