1use std::path::Path;
12use std::sync::LazyLock;
13
14use oxc_span::Span;
15
16use crate::asset_url::normalize_asset_url;
17use crate::sfc_template::angular::{self, ANGULAR_TPL_SENTINEL};
18use crate::{ImportInfo, ImportedName, MemberAccess, ModuleInfo};
19use fallow_types::discover::FileId;
20
/// Matches one HTML comment, from `<!--` through the nearest `-->`.
///
/// `(?s)` lets `.` match newlines so multi-line comments are covered, and the
/// lazy `.*?` stops at the first terminator rather than running on to the last
/// `-->` in the document.
static HTML_COMMENT_RE: LazyLock<regex::Regex> =
    LazyLock::new(|| crate::static_regex(r"(?s)<!--.*?-->"));
24
/// Captures the quoted `src` attribute value of a `<script>` tag in group 1.
///
/// Matching is case-insensitive (`i`). Between the tag name and `src` the
/// pattern lazily skips attribute text made of either characters other than
/// `>`/quotes or complete quoted strings, so the match cannot leave the opening
/// tag, and a `>` inside a quoted attribute value does not end it. The value
/// must be wrapped in single or double quotes; unquoted values are not matched.
///
/// NOTE(review): `\bsrc` is also satisfied directly after a hyphen, so an
/// attribute such as `data-src="..."` looks like it would be captured as well;
/// confirm whether that is intended.
static SCRIPT_SRC_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
    crate::static_regex(r#"(?si)<script\b(?:[^>"']|"[^"]*"|'[^']*')*?\bsrc\s*=\s*["']([^"']+)["']"#)
});
31
/// Matches a `<link>` tag whose `rel` attribute appears before its `href`.
///
/// Group 1 is the `rel` value and group 2 is the `href` value. The quoted `rel`
/// value must be exactly `stylesheet` or `modulepreload` (case-insensitive), so
/// other relations such as `icon` or `preload` do not match. Attribute text
/// between the pieces is skipped with the same "non-quote characters or whole
/// quoted strings" rule, which keeps the match inside the opening tag.
static LINK_HREF_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
    crate::static_regex(
        r#"(?si)<link\b(?:[^>"']|"[^"]*"|'[^']*')*?\brel\s*=\s*["'](stylesheet|modulepreload)["'](?:[^>"']|"[^"]*"|'[^']*')*?\bhref\s*=\s*["']([^"']+)["']"#,
    )
});
40
/// Matches a `<link>` tag whose `href` attribute appears before its `rel`.
///
/// This is the attribute-order mirror of `LINK_HREF_RE`, and the capture groups
/// are swapped accordingly: group 1 is the `href` value and group 2 is the
/// `rel` value (exactly `stylesheet` or `modulepreload`, case-insensitive).
static LINK_HREF_REVERSE_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
    crate::static_regex(
        r#"(?si)<link\b(?:[^>"']|"[^"]*"|'[^']*')*?\bhref\s*=\s*["']([^"']+)["'](?:[^>"']|"[^"]*"|'[^']*')*?\brel\s*=\s*["'](stylesheet|modulepreload)["']"#,
    )
});
47
/// Returns `true` when `path` has the extension `html`.
///
/// The comparison is exact and case-sensitive: `.htm`, `.HTML`, paths without
/// an extension, and extensions that are not valid UTF-8 all yield `false`.
pub(crate) fn is_html_file(path: &Path) -> bool {
    matches!(
        path.extension().and_then(|ext| ext.to_str()),
        Some("html")
    )
}
54
/// Returns `true` when `src` points outside the project and must not be
/// resolved as a local file.
///
/// A value counts as remote when it starts with `http://`, `https://`, the
/// protocol-relative prefix `//`, or `data:`. URL schemes are case-insensitive,
/// so the scheme prefixes are compared ignoring ASCII case (`HTTPS://...` is as
/// remote as `https://...`).
///
/// No trimming is performed here; pass an already-trimmed value.
pub(crate) fn is_remote_url(src: &str) -> bool {
    const REMOTE_SCHEME_PREFIXES: [&str; 3] = ["http://", "https://", "data:"];

    src.starts_with("//")
        || REMOTE_SCHEME_PREFIXES.iter().any(|prefix| {
            // `get` yields `None` when `src` is shorter than the prefix or the
            // cut would land inside a multi-byte character, so this can
            // neither panic nor allocate.
            src.get(..prefix.len())
                .is_some_and(|head| head.eq_ignore_ascii_case(prefix))
        })
}
62
/// Returns `true` when `value` contains a template placeholder marker.
///
/// Two markers are recognised anywhere in the string: `{{` (as in
/// `{{rootURL}}assets/app.js`) and `###` (as in `###APPNAME###/app.js`). Such
/// values are filled in by a later templating step and are not real paths.
pub(crate) fn is_template_placeholder(value: &str) -> bool {
    const PLACEHOLDER_MARKERS: [&str; 2] = ["{{", "###"];

    PLACEHOLDER_MARKERS
        .iter()
        .any(|&marker| value.contains(marker))
}
80
81pub(crate) fn collect_asset_refs(source: &str) -> Vec<String> {
88 let stripped = HTML_COMMENT_RE.replace_all(source, "");
89 let mut refs: Vec<String> = Vec::new();
90
91 for cap in SCRIPT_SRC_RE.captures_iter(&stripped) {
92 if let Some(m) = cap.get(1) {
93 let src = m.as_str().trim();
94 if !src.is_empty() && !is_remote_url(src) && !is_template_placeholder(src) {
95 refs.push(src.to_string());
96 }
97 }
98 }
99
100 for cap in LINK_HREF_RE.captures_iter(&stripped) {
101 if let Some(m) = cap.get(2) {
102 let href = m.as_str().trim();
103 if !href.is_empty() && !is_remote_url(href) && !is_template_placeholder(href) {
104 refs.push(href.to_string());
105 }
106 }
107 }
108 for cap in LINK_HREF_REVERSE_RE.captures_iter(&stripped) {
109 if let Some(m) = cap.get(1) {
110 let href = m.as_str().trim();
111 if !href.is_empty() && !is_remote_url(href) && !is_template_placeholder(href) {
112 refs.push(href.to_string());
113 }
114 }
115 }
116
117 refs
118}
119
/// Test-only shorthand for [`parse_html_to_module_with_complexity`] with
/// complexity collection switched off.
#[cfg(test)]
pub(crate) fn parse_html_to_module(file_id: FileId, source: &str, content_hash: u64) -> ModuleInfo {
    let need_complexity = false;
    parse_html_to_module_with_complexity(file_id, source, content_hash, need_complexity)
}
125
126pub(crate) fn parse_html_to_module_with_complexity(
128 file_id: FileId,
129 source: &str,
130 content_hash: u64,
131 need_complexity: bool,
132) -> ModuleInfo {
133 let parsed_suppressions = crate::suppress::parse_suppressions_from_source(source);
134
135 let mut imports: Vec<ImportInfo> = collect_asset_refs(source)
136 .into_iter()
137 .map(|raw| ImportInfo {
138 source: normalize_asset_url(&raw),
139 imported_name: ImportedName::SideEffect,
140 local_name: String::new(),
141 is_type_only: false,
142 from_style: false,
143 span: Span::default(),
144 source_span: Span::default(),
145 })
146 .collect();
147
148 imports.sort_unstable_by(|a, b| a.source.cmp(&b.source));
149 imports.dedup_by(|a, b| a.source == b.source);
150
151 let angular::AngularTemplateRefs {
152 identifiers,
153 member_accesses: template_member_accesses,
154 security_sinks,
155 } = angular::collect_angular_template_refs(source);
156 let mut member_accesses: Vec<MemberAccess> = identifiers
157 .into_iter()
158 .map(|name| MemberAccess {
159 object: ANGULAR_TPL_SENTINEL.to_string(),
160 member: name,
161 })
162 .collect();
163 member_accesses.extend(template_member_accesses);
164
165 let complexity = if need_complexity {
166 crate::template_complexity::compute_angular_template_complexity(source)
167 .into_iter()
168 .collect()
169 } else {
170 Vec::new()
171 };
172
173 ModuleInfo {
174 file_id,
175 exports: Vec::new(),
176 imports,
177 re_exports: Vec::new(),
178 dynamic_imports: Vec::new(),
179 dynamic_import_patterns: Vec::new(),
180 require_calls: Vec::new(),
181 package_path_references: Vec::new(),
182 member_accesses,
183 whole_object_uses: Vec::new(),
184 has_cjs_exports: false,
185 has_angular_component_template_url: false,
186 content_hash,
187 suppressions: parsed_suppressions.suppressions,
188 unknown_suppression_kinds: parsed_suppressions.unknown_kinds,
189 unused_import_bindings: Vec::new(),
190 type_referenced_import_bindings: Vec::new(),
191 value_referenced_import_bindings: Vec::new(),
192 line_offsets: fallow_types::extract::compute_line_offsets(source),
193 complexity,
194 flag_uses: Vec::new(),
195 class_heritage: vec![],
196 injection_tokens: vec![],
197 local_type_declarations: Vec::new(),
198 public_signature_type_references: Vec::new(),
199 namespace_object_aliases: Vec::new(),
200 iconify_prefixes: Vec::new(),
201 iconify_icon_names: Vec::new(),
202 auto_import_candidates: Vec::new(),
203 directives: Vec::new(),
204 security_sinks,
205 security_sinks_skipped: 0,
206 tainted_bindings: Vec::new(),
207 sanitized_sink_args: Vec::new(),
208 security_control_sites: Vec::new(),
209 }
210}
211
// Unit tests for HTML asset extraction. Most cases feed a small HTML snippet to
// `parse_html_to_module` and inspect the resulting `imports`; the last two
// cover the Angular template member references.
#[cfg(test)]
mod tests {
    use super::*;

    // ---- is_html_file: only the exact `html` extension is accepted ----

    #[test]
    fn is_html_file_html() {
        assert!(is_html_file(Path::new("index.html")));
    }

    #[test]
    fn is_html_file_nested() {
        assert!(is_html_file(Path::new("pages/about.html")));
    }

    #[test]
    fn is_html_file_rejects_htm() {
        assert!(!is_html_file(Path::new("index.htm")));
    }

    #[test]
    fn is_html_file_rejects_js() {
        assert!(!is_html_file(Path::new("app.js")));
    }

    #[test]
    fn is_html_file_rejects_ts() {
        assert!(!is_html_file(Path::new("app.ts")));
    }

    #[test]
    fn is_html_file_rejects_vue() {
        assert!(!is_html_file(Path::new("App.vue")));
    }

    // ---- is_remote_url: http(s), protocol-relative and data URLs ----

    #[test]
    fn remote_url_http() {
        assert!(is_remote_url("http://example.com/script.js"));
    }

    #[test]
    fn remote_url_https() {
        assert!(is_remote_url("https://cdn.example.com/style.css"));
    }

    #[test]
    fn remote_url_protocol_relative() {
        assert!(is_remote_url("//cdn.example.com/lib.js"));
    }

    #[test]
    fn remote_url_data() {
        assert!(is_remote_url("data:text/javascript;base64,abc"));
    }

    #[test]
    fn local_relative_not_remote() {
        assert!(!is_remote_url("./src/entry.js"));
    }

    #[test]
    fn local_root_relative_not_remote() {
        // A single leading slash is root-relative, not protocol-relative.
        assert!(!is_remote_url("/src/entry.js"));
    }

    // ---- <script src> extraction ----

    #[test]
    fn extracts_module_script_src() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script type="module" src="./src/entry.js"></script>"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/entry.js");
    }

    #[test]
    fn extracts_plain_script_src() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script src="./src/polyfills.js"></script>"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/polyfills.js");
    }

    #[test]
    fn extracts_multiple_scripts() {
        let info = parse_html_to_module(
            FileId(0),
            r#"
 <script type="module" src="./src/entry.js"></script>
 <script src="./src/polyfills.js"></script>
 "#,
            0,
        );
        assert_eq!(info.imports.len(), 2);
    }

    #[test]
    fn skips_inline_script() {
        // No `src` attribute, so there is nothing to import.
        let info = parse_html_to_module(FileId(0), r#"<script>console.log("hello");</script>"#, 0);
        assert!(info.imports.is_empty());
    }

    // ---- template placeholders are not real paths ----

    #[test]
    fn skips_handlebars_placeholder_in_script_src() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script src="{{rootURL}}assets/app.js"></script>
 <script src="{{config.assetsPath}}vendor.js"></script>"#,
            0,
        );
        assert!(
            info.imports.is_empty(),
            "Handlebars-placeholder script srcs should not enter the import graph; got {:?}",
            info.imports
        );
    }

    #[test]
    fn skips_handlebars_placeholder_in_link_href() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link rel="stylesheet" href="{{rootURL}}assets/app.css">"#,
            0,
        );
        assert!(info.imports.is_empty());
    }

    #[test]
    fn skips_ember_cli_blueprint_placeholder() {
        // Four `#` delimiters are needed because the fixture itself contains `"###`.
        let info = parse_html_to_module(
            FileId(0),
            r####"<script src="###APPNAME###/app.js"></script>"####,
            0,
        );
        assert!(info.imports.is_empty());
    }

    #[test]
    fn extracts_normal_specifier_alongside_placeholders() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script src="{{rootURL}}assets/app.js"></script>
 <script src="./src/main.ts"></script>"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/main.ts");
    }

    // ---- remote scripts are ignored ----

    #[test]
    fn skips_remote_script() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script src="https://cdn.example.com/lib.js"></script>"#,
            0,
        );
        assert!(info.imports.is_empty());
    }

    #[test]
    fn skips_protocol_relative_script() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script src="//cdn.example.com/lib.js"></script>"#,
            0,
        );
        assert!(info.imports.is_empty());
    }

    // ---- <link href> extraction (stylesheet / modulepreload only) ----

    #[test]
    fn extracts_stylesheet_link() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link rel="stylesheet" href="./src/global.css" />"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/global.css");
    }

    #[test]
    fn extracts_modulepreload_link() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link rel="modulepreload" href="./src/vendor.js" />"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/vendor.js");
    }

    #[test]
    fn extracts_link_with_reversed_attrs() {
        // `href` before `rel` exercises the reverse-order pattern.
        let info = parse_html_to_module(
            FileId(0),
            r#"<link href="./src/global.css" rel="stylesheet" />"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/global.css");
    }

    // ---- normalization: bare specifiers gain a leading `./` ----

    #[test]
    fn bare_script_src_normalized_to_relative() {
        let info = parse_html_to_module(FileId(0), r#"<script src="app.js"></script>"#, 0);
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./app.js");
    }

    #[test]
    fn bare_module_script_src_normalized_to_relative() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script type="module" src="main.ts"></script>"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./main.ts");
    }

    #[test]
    fn bare_stylesheet_link_href_normalized_to_relative() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link rel="stylesheet" href="styles.css" />"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./styles.css");
    }

    #[test]
    fn bare_link_href_reversed_attrs_normalized_to_relative() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link href="styles.css" rel="stylesheet" />"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./styles.css");
    }

    #[test]
    fn bare_modulepreload_link_href_normalized_to_relative() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link rel="modulepreload" href="vendor.js" />"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./vendor.js");
    }

    #[test]
    fn bare_asset_with_subdir_normalized_to_relative() {
        let info = parse_html_to_module(FileId(0), r#"<script src="assets/app.js"></script>"#, 0);
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./assets/app.js");
    }

    #[test]
    fn root_absolute_script_src_unchanged() {
        let info = parse_html_to_module(FileId(0), r#"<script src="/src/main.ts"></script>"#, 0);
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "/src/main.ts");
    }

    #[test]
    fn parent_relative_script_src_unchanged() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script src="../shared/vendor.js"></script>"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "../shared/vendor.js");
    }

    // ---- links that must be ignored ----

    #[test]
    fn skips_preload_link() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link rel="preload" href="./src/font.woff2" as="font" />"#,
            0,
        );
        assert!(info.imports.is_empty());
    }

    #[test]
    fn skips_icon_link() {
        let info =
            parse_html_to_module(FileId(0), r#"<link rel="icon" href="./favicon.ico" />"#, 0);
        assert!(info.imports.is_empty());
    }

    #[test]
    fn skips_remote_stylesheet() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link rel="stylesheet" href="https://fonts.googleapis.com/css" />"#,
            0,
        );
        assert!(info.imports.is_empty());
    }

    // ---- commented-out tags are stripped before matching ----

    #[test]
    fn skips_commented_out_script() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<!-- <script src="./old.js"></script> -->
 <script src="./new.js"></script>"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./new.js");
    }

    #[test]
    fn skips_commented_out_link() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<!-- <link rel="stylesheet" href="./old.css" /> -->
 <link rel="stylesheet" href="./new.css" />"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./new.css");
    }

    // ---- tags whose attributes span several lines ----

    #[test]
    fn handles_multiline_script_tag() {
        let info = parse_html_to_module(
            FileId(0),
            "<script\n  type=\"module\"\n  src=\"./src/entry.js\"\n></script>",
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/entry.js");
    }

    #[test]
    fn handles_multiline_link_tag() {
        let info = parse_html_to_module(
            FileId(0),
            "<link\n  rel=\"stylesheet\"\n  href=\"./src/global.css\"\n/>",
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/global.css");
    }

    // ---- whole-document and edge cases ----

    #[test]
    fn full_vite_html() {
        // The stylesheet and the module script are imported; the icon link is not.
        let info = parse_html_to_module(
            FileId(0),
            r#"<!doctype html>
<html>
 <head>
 <link rel="stylesheet" href="./src/global.css" />
 <link rel="icon" href="/favicon.ico" />
 </head>
 <body>
 <div id="app"></div>
 <script type="module" src="./src/entry.js"></script>
 </body>
</html>"#,
            0,
        );
        assert_eq!(info.imports.len(), 2);
        let sources: Vec<&str> = info.imports.iter().map(|i| i.source.as_str()).collect();
        assert!(sources.contains(&"./src/global.css"));
        assert!(sources.contains(&"./src/entry.js"));
    }

    #[test]
    fn empty_html() {
        let info = parse_html_to_module(FileId(0), "", 0);
        assert!(info.imports.is_empty());
    }

    #[test]
    fn html_with_no_assets() {
        let info = parse_html_to_module(
            FileId(0),
            r"<!doctype html><html><body><h1>Hello</h1></body></html>",
            0,
        );
        assert!(info.imports.is_empty());
    }

    #[test]
    fn single_quoted_attributes() {
        let info = parse_html_to_module(FileId(0), r"<script src='./src/entry.js'></script>", 0);
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/entry.js");
    }

    #[test]
    fn all_imports_are_side_effect() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script src="./entry.js"></script>
 <link rel="stylesheet" href="./style.css" />"#,
            0,
        );
        for imp in &info.imports {
            assert!(matches!(imp.imported_name, ImportedName::SideEffect));
            assert!(imp.local_name.is_empty());
            assert!(!imp.is_type_only);
        }
    }

    #[test]
    fn suppression_comments_extracted() {
        // NOTE(review): despite the name, this only asserts that the script
        // after the suppression comment is still imported; the `suppressions`
        // field itself is not checked here.
        let info = parse_html_to_module(
            FileId(0),
            "<!-- fallow-ignore-file -->\n<script src=\"./entry.js\"></script>",
            0,
        );
        assert_eq!(info.imports.len(), 1);
    }

    // ---- Angular template references ----

    #[test]
    fn angular_template_extracts_member_refs() {
        let info = parse_html_to_module(
            FileId(0),
            "<h1>{{ title() }}</h1>\n\
             <p [class.highlighted]=\"isHighlighted\">{{ greeting() }}</p>\n\
             <button (click)=\"onButtonClick()\">Toggle</button>",
            0,
        );
        // Only accesses recorded against the template sentinel are inspected.
        let names: rustc_hash::FxHashSet<&str> = info
            .member_accesses
            .iter()
            .filter(|a| a.object == ANGULAR_TPL_SENTINEL)
            .map(|a| a.member.as_str())
            .collect();
        assert!(names.contains("title"), "should contain 'title'");
        assert!(
            names.contains("isHighlighted"),
            "should contain 'isHighlighted'"
        );
        assert!(names.contains("greeting"), "should contain 'greeting'");
        assert!(
            names.contains("onButtonClick"),
            "should contain 'onButtonClick'"
        );
    }

    #[test]
    fn plain_html_no_angular_refs() {
        let info = parse_html_to_module(
            FileId(0),
            "<!doctype html><html><body><h1>Hello</h1></body></html>",
            0,
        );
        assert!(info.member_accesses.is_empty());
    }
}