1use std::path::Path;
12use std::sync::LazyLock;
13
14use oxc_span::Span;
15
16use crate::asset_url::normalize_asset_url;
17use crate::sfc_template::angular::{self, ANGULAR_TPL_SENTINEL};
18use crate::{ImportInfo, ImportedName, MemberAccess, ModuleInfo};
19use fallow_types::discover::FileId;
20
21static HTML_COMMENT_RE: LazyLock<regex::Regex> =
23 LazyLock::new(|| crate::static_regex(r"(?s)<!--.*?-->"));
24
25static SCRIPT_SRC_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
29 crate::static_regex(r#"(?si)<script\b(?:[^>"']|"[^"]*"|'[^']*')*?\bsrc\s*=\s*["']([^"']+)["']"#)
30});
31
32static LINK_HREF_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
36 crate::static_regex(
37 r#"(?si)<link\b(?:[^>"']|"[^"]*"|'[^']*')*?\brel\s*=\s*["'](stylesheet|modulepreload)["'](?:[^>"']|"[^"]*"|'[^']*')*?\bhref\s*=\s*["']([^"']+)["']"#,
38 )
39});
40
41static LINK_HREF_REVERSE_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
43 crate::static_regex(
44 r#"(?si)<link\b(?:[^>"']|"[^"]*"|'[^']*')*?\bhref\s*=\s*["']([^"']+)["'](?:[^>"']|"[^"]*"|'[^']*')*?\brel\s*=\s*["'](stylesheet|modulepreload)["']"#,
45 )
46});
47
/// Returns `true` when `path` has the exact extension `html`.
///
/// The comparison is case-sensitive and does not accept `.htm`; paths without an
/// extension, or whose extension is not valid UTF-8, yield `false`.
pub(crate) fn is_html_file(path: &Path) -> bool {
    let extension = path.extension().and_then(|ext| ext.to_str());
    matches!(extension, Some("html"))
}
54
/// Returns `true` when `src` points outside the project and must not be treated
/// as a local asset.
///
/// Recognised forms are `http://`, `https://`, protocol-relative `//` and
/// `data:` URLs. URI schemes are case-insensitive, so the prefix comparison
/// ignores ASCII case (`HTTPS://cdn...` is remote just like `https://cdn...`).
pub(crate) fn is_remote_url(src: &str) -> bool {
    const REMOTE_PREFIXES: [&str; 4] = ["http://", "https://", "//", "data:"];

    REMOTE_PREFIXES.iter().any(|&prefix| {
        // `get` returns `None` when `src` is shorter than the prefix or when the
        // cut would land inside a multi-byte character; neither can match an
        // all-ASCII prefix.
        src.get(..prefix.len())
            .is_some_and(|head| head.eq_ignore_ascii_case(prefix))
    })
}
62
/// Returns `true` when `value` contains an unexpanded template marker.
///
/// Two markers are recognised anywhere in the string: `{{` (as used by
/// Handlebars-style `{{rootURL}}` placeholders) and `###` (as used by
/// `###APPNAME###`-style blueprint placeholders).
pub(crate) fn is_template_placeholder(value: &str) -> bool {
    const PLACEHOLDER_MARKERS: [&str; 2] = ["{{", "###"];

    PLACEHOLDER_MARKERS
        .iter()
        .any(|&marker| value.contains(marker))
}
80
81pub(crate) fn collect_asset_refs(source: &str) -> Vec<String> {
88 let stripped = HTML_COMMENT_RE.replace_all(source, "");
89 let mut refs: Vec<String> = Vec::new();
90
91 for cap in SCRIPT_SRC_RE.captures_iter(&stripped) {
92 if let Some(m) = cap.get(1) {
93 let src = m.as_str().trim();
94 if !src.is_empty() && !is_remote_url(src) && !is_template_placeholder(src) {
95 refs.push(src.to_string());
96 }
97 }
98 }
99
100 for cap in LINK_HREF_RE.captures_iter(&stripped) {
101 if let Some(m) = cap.get(2) {
102 let href = m.as_str().trim();
103 if !href.is_empty() && !is_remote_url(href) && !is_template_placeholder(href) {
104 refs.push(href.to_string());
105 }
106 }
107 }
108 for cap in LINK_HREF_REVERSE_RE.captures_iter(&stripped) {
109 if let Some(m) = cap.get(1) {
110 let href = m.as_str().trim();
111 if !href.is_empty() && !is_remote_url(href) && !is_template_placeholder(href) {
112 refs.push(href.to_string());
113 }
114 }
115 }
116
117 refs
118}
119
/// Test-only shorthand for [`parse_html_to_module_with_complexity`] with
/// complexity collection turned off.
#[cfg(test)]
pub(crate) fn parse_html_to_module(file_id: FileId, source: &str, content_hash: u64) -> ModuleInfo {
    let need_complexity = false;
    parse_html_to_module_with_complexity(file_id, source, content_hash, need_complexity)
}
125
126pub(crate) fn parse_html_to_module_with_complexity(
128 file_id: FileId,
129 source: &str,
130 content_hash: u64,
131 need_complexity: bool,
132) -> ModuleInfo {
133 let parsed_suppressions = crate::suppress::parse_suppressions_from_source(source);
134
135 let mut imports: Vec<ImportInfo> = collect_asset_refs(source)
136 .into_iter()
137 .map(|raw| ImportInfo {
138 source: normalize_asset_url(&raw),
139 imported_name: ImportedName::SideEffect,
140 local_name: String::new(),
141 is_type_only: false,
142 from_style: false,
143 span: Span::default(),
144 source_span: Span::default(),
145 })
146 .collect();
147
148 imports.sort_unstable_by(|a, b| a.source.cmp(&b.source));
149 imports.dedup_by(|a, b| a.source == b.source);
150
151 let template_refs = angular::collect_angular_template_refs(source);
152 let mut member_accesses: Vec<MemberAccess> = template_refs
153 .identifiers
154 .into_iter()
155 .map(|name| MemberAccess {
156 object: ANGULAR_TPL_SENTINEL.to_string(),
157 member: name,
158 })
159 .collect();
160 member_accesses.extend(template_refs.member_accesses);
161
162 let complexity = if need_complexity {
163 crate::template_complexity::compute_angular_template_complexity(source)
164 .into_iter()
165 .collect()
166 } else {
167 Vec::new()
168 };
169
170 ModuleInfo {
171 file_id,
172 exports: Vec::new(),
173 imports,
174 re_exports: Vec::new(),
175 dynamic_imports: Vec::new(),
176 dynamic_import_patterns: Vec::new(),
177 require_calls: Vec::new(),
178 member_accesses,
179 whole_object_uses: Vec::new(),
180 has_cjs_exports: false,
181 has_angular_component_template_url: false,
182 content_hash,
183 suppressions: parsed_suppressions.suppressions,
184 unknown_suppression_kinds: parsed_suppressions.unknown_kinds,
185 unused_import_bindings: Vec::new(),
186 type_referenced_import_bindings: Vec::new(),
187 value_referenced_import_bindings: Vec::new(),
188 line_offsets: fallow_types::extract::compute_line_offsets(source),
189 complexity,
190 flag_uses: Vec::new(),
191 class_heritage: vec![],
192 injection_tokens: vec![],
193 local_type_declarations: Vec::new(),
194 public_signature_type_references: Vec::new(),
195 namespace_object_aliases: Vec::new(),
196 iconify_prefixes: Vec::new(),
197 auto_import_candidates: Vec::new(),
198 directives: Vec::new(),
199 security_sinks: Vec::new(),
200 security_sinks_skipped: 0,
201 tainted_bindings: Vec::new(),
202 sanitized_sink_args: Vec::new(),
203 }
204}
205
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `html` as file 0 with a content hash of 0.
    fn parse(html: &str) -> ModuleInfo {
        parse_html_to_module(FileId(0), html, 0)
    }

    /// Asserts that `html` produces exactly one import whose specifier is `expected`.
    #[track_caller]
    fn assert_single_import(html: &str, expected: &str) {
        let info = parse(html);
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, expected);
    }

    /// Asserts that `html` produces no imports at all.
    #[track_caller]
    fn assert_no_imports(html: &str) {
        assert!(parse(html).imports.is_empty());
    }

    // --- is_html_file -----------------------------------------------------

    #[test]
    fn is_html_file_html() {
        assert!(is_html_file(Path::new("index.html")));
    }

    #[test]
    fn is_html_file_nested() {
        assert!(is_html_file(Path::new("pages/about.html")));
    }

    #[test]
    fn is_html_file_rejects_htm() {
        assert!(!is_html_file(Path::new("index.htm")));
    }

    #[test]
    fn is_html_file_rejects_js() {
        assert!(!is_html_file(Path::new("app.js")));
    }

    #[test]
    fn is_html_file_rejects_ts() {
        assert!(!is_html_file(Path::new("app.ts")));
    }

    #[test]
    fn is_html_file_rejects_vue() {
        assert!(!is_html_file(Path::new("App.vue")));
    }

    // --- is_remote_url ----------------------------------------------------

    #[test]
    fn remote_url_http() {
        assert!(is_remote_url("http://example.com/script.js"));
    }

    #[test]
    fn remote_url_https() {
        assert!(is_remote_url("https://cdn.example.com/style.css"));
    }

    #[test]
    fn remote_url_protocol_relative() {
        assert!(is_remote_url("//cdn.example.com/lib.js"));
    }

    #[test]
    fn remote_url_data() {
        assert!(is_remote_url("data:text/javascript;base64,abc"));
    }

    #[test]
    fn local_relative_not_remote() {
        assert!(!is_remote_url("./src/entry.js"));
    }

    #[test]
    fn local_root_relative_not_remote() {
        assert!(!is_remote_url("/src/entry.js"));
    }

    // --- script extraction ------------------------------------------------

    #[test]
    fn extracts_module_script_src() {
        assert_single_import(
            r#"<script type="module" src="./src/entry.js"></script>"#,
            "./src/entry.js",
        );
    }

    #[test]
    fn extracts_plain_script_src() {
        assert_single_import(
            r#"<script src="./src/polyfills.js"></script>"#,
            "./src/polyfills.js",
        );
    }

    #[test]
    fn extracts_multiple_scripts() {
        let html = r#"
            <script type="module" src="./src/entry.js"></script>
            <script src="./src/polyfills.js"></script>
            "#;
        assert_eq!(parse(html).imports.len(), 2);
    }

    #[test]
    fn skips_inline_script() {
        assert_no_imports(r#"<script>console.log("hello");</script>"#);
    }

    // --- template placeholders --------------------------------------------

    #[test]
    fn skips_handlebars_placeholder_in_script_src() {
        let html = r#"<script src="{{rootURL}}assets/app.js"></script>
            <script src="{{config.assetsPath}}vendor.js"></script>"#;
        let info = parse(html);
        assert!(
            info.imports.is_empty(),
            "Handlebars-placeholder script srcs should not enter the import graph; got {:?}",
            info.imports
        );
    }

    #[test]
    fn skips_handlebars_placeholder_in_link_href() {
        assert_no_imports(r#"<link rel="stylesheet" href="{{rootURL}}assets/app.css">"#);
    }

    #[test]
    fn skips_ember_cli_blueprint_placeholder() {
        assert_no_imports(r####"<script src="###APPNAME###/app.js"></script>"####);
    }

    #[test]
    fn extracts_normal_specifier_alongside_placeholders() {
        assert_single_import(
            r#"<script src="{{rootURL}}assets/app.js"></script>
            <script src="./src/main.ts"></script>"#,
            "./src/main.ts",
        );
    }

    // --- remote scripts ---------------------------------------------------

    #[test]
    fn skips_remote_script() {
        assert_no_imports(r#"<script src="https://cdn.example.com/lib.js"></script>"#);
    }

    #[test]
    fn skips_protocol_relative_script() {
        assert_no_imports(r#"<script src="//cdn.example.com/lib.js"></script>"#);
    }

    // --- link extraction --------------------------------------------------

    #[test]
    fn extracts_stylesheet_link() {
        assert_single_import(
            r#"<link rel="stylesheet" href="./src/global.css" />"#,
            "./src/global.css",
        );
    }

    #[test]
    fn extracts_modulepreload_link() {
        assert_single_import(
            r#"<link rel="modulepreload" href="./src/vendor.js" />"#,
            "./src/vendor.js",
        );
    }

    #[test]
    fn extracts_link_with_reversed_attrs() {
        assert_single_import(
            r#"<link href="./src/global.css" rel="stylesheet" />"#,
            "./src/global.css",
        );
    }

    // --- specifier normalization ------------------------------------------

    #[test]
    fn bare_script_src_normalized_to_relative() {
        assert_single_import(r#"<script src="app.js"></script>"#, "./app.js");
    }

    #[test]
    fn bare_module_script_src_normalized_to_relative() {
        assert_single_import(
            r#"<script type="module" src="main.ts"></script>"#,
            "./main.ts",
        );
    }

    #[test]
    fn bare_stylesheet_link_href_normalized_to_relative() {
        assert_single_import(
            r#"<link rel="stylesheet" href="styles.css" />"#,
            "./styles.css",
        );
    }

    #[test]
    fn bare_link_href_reversed_attrs_normalized_to_relative() {
        assert_single_import(
            r#"<link href="styles.css" rel="stylesheet" />"#,
            "./styles.css",
        );
    }

    #[test]
    fn bare_modulepreload_link_href_normalized_to_relative() {
        assert_single_import(
            r#"<link rel="modulepreload" href="vendor.js" />"#,
            "./vendor.js",
        );
    }

    #[test]
    fn bare_asset_with_subdir_normalized_to_relative() {
        assert_single_import(
            r#"<script src="assets/app.js"></script>"#,
            "./assets/app.js",
        );
    }

    #[test]
    fn root_absolute_script_src_unchanged() {
        assert_single_import(r#"<script src="/src/main.ts"></script>"#, "/src/main.ts");
    }

    #[test]
    fn parent_relative_script_src_unchanged() {
        assert_single_import(
            r#"<script src="../shared/vendor.js"></script>"#,
            "../shared/vendor.js",
        );
    }

    // --- ignored links ----------------------------------------------------

    #[test]
    fn skips_preload_link() {
        assert_no_imports(r#"<link rel="preload" href="./src/font.woff2" as="font" />"#);
    }

    #[test]
    fn skips_icon_link() {
        assert_no_imports(r#"<link rel="icon" href="./favicon.ico" />"#);
    }

    #[test]
    fn skips_remote_stylesheet() {
        assert_no_imports(r#"<link rel="stylesheet" href="https://fonts.googleapis.com/css" />"#);
    }

    // --- comments ---------------------------------------------------------

    #[test]
    fn skips_commented_out_script() {
        assert_single_import(
            r#"<!-- <script src="./old.js"></script> -->
            <script src="./new.js"></script>"#,
            "./new.js",
        );
    }

    #[test]
    fn skips_commented_out_link() {
        assert_single_import(
            r#"<!-- <link rel="stylesheet" href="./old.css" /> -->
            <link rel="stylesheet" href="./new.css" />"#,
            "./new.css",
        );
    }

    // --- multi-line tags --------------------------------------------------

    #[test]
    fn handles_multiline_script_tag() {
        assert_single_import(
            "<script\n  type=\"module\"\n  src=\"./src/entry.js\"\n></script>",
            "./src/entry.js",
        );
    }

    #[test]
    fn handles_multiline_link_tag() {
        assert_single_import(
            "<link\n  rel=\"stylesheet\"\n  href=\"./src/global.css\"\n/>",
            "./src/global.css",
        );
    }

    // --- whole documents --------------------------------------------------

    #[test]
    fn full_vite_html() {
        let html = r#"<!doctype html>
<html>
  <head>
    <link rel="stylesheet" href="./src/global.css" />
    <link rel="icon" href="/favicon.ico" />
  </head>
  <body>
    <div id="app"></div>
    <script type="module" src="./src/entry.js"></script>
  </body>
</html>"#;
        let info = parse(html);
        assert_eq!(info.imports.len(), 2);
        for expected in ["./src/global.css", "./src/entry.js"] {
            assert!(
                info.imports
                    .iter()
                    .any(|import| import.source.as_str() == expected)
            );
        }
    }

    #[test]
    fn empty_html() {
        assert_no_imports("");
    }

    #[test]
    fn html_with_no_assets() {
        assert_no_imports(r"<!doctype html><html><body><h1>Hello</h1></body></html>");
    }

    #[test]
    fn single_quoted_attributes() {
        assert_single_import(r"<script src='./src/entry.js'></script>", "./src/entry.js");
    }

    #[test]
    fn all_imports_are_side_effect() {
        let info = parse(
            r#"<script src="./entry.js"></script>
            <link rel="stylesheet" href="./style.css" />"#,
        );
        for import in &info.imports {
            assert!(matches!(import.imported_name, ImportedName::SideEffect));
            assert!(import.local_name.is_empty());
            assert!(!import.is_type_only);
        }
    }

    #[test]
    fn suppression_comments_extracted() {
        let info = parse("<!-- fallow-ignore-file -->\n<script src=\"./entry.js\"></script>");
        assert_eq!(info.imports.len(), 1);
    }

    // --- Angular template references --------------------------------------

    #[test]
    fn angular_template_extracts_member_refs() {
        let info = parse(
            "<h1>{{ title() }}</h1>\n\
             <p [class.highlighted]=\"isHighlighted\">{{ greeting() }}</p>\n\
             <button (click)=\"onButtonClick()\">Toggle</button>",
        );
        // A name counts only when it is recorded against the Angular template sentinel.
        let referenced = |name: &str| {
            info.member_accesses
                .iter()
                .any(|access| access.object == ANGULAR_TPL_SENTINEL && access.member.as_str() == name)
        };
        assert!(referenced("title"), "should contain 'title'");
        assert!(
            referenced("isHighlighted"),
            "should contain 'isHighlighted'"
        );
        assert!(referenced("greeting"), "should contain 'greeting'");
        assert!(
            referenced("onButtonClick"),
            "should contain 'onButtonClick'"
        );
    }

    #[test]
    fn plain_html_no_angular_refs() {
        let info = parse("<!doctype html><html><body><h1>Hello</h1></body></html>");
        assert!(info.member_accesses.is_empty());
    }
}