1use std::path::Path;
12use std::sync::LazyLock;
13
14use oxc_span::Span;
15
16use crate::asset_url::normalize_asset_url;
17use crate::sfc_template::angular::{self, ANGULAR_TPL_SENTINEL};
18use crate::{ImportInfo, ImportedName, MemberAccess, ModuleInfo};
19use fallow_types::discover::FileId;
20
21static HTML_COMMENT_RE: LazyLock<regex::Regex> =
23 LazyLock::new(|| crate::static_regex(r"(?s)<!--.*?-->"));
24
25static SCRIPT_SRC_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
29 crate::static_regex(r#"(?si)<script\b(?:[^>"']|"[^"]*"|'[^']*')*?\bsrc\s*=\s*["']([^"']+)["']"#)
30});
31
32static LINK_HREF_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
36 crate::static_regex(
37 r#"(?si)<link\b(?:[^>"']|"[^"]*"|'[^']*')*?\brel\s*=\s*["'](stylesheet|modulepreload)["'](?:[^>"']|"[^"]*"|'[^']*')*?\bhref\s*=\s*["']([^"']+)["']"#,
38 )
39});
40
41static LINK_HREF_REVERSE_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
43 crate::static_regex(
44 r#"(?si)<link\b(?:[^>"']|"[^"]*"|'[^']*')*?\bhref\s*=\s*["']([^"']+)["'](?:[^>"']|"[^"]*"|'[^']*')*?\brel\s*=\s*["'](stylesheet|modulepreload)["']"#,
45 )
46});
47
/// Returns `true` when `path` has the exact, lowercase extension `html`.
///
/// Paths without an extension, with a non-UTF-8 extension, or with any other
/// extension (including `htm`) yield `false`.
pub(crate) fn is_html_file(path: &Path) -> bool {
    let extension = path.extension().and_then(std::ffi::OsStr::to_str);
    matches!(extension, Some("html"))
}
54
/// Returns `true` when `src` points outside the project and therefore must
/// not be treated as a local file reference.
///
/// A value counts as remote when it starts with `http://`, `https://`, the
/// protocol-relative `//`, or the `data:` scheme. URI schemes are
/// case-insensitive, so the prefix comparison ignores ASCII case
/// (`HTTPS://cdn.example.com/x.js` is remote too).
pub(crate) fn is_remote_url(src: &str) -> bool {
    const REMOTE_PREFIXES: [&str; 4] = ["http://", "https://", "//", "data:"];

    REMOTE_PREFIXES.iter().any(|prefix| {
        // `get` returns `None` when `src` is shorter than the prefix or when the
        // cut would land inside a multi-byte character; neither can match an
        // all-ASCII prefix, so both are correctly treated as "not remote".
        src.get(..prefix.len())
            .is_some_and(|head| head.eq_ignore_ascii_case(prefix))
    })
}
62
/// Returns `true` when `value` contains a template placeholder marker.
///
/// Two markers are recognised anywhere in the string: a double opening brace
/// (`{{`) and a triple hash (`###`). Such values are not literal paths, so
/// callers skip them.
pub(crate) fn is_template_placeholder(value: &str) -> bool {
    const PLACEHOLDER_MARKERS: [&str; 2] = ["{{", "###"];

    PLACEHOLDER_MARKERS
        .iter()
        .any(|marker| value.contains(*marker))
}
80
81pub(crate) fn collect_asset_refs(source: &str) -> Vec<String> {
88 let stripped = HTML_COMMENT_RE.replace_all(source, "");
89 let mut refs: Vec<String> = Vec::new();
90
91 for cap in SCRIPT_SRC_RE.captures_iter(&stripped) {
92 if let Some(m) = cap.get(1) {
93 let src = m.as_str().trim();
94 if !src.is_empty() && !is_remote_url(src) && !is_template_placeholder(src) {
95 refs.push(src.to_string());
96 }
97 }
98 }
99
100 for cap in LINK_HREF_RE.captures_iter(&stripped) {
101 if let Some(m) = cap.get(2) {
102 let href = m.as_str().trim();
103 if !href.is_empty() && !is_remote_url(href) && !is_template_placeholder(href) {
104 refs.push(href.to_string());
105 }
106 }
107 }
108 for cap in LINK_HREF_REVERSE_RE.captures_iter(&stripped) {
109 if let Some(m) = cap.get(1) {
110 let href = m.as_str().trim();
111 if !href.is_empty() && !is_remote_url(href) && !is_template_placeholder(href) {
112 refs.push(href.to_string());
113 }
114 }
115 }
116
117 refs
118}
119
/// Test-only convenience wrapper around
/// [`parse_html_to_module_with_complexity`] that never requests complexity
/// metrics.
#[cfg(test)]
pub(crate) fn parse_html_to_module(file_id: FileId, source: &str, content_hash: u64) -> ModuleInfo {
    let need_complexity = false;
    parse_html_to_module_with_complexity(file_id, source, content_hash, need_complexity)
}
125
126pub(crate) fn parse_html_to_module_with_complexity(
128 file_id: FileId,
129 source: &str,
130 content_hash: u64,
131 need_complexity: bool,
132) -> ModuleInfo {
133 let parsed_suppressions = crate::suppress::parse_suppressions_from_source(source);
134
135 let mut imports: Vec<ImportInfo> = collect_asset_refs(source)
136 .into_iter()
137 .map(|raw| ImportInfo {
138 source: normalize_asset_url(&raw),
139 imported_name: ImportedName::SideEffect,
140 local_name: String::new(),
141 is_type_only: false,
142 from_style: false,
143 span: Span::default(),
144 source_span: Span::default(),
145 })
146 .collect();
147
148 imports.sort_unstable_by(|a, b| a.source.cmp(&b.source));
149 imports.dedup_by(|a, b| a.source == b.source);
150
151 let template_refs = angular::collect_angular_template_refs(source);
152 let mut member_accesses: Vec<MemberAccess> = template_refs
153 .identifiers
154 .into_iter()
155 .map(|name| MemberAccess {
156 object: ANGULAR_TPL_SENTINEL.to_string(),
157 member: name,
158 })
159 .collect();
160 member_accesses.extend(template_refs.member_accesses);
161
162 let complexity = if need_complexity {
163 crate::template_complexity::compute_angular_template_complexity(source)
164 .into_iter()
165 .collect()
166 } else {
167 Vec::new()
168 };
169
170 ModuleInfo {
171 file_id,
172 exports: Vec::new(),
173 imports,
174 re_exports: Vec::new(),
175 dynamic_imports: Vec::new(),
176 dynamic_import_patterns: Vec::new(),
177 require_calls: Vec::new(),
178 member_accesses,
179 whole_object_uses: Vec::new(),
180 has_cjs_exports: false,
181 has_angular_component_template_url: false,
182 content_hash,
183 suppressions: parsed_suppressions.suppressions,
184 unknown_suppression_kinds: parsed_suppressions.unknown_kinds,
185 unused_import_bindings: Vec::new(),
186 type_referenced_import_bindings: Vec::new(),
187 value_referenced_import_bindings: Vec::new(),
188 line_offsets: fallow_types::extract::compute_line_offsets(source),
189 complexity,
190 flag_uses: Vec::new(),
191 class_heritage: vec![],
192 local_type_declarations: Vec::new(),
193 public_signature_type_references: Vec::new(),
194 namespace_object_aliases: Vec::new(),
195 iconify_prefixes: Vec::new(),
196 auto_import_candidates: Vec::new(),
197 directives: Vec::new(),
198 security_sinks: Vec::new(),
199 security_sinks_skipped: 0,
200 }
201}
202
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `html` with a fixed file id and content hash; the tests only
    /// inspect what was extracted from the markup.
    fn parse(html: &str) -> ModuleInfo {
        parse_html_to_module(FileId(0), html, 0)
    }

    /// Asserts that `html` yields exactly one import whose specifier is `expected`.
    #[track_caller]
    fn assert_single_import(html: &str, expected: &str) {
        let info = parse(html);
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, expected);
    }

    /// Asserts that `html` yields no imports at all.
    #[track_caller]
    fn assert_no_imports(html: &str) {
        assert!(parse(html).imports.is_empty());
    }

    // ---- is_html_file ----

    #[test]
    fn is_html_file_html() {
        let path = Path::new("index.html");
        assert!(is_html_file(path));
    }

    #[test]
    fn is_html_file_nested() {
        let path = Path::new("pages/about.html");
        assert!(is_html_file(path));
    }

    #[test]
    fn is_html_file_rejects_htm() {
        let path = Path::new("index.htm");
        assert!(!is_html_file(path));
    }

    #[test]
    fn is_html_file_rejects_js() {
        let path = Path::new("app.js");
        assert!(!is_html_file(path));
    }

    #[test]
    fn is_html_file_rejects_ts() {
        let path = Path::new("app.ts");
        assert!(!is_html_file(path));
    }

    #[test]
    fn is_html_file_rejects_vue() {
        let path = Path::new("App.vue");
        assert!(!is_html_file(path));
    }

    // ---- is_remote_url ----

    #[test]
    fn remote_url_http() {
        let url = "http://example.com/script.js";
        assert!(is_remote_url(url));
    }

    #[test]
    fn remote_url_https() {
        let url = "https://cdn.example.com/style.css";
        assert!(is_remote_url(url));
    }

    #[test]
    fn remote_url_protocol_relative() {
        let url = "//cdn.example.com/lib.js";
        assert!(is_remote_url(url));
    }

    #[test]
    fn remote_url_data() {
        let url = "data:text/javascript;base64,abc";
        assert!(is_remote_url(url));
    }

    #[test]
    fn local_relative_not_remote() {
        let url = "./src/entry.js";
        assert!(!is_remote_url(url));
    }

    #[test]
    fn local_root_relative_not_remote() {
        let url = "/src/entry.js";
        assert!(!is_remote_url(url));
    }

    // ---- script extraction ----

    #[test]
    fn extracts_module_script_src() {
        assert_single_import(
            r#"<script type="module" src="./src/entry.js"></script>"#,
            "./src/entry.js",
        );
    }

    #[test]
    fn extracts_plain_script_src() {
        assert_single_import(
            r#"<script src="./src/polyfills.js"></script>"#,
            "./src/polyfills.js",
        );
    }

    #[test]
    fn extracts_multiple_scripts() {
        let html = r#"
            <script type="module" src="./src/entry.js"></script>
            <script src="./src/polyfills.js"></script>
        "#;
        assert_eq!(parse(html).imports.len(), 2);
    }

    #[test]
    fn skips_inline_script() {
        assert_no_imports(r#"<script>console.log("hello");</script>"#);
    }

    // ---- template placeholders ----

    #[test]
    fn skips_handlebars_placeholder_in_script_src() {
        let html = r#"<script src="{{rootURL}}assets/app.js"></script>
            <script src="{{config.assetsPath}}vendor.js"></script>"#;
        let info = parse(html);
        assert!(
            info.imports.is_empty(),
            "Handlebars-placeholder script srcs should not enter the import graph; got {:?}",
            info.imports
        );
    }

    #[test]
    fn skips_handlebars_placeholder_in_link_href() {
        assert_no_imports(r#"<link rel="stylesheet" href="{{rootURL}}assets/app.css">"#);
    }

    #[test]
    fn skips_ember_cli_blueprint_placeholder() {
        assert_no_imports(r####"<script src="###APPNAME###/app.js"></script>"####);
    }

    #[test]
    fn extracts_normal_specifier_alongside_placeholders() {
        let html = r#"<script src="{{rootURL}}assets/app.js"></script>
            <script src="./src/main.ts"></script>"#;
        assert_single_import(html, "./src/main.ts");
    }

    // ---- remote scripts ----

    #[test]
    fn skips_remote_script() {
        assert_no_imports(r#"<script src="https://cdn.example.com/lib.js"></script>"#);
    }

    #[test]
    fn skips_protocol_relative_script() {
        assert_no_imports(r#"<script src="//cdn.example.com/lib.js"></script>"#);
    }

    // ---- link extraction ----

    #[test]
    fn extracts_stylesheet_link() {
        assert_single_import(
            r#"<link rel="stylesheet" href="./src/global.css" />"#,
            "./src/global.css",
        );
    }

    #[test]
    fn extracts_modulepreload_link() {
        assert_single_import(
            r#"<link rel="modulepreload" href="./src/vendor.js" />"#,
            "./src/vendor.js",
        );
    }

    #[test]
    fn extracts_link_with_reversed_attrs() {
        assert_single_import(
            r#"<link href="./src/global.css" rel="stylesheet" />"#,
            "./src/global.css",
        );
    }

    // ---- specifier normalization ----

    #[test]
    fn bare_script_src_normalized_to_relative() {
        assert_single_import(r#"<script src="app.js"></script>"#, "./app.js");
    }

    #[test]
    fn bare_module_script_src_normalized_to_relative() {
        assert_single_import(
            r#"<script type="module" src="main.ts"></script>"#,
            "./main.ts",
        );
    }

    #[test]
    fn bare_stylesheet_link_href_normalized_to_relative() {
        assert_single_import(
            r#"<link rel="stylesheet" href="styles.css" />"#,
            "./styles.css",
        );
    }

    #[test]
    fn bare_link_href_reversed_attrs_normalized_to_relative() {
        assert_single_import(
            r#"<link href="styles.css" rel="stylesheet" />"#,
            "./styles.css",
        );
    }

    #[test]
    fn bare_modulepreload_link_href_normalized_to_relative() {
        assert_single_import(
            r#"<link rel="modulepreload" href="vendor.js" />"#,
            "./vendor.js",
        );
    }

    #[test]
    fn bare_asset_with_subdir_normalized_to_relative() {
        assert_single_import(
            r#"<script src="assets/app.js"></script>"#,
            "./assets/app.js",
        );
    }

    #[test]
    fn root_absolute_script_src_unchanged() {
        assert_single_import(
            r#"<script src="/src/main.ts"></script>"#,
            "/src/main.ts",
        );
    }

    #[test]
    fn parent_relative_script_src_unchanged() {
        assert_single_import(
            r#"<script src="../shared/vendor.js"></script>"#,
            "../shared/vendor.js",
        );
    }

    // ---- ignored links ----

    #[test]
    fn skips_preload_link() {
        assert_no_imports(r#"<link rel="preload" href="./src/font.woff2" as="font" />"#);
    }

    #[test]
    fn skips_icon_link() {
        assert_no_imports(r#"<link rel="icon" href="./favicon.ico" />"#);
    }

    #[test]
    fn skips_remote_stylesheet() {
        assert_no_imports(r#"<link rel="stylesheet" href="https://fonts.googleapis.com/css" />"#);
    }

    // ---- comments ----

    #[test]
    fn skips_commented_out_script() {
        let html = r#"<!-- <script src="./old.js"></script> -->
            <script src="./new.js"></script>"#;
        assert_single_import(html, "./new.js");
    }

    #[test]
    fn skips_commented_out_link() {
        let html = r#"<!-- <link rel="stylesheet" href="./old.css" /> -->
            <link rel="stylesheet" href="./new.css" />"#;
        assert_single_import(html, "./new.css");
    }

    // ---- multi-line tags ----

    #[test]
    fn handles_multiline_script_tag() {
        assert_single_import(
            "<script\n  type=\"module\"\n  src=\"./src/entry.js\"\n></script>",
            "./src/entry.js",
        );
    }

    #[test]
    fn handles_multiline_link_tag() {
        assert_single_import(
            "<link\n  rel=\"stylesheet\"\n  href=\"./src/global.css\"\n/>",
            "./src/global.css",
        );
    }

    // ---- whole documents ----

    #[test]
    fn full_vite_html() {
        let html = r#"<!doctype html>
<html>
  <head>
    <link rel="stylesheet" href="./src/global.css" />
    <link rel="icon" href="/favicon.ico" />
  </head>
  <body>
    <div id="app"></div>
    <script type="module" src="./src/entry.js"></script>
  </body>
</html>"#;
        let info = parse(html);
        assert_eq!(info.imports.len(), 2);
        let sources: Vec<&str> = info
            .imports
            .iter()
            .map(|import| import.source.as_str())
            .collect();
        assert!(sources.contains(&"./src/global.css"));
        assert!(sources.contains(&"./src/entry.js"));
    }

    #[test]
    fn empty_html() {
        assert_no_imports("");
    }

    #[test]
    fn html_with_no_assets() {
        assert_no_imports(r"<!doctype html><html><body><h1>Hello</h1></body></html>");
    }

    #[test]
    fn single_quoted_attributes() {
        assert_single_import(
            r"<script src='./src/entry.js'></script>",
            "./src/entry.js",
        );
    }

    // ---- import shape ----

    #[test]
    fn all_imports_are_side_effect() {
        let html = r#"<script src="./entry.js"></script>
            <link rel="stylesheet" href="./style.css" />"#;
        let info = parse(html);
        for import in info.imports.iter() {
            assert!(matches!(import.imported_name, ImportedName::SideEffect));
            assert!(import.local_name.is_empty());
            assert!(!import.is_type_only);
        }
    }

    #[test]
    fn suppression_comments_extracted() {
        let html = "<!-- fallow-ignore-file -->\n<script src=\"./entry.js\"></script>";
        assert_eq!(parse(html).imports.len(), 1);
    }

    // ---- Angular template references ----

    #[test]
    fn angular_template_extracts_member_refs() {
        let info = parse(
            "<h1>{{ title() }}</h1>\n\
             <p [class.highlighted]=\"isHighlighted\">{{ greeting() }}</p>\n\
             <button (click)=\"onButtonClick()\">Toggle</button>",
        );
        let names: rustc_hash::FxHashSet<&str> = info
            .member_accesses
            .iter()
            .filter(|access| access.object == ANGULAR_TPL_SENTINEL)
            .map(|access| access.member.as_str())
            .collect();
        for expected in ["title", "isHighlighted", "greeting", "onButtonClick"].iter() {
            assert!(names.contains(*expected), "should contain '{}'", expected);
        }
    }

    #[test]
    fn plain_html_no_angular_refs() {
        let info = parse("<!doctype html><html><body><h1>Hello</h1></body></html>");
        assert!(info.member_accesses.is_empty());
    }
}