1use std::path::Path;
12use std::sync::LazyLock;
13
14use oxc_span::Span;
15
16use crate::asset_url::normalize_asset_url;
17use crate::sfc_template::angular::{self, ANGULAR_TPL_SENTINEL};
18use crate::{ImportInfo, ImportedName, MemberAccess, ModuleInfo};
19use fallow_types::discover::FileId;
20
21static HTML_COMMENT_RE: LazyLock<regex::Regex> =
23 LazyLock::new(|| crate::static_regex(r"(?s)<!--.*?-->"));
24
25static SCRIPT_SRC_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
29 crate::static_regex(r#"(?si)<script\b(?:[^>"']|"[^"]*"|'[^']*')*?\bsrc\s*=\s*["']([^"']+)["']"#)
30});
31
32static LINK_HREF_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
36 crate::static_regex(
37 r#"(?si)<link\b(?:[^>"']|"[^"]*"|'[^']*')*?\brel\s*=\s*["'](stylesheet|modulepreload)["'](?:[^>"']|"[^"]*"|'[^']*')*?\bhref\s*=\s*["']([^"']+)["']"#,
38 )
39});
40
41static LINK_HREF_REVERSE_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
43 crate::static_regex(
44 r#"(?si)<link\b(?:[^>"']|"[^"]*"|'[^']*')*?\bhref\s*=\s*["']([^"']+)["'](?:[^>"']|"[^"]*"|'[^']*')*?\brel\s*=\s*["'](stylesheet|modulepreload)["']"#,
45 )
46});
47
/// Returns `true` when `path` has exactly the extension `html`.
///
/// The comparison is case-sensitive and does not accept `htm`; paths without an
/// extension (or with a non-UTF-8 extension) yield `false`.
pub(crate) fn is_html_file(path: &Path) -> bool {
    let extension = path.extension().and_then(|ext| ext.to_str());
    matches!(extension, Some("html"))
}
54
/// Returns `true` when `src` must not be treated as a local file reference.
///
/// Recognised prefixes are `http://`, `https://`, protocol-relative `//`, and
/// inline `data:` URLs. Matching is ASCII case-insensitive because URI schemes
/// are case-insensitive (RFC 3986, section 3.1), so `HTTPS://...` and `Data:...`
/// are classified the same way as their lowercase spellings.
pub(crate) fn is_remote_url(src: &str) -> bool {
    const REMOTE_PREFIXES: [&str; 4] = ["http://", "https://", "//", "data:"];

    REMOTE_PREFIXES.iter().any(|prefix| {
        // `str::get` yields `None` when `src` is shorter than the prefix or when the
        // cut would split a multi-byte character, so this never panics.
        src.get(..prefix.len())
            .is_some_and(|head| head.eq_ignore_ascii_case(prefix))
    })
}
62
/// Returns `true` when `value` contains a template placeholder marker.
///
/// Two markers are recognised anywhere in the string: `{{` (mustache-style
/// interpolation) and `###` (triple-hash tokens such as `###APPNAME###`).
pub(crate) fn is_template_placeholder(value: &str) -> bool {
    const PLACEHOLDER_MARKERS: [&str; 2] = ["{{", "###"];
    PLACEHOLDER_MARKERS
        .iter()
        .any(|marker| value.contains(*marker))
}
80
81pub(crate) fn collect_asset_refs(source: &str) -> Vec<String> {
88 let stripped = HTML_COMMENT_RE.replace_all(source, "");
89 let mut refs: Vec<String> = Vec::new();
90
91 for cap in SCRIPT_SRC_RE.captures_iter(&stripped) {
92 if let Some(m) = cap.get(1) {
93 let src = m.as_str().trim();
94 if !src.is_empty() && !is_remote_url(src) && !is_template_placeholder(src) {
95 refs.push(src.to_string());
96 }
97 }
98 }
99
100 for cap in LINK_HREF_RE.captures_iter(&stripped) {
101 if let Some(m) = cap.get(2) {
102 let href = m.as_str().trim();
103 if !href.is_empty() && !is_remote_url(href) && !is_template_placeholder(href) {
104 refs.push(href.to_string());
105 }
106 }
107 }
108 for cap in LINK_HREF_REVERSE_RE.captures_iter(&stripped) {
109 if let Some(m) = cap.get(1) {
110 let href = m.as_str().trim();
111 if !href.is_empty() && !is_remote_url(href) && !is_template_placeholder(href) {
112 refs.push(href.to_string());
113 }
114 }
115 }
116
117 refs
118}
119
/// Test-only convenience wrapper around [`parse_html_to_module_with_complexity`]
/// that never requests template complexity.
#[cfg(test)]
pub(crate) fn parse_html_to_module(file_id: FileId, source: &str, content_hash: u64) -> ModuleInfo {
    let need_complexity = false;
    parse_html_to_module_with_complexity(file_id, source, content_hash, need_complexity)
}
125
126pub(crate) fn parse_html_to_module_with_complexity(
128 file_id: FileId,
129 source: &str,
130 content_hash: u64,
131 need_complexity: bool,
132) -> ModuleInfo {
133 let parsed_suppressions = crate::suppress::parse_suppressions_from_source(source);
134
135 let mut imports: Vec<ImportInfo> = collect_asset_refs(source)
136 .into_iter()
137 .map(|raw| ImportInfo {
138 source: normalize_asset_url(&raw),
139 imported_name: ImportedName::SideEffect,
140 local_name: String::new(),
141 is_type_only: false,
142 from_style: false,
143 span: Span::default(),
144 source_span: Span::default(),
145 })
146 .collect();
147
148 imports.sort_unstable_by(|a, b| a.source.cmp(&b.source));
149 imports.dedup_by(|a, b| a.source == b.source);
150
151 let angular::AngularTemplateRefs {
152 identifiers,
153 member_accesses: template_member_accesses,
154 security_sinks,
155 } = angular::collect_angular_template_refs(source);
156 let mut member_accesses: Vec<MemberAccess> = identifiers
157 .into_iter()
158 .map(|name| MemberAccess {
159 object: ANGULAR_TPL_SENTINEL.to_string(),
160 member: name,
161 })
162 .collect();
163 member_accesses.extend(template_member_accesses);
164
165 let complexity = if need_complexity {
166 crate::template_complexity::compute_angular_template_complexity(source)
167 .into_iter()
168 .collect()
169 } else {
170 Vec::new()
171 };
172
173 ModuleInfo {
174 file_id,
175 exports: Vec::new(),
176 imports,
177 re_exports: Vec::new(),
178 dynamic_imports: Vec::new(),
179 dynamic_import_patterns: Vec::new(),
180 require_calls: Vec::new(),
181 package_path_references: Vec::new(),
182 member_accesses,
183 whole_object_uses: Vec::new(),
184 has_cjs_exports: false,
185 has_angular_component_template_url: false,
186 content_hash,
187 suppressions: parsed_suppressions.suppressions,
188 unknown_suppression_kinds: parsed_suppressions.unknown_kinds,
189 unused_import_bindings: Vec::new(),
190 type_referenced_import_bindings: Vec::new(),
191 value_referenced_import_bindings: Vec::new(),
192 line_offsets: fallow_types::extract::compute_line_offsets(source),
193 complexity,
194 flag_uses: Vec::new(),
195 class_heritage: vec![],
196 injection_tokens: vec![],
197 local_type_declarations: Vec::new(),
198 public_signature_type_references: Vec::new(),
199 namespace_object_aliases: Vec::new(),
200 iconify_prefixes: Vec::new(),
201 iconify_icon_names: Vec::new(),
202 auto_import_candidates: Vec::new(),
203 directives: Vec::new(),
204 security_sinks,
205 security_sinks_skipped: 0,
206 tainted_bindings: Vec::new(),
207 sanitized_sink_args: Vec::new(),
208 }
209}
210
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `html` with a fixed file id and content hash.
    fn parse(html: &str) -> ModuleInfo {
        parse_html_to_module(FileId(0), html, 0)
    }

    /// Import sources recorded for `html`, in the order stored on the module.
    fn import_sources(html: &str) -> Vec<String> {
        parse(html)
            .imports
            .iter()
            .map(|import| import.source.as_str().to_string())
            .collect()
    }

    // is_html_file

    #[test]
    fn is_html_file_html() {
        assert!(is_html_file(Path::new("index.html")));
    }

    #[test]
    fn is_html_file_nested() {
        assert!(is_html_file(Path::new("pages/about.html")));
    }

    #[test]
    fn is_html_file_rejects_htm() {
        assert!(!is_html_file(Path::new("index.htm")));
    }

    #[test]
    fn is_html_file_rejects_js() {
        assert!(!is_html_file(Path::new("app.js")));
    }

    #[test]
    fn is_html_file_rejects_ts() {
        assert!(!is_html_file(Path::new("app.ts")));
    }

    #[test]
    fn is_html_file_rejects_vue() {
        assert!(!is_html_file(Path::new("App.vue")));
    }

    // is_remote_url

    #[test]
    fn remote_url_http() {
        assert!(is_remote_url("http://example.com/script.js"));
    }

    #[test]
    fn remote_url_https() {
        assert!(is_remote_url("https://cdn.example.com/style.css"));
    }

    #[test]
    fn remote_url_protocol_relative() {
        assert!(is_remote_url("//cdn.example.com/lib.js"));
    }

    #[test]
    fn remote_url_data() {
        assert!(is_remote_url("data:text/javascript;base64,abc"));
    }

    #[test]
    fn local_relative_not_remote() {
        assert!(!is_remote_url("./src/entry.js"));
    }

    #[test]
    fn local_root_relative_not_remote() {
        assert!(!is_remote_url("/src/entry.js"));
    }

    // <script src> extraction

    #[test]
    fn extracts_module_script_src() {
        assert_eq!(
            import_sources(r#"<script type="module" src="./src/entry.js"></script>"#),
            ["./src/entry.js"]
        );
    }

    #[test]
    fn extracts_plain_script_src() {
        assert_eq!(
            import_sources(r#"<script src="./src/polyfills.js"></script>"#),
            ["./src/polyfills.js"]
        );
    }

    #[test]
    fn extracts_multiple_scripts() {
        let sources = import_sources(
            r#"
            <script type="module" src="./src/entry.js"></script>
            <script src="./src/polyfills.js"></script>
        "#,
        );
        assert_eq!(sources.len(), 2);
    }

    #[test]
    fn skips_inline_script() {
        assert!(import_sources(r#"<script>console.log("hello");</script>"#).is_empty());
    }

    // Template placeholders

    #[test]
    fn skips_handlebars_placeholder_in_script_src() {
        let info = parse(
            r#"<script src="{{rootURL}}assets/app.js"></script>
               <script src="{{config.assetsPath}}vendor.js"></script>"#,
        );
        assert!(
            info.imports.is_empty(),
            "Handlebars-placeholder script srcs should not enter the import graph; got {:?}",
            info.imports
        );
    }

    #[test]
    fn skips_handlebars_placeholder_in_link_href() {
        assert!(
            import_sources(r#"<link rel="stylesheet" href="{{rootURL}}assets/app.css">"#)
                .is_empty()
        );
    }

    #[test]
    fn skips_ember_cli_blueprint_placeholder() {
        assert!(
            import_sources(r####"<script src="###APPNAME###/app.js"></script>"####).is_empty()
        );
    }

    #[test]
    fn extracts_normal_specifier_alongside_placeholders() {
        assert_eq!(
            import_sources(
                r#"<script src="{{rootURL}}assets/app.js"></script>
               <script src="./src/main.ts"></script>"#,
            ),
            ["./src/main.ts"]
        );
    }

    // Remote scripts

    #[test]
    fn skips_remote_script() {
        assert!(
            import_sources(r#"<script src="https://cdn.example.com/lib.js"></script>"#).is_empty()
        );
    }

    #[test]
    fn skips_protocol_relative_script() {
        assert!(import_sources(r#"<script src="//cdn.example.com/lib.js"></script>"#).is_empty());
    }

    // <link href> extraction

    #[test]
    fn extracts_stylesheet_link() {
        assert_eq!(
            import_sources(r#"<link rel="stylesheet" href="./src/global.css" />"#),
            ["./src/global.css"]
        );
    }

    #[test]
    fn extracts_modulepreload_link() {
        assert_eq!(
            import_sources(r#"<link rel="modulepreload" href="./src/vendor.js" />"#),
            ["./src/vendor.js"]
        );
    }

    #[test]
    fn extracts_link_with_reversed_attrs() {
        assert_eq!(
            import_sources(r#"<link href="./src/global.css" rel="stylesheet" />"#),
            ["./src/global.css"]
        );
    }

    // Normalization of bare specifiers

    #[test]
    fn bare_script_src_normalized_to_relative() {
        assert_eq!(
            import_sources(r#"<script src="app.js"></script>"#),
            ["./app.js"]
        );
    }

    #[test]
    fn bare_module_script_src_normalized_to_relative() {
        assert_eq!(
            import_sources(r#"<script type="module" src="main.ts"></script>"#),
            ["./main.ts"]
        );
    }

    #[test]
    fn bare_stylesheet_link_href_normalized_to_relative() {
        assert_eq!(
            import_sources(r#"<link rel="stylesheet" href="styles.css" />"#),
            ["./styles.css"]
        );
    }

    #[test]
    fn bare_link_href_reversed_attrs_normalized_to_relative() {
        assert_eq!(
            import_sources(r#"<link href="styles.css" rel="stylesheet" />"#),
            ["./styles.css"]
        );
    }

    #[test]
    fn bare_modulepreload_link_href_normalized_to_relative() {
        assert_eq!(
            import_sources(r#"<link rel="modulepreload" href="vendor.js" />"#),
            ["./vendor.js"]
        );
    }

    #[test]
    fn bare_asset_with_subdir_normalized_to_relative() {
        assert_eq!(
            import_sources(r#"<script src="assets/app.js"></script>"#),
            ["./assets/app.js"]
        );
    }

    #[test]
    fn root_absolute_script_src_unchanged() {
        assert_eq!(
            import_sources(r#"<script src="/src/main.ts"></script>"#),
            ["/src/main.ts"]
        );
    }

    #[test]
    fn parent_relative_script_src_unchanged() {
        assert_eq!(
            import_sources(r#"<script src="../shared/vendor.js"></script>"#),
            ["../shared/vendor.js"]
        );
    }

    // Links that must be ignored

    #[test]
    fn skips_preload_link() {
        assert!(
            import_sources(r#"<link rel="preload" href="./src/font.woff2" as="font" />"#)
                .is_empty()
        );
    }

    #[test]
    fn skips_icon_link() {
        assert!(import_sources(r#"<link rel="icon" href="./favicon.ico" />"#).is_empty());
    }

    #[test]
    fn skips_remote_stylesheet() {
        assert!(
            import_sources(r#"<link rel="stylesheet" href="https://fonts.googleapis.com/css" />"#)
                .is_empty()
        );
    }

    // HTML comments

    #[test]
    fn skips_commented_out_script() {
        assert_eq!(
            import_sources(
                r#"<!-- <script src="./old.js"></script> -->
               <script src="./new.js"></script>"#,
            ),
            ["./new.js"]
        );
    }

    #[test]
    fn skips_commented_out_link() {
        assert_eq!(
            import_sources(
                r#"<!-- <link rel="stylesheet" href="./old.css" /> -->
               <link rel="stylesheet" href="./new.css" />"#,
            ),
            ["./new.css"]
        );
    }

    // Multi-line tags

    #[test]
    fn handles_multiline_script_tag() {
        assert_eq!(
            import_sources("<script\n  type=\"module\"\n  src=\"./src/entry.js\"\n></script>"),
            ["./src/entry.js"]
        );
    }

    #[test]
    fn handles_multiline_link_tag() {
        assert_eq!(
            import_sources("<link\n  rel=\"stylesheet\"\n  href=\"./src/global.css\"\n/>"),
            ["./src/global.css"]
        );
    }

    // Whole documents

    #[test]
    fn full_vite_html() {
        let sources = import_sources(
            r#"<!doctype html>
<html>
  <head>
    <link rel="stylesheet" href="./src/global.css" />
    <link rel="icon" href="/favicon.ico" />
  </head>
  <body>
    <div id="app"></div>
    <script type="module" src="./src/entry.js"></script>
  </body>
</html>"#,
        );
        assert_eq!(sources.len(), 2);
        assert!(sources.iter().any(|s| s == "./src/global.css"));
        assert!(sources.iter().any(|s| s == "./src/entry.js"));
    }

    #[test]
    fn empty_html() {
        assert!(import_sources("").is_empty());
    }

    #[test]
    fn html_with_no_assets() {
        assert!(
            import_sources(r"<!doctype html><html><body><h1>Hello</h1></body></html>").is_empty()
        );
    }

    #[test]
    fn single_quoted_attributes() {
        assert_eq!(
            import_sources(r"<script src='./src/entry.js'></script>"),
            ["./src/entry.js"]
        );
    }

    // Shape of the produced imports

    #[test]
    fn all_imports_are_side_effect() {
        let info = parse(
            r#"<script src="./entry.js"></script>
               <link rel="stylesheet" href="./style.css" />"#,
        );
        for import in &info.imports {
            assert!(matches!(import.imported_name, ImportedName::SideEffect));
            assert!(import.local_name.is_empty());
            assert!(!import.is_type_only);
        }
    }

    #[test]
    fn suppression_comments_extracted() {
        let sources =
            import_sources("<!-- fallow-ignore-file -->\n<script src=\"./entry.js\"></script>");
        assert_eq!(sources.len(), 1);
    }

    // Angular template references

    #[test]
    fn angular_template_extracts_member_refs() {
        let info = parse(
            "<h1>{{ title() }}</h1>\n\
             <p [class.highlighted]=\"isHighlighted\">{{ greeting() }}</p>\n\
             <button (click)=\"onButtonClick()\">Toggle</button>",
        );
        for expected in ["title", "isHighlighted", "greeting", "onButtonClick"] {
            let found = info
                .member_accesses
                .iter()
                .any(|a| a.object == ANGULAR_TPL_SENTINEL && a.member.as_str() == expected);
            assert!(found, "should contain '{expected}'");
        }
    }

    #[test]
    fn plain_html_no_angular_refs() {
        let info = parse("<!doctype html><html><body><h1>Hello</h1></body></html>");
        assert!(info.member_accesses.is_empty());
    }
}