1use std::path::Path;
12use std::sync::LazyLock;
13
14use oxc_span::Span;
15
16use crate::asset_url::normalize_asset_url;
17use crate::sfc_template::angular::{self, ANGULAR_TPL_SENTINEL};
18use crate::{ImportInfo, ImportedName, MemberAccess, ModuleInfo};
19use fallow_types::discover::FileId;
20
/// Matches one HTML comment (`<!-- ... -->`).
///
/// `(?s)` lets `.` match newlines so multi-line comments are covered, and the
/// lazy `.*?` stops at the first `-->`, so consecutive comments are matched
/// one at a time instead of being merged into a single span.
static HTML_COMMENT_RE: LazyLock<regex::Regex> =
    LazyLock::new(|| regex::Regex::new(r"(?s)<!--.*?-->").expect("valid regex"));
24
/// Matches a `<script ...` opening tag up to and including its `src` attribute.
///
/// Flags: `(?si)` — case-insensitive, and `.`/whitespace classes span newlines
/// so attributes may be spread over several lines.
///
/// Anything between `<script` and `src` is consumed lazily as either a single
/// character that is not `>` or a quote, or a complete quoted string. This
/// keeps the match inside one tag while still allowing `>` inside a quoted
/// attribute value.
///
/// Capture group 1 is the `src` value: one or more characters that are neither
/// `"` nor `'`. The opening and closing quote are not required to be the same
/// kind, and unquoted attribute values are not matched.
static SCRIPT_SRC_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
    regex::Regex::new(r#"(?si)<script\b(?:[^>"']|"[^"]*"|'[^']*')*?\bsrc\s*=\s*["']([^"']+)["']"#)
        .expect("valid regex")
});
32
/// Matches a `<link ...` tag whose `rel` attribute appears *before* `href`.
///
/// Only `rel` values of exactly `stylesheet` or `modulepreload` are accepted
/// (compared case-insensitively because of the `(?si)` flags).
///
/// Capture groups:
/// 1. the `rel` value,
/// 2. the `href` value (non-empty, containing no quote characters).
///
/// Attributes between the tag name, `rel`, and `href` are skipped lazily as
/// non-`>` characters or whole quoted strings, so the match never crosses the
/// end of the tag.
static LINK_HREF_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
    regex::Regex::new(
        r#"(?si)<link\b(?:[^>"']|"[^"]*"|'[^']*')*?\brel\s*=\s*["'](stylesheet|modulepreload)["'](?:[^>"']|"[^"]*"|'[^']*')*?\bhref\s*=\s*["']([^"']+)["']"#,
    )
    .expect("valid regex")
});
42
/// Matches a `<link ...` tag whose `href` attribute appears *before* `rel`.
///
/// This is the mirror image of the `rel`-first pattern, needed because the
/// regex crate cannot express "both attributes, in either order" in a single
/// linear pattern without duplicating the alternatives.
///
/// Capture groups (note the order differs from the `rel`-first pattern):
/// 1. the `href` value (non-empty, containing no quote characters),
/// 2. the `rel` value, restricted to `stylesheet` or `modulepreload`.
static LINK_HREF_REVERSE_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
    regex::Regex::new(
        r#"(?si)<link\b(?:[^>"']|"[^"]*"|'[^']*')*?\bhref\s*=\s*["']([^"']+)["'](?:[^>"']|"[^"]*"|'[^']*')*?\brel\s*=\s*["'](stylesheet|modulepreload)["']"#,
    )
    .expect("valid regex")
});
50
/// Returns `true` when `path` has the extension `html`.
///
/// The comparison is exact and case-sensitive: `index.htm` and `index.HTML`
/// are rejected, as is any path whose extension is missing or is not valid
/// UTF-8.
pub(crate) fn is_html_file(path: &Path) -> bool {
    matches!(
        path.extension().and_then(|ext| ext.to_str()),
        Some("html")
    )
}
58
/// Returns `true` when `src` points at something that is not a file in the
/// project: an absolute `http://` / `https://` URL, a protocol-relative URL
/// (`//host/...`), or an inline `data:` URI.
///
/// Scheme prefixes are compared ASCII case-insensitively because URI schemes
/// are case-insensitive (RFC 3986 §3.1); `HTTPS://cdn.example.com/x.js` is just
/// as remote as its lowercase spelling and must not be treated as a local
/// asset path.
///
/// The caller is expected to pass an already-trimmed value; leading whitespace
/// is not skipped here.
pub(crate) fn is_remote_url(src: &str) -> bool {
    const REMOTE_SCHEME_PREFIXES: [&str; 3] = ["http://", "https://", "data:"];

    // Protocol-relative URLs have no letters, so no case folding is needed.
    if src.starts_with("//") {
        return true;
    }

    REMOTE_SCHEME_PREFIXES.iter().any(|prefix| {
        // `get` returns `None` when `src` is shorter than the prefix or when
        // the cut would land inside a multi-byte character, so this never
        // panics on non-ASCII input.
        src.get(..prefix.len())
            .is_some_and(|head| head.eq_ignore_ascii_case(prefix))
    })
}
66
67pub(crate) fn collect_asset_refs(source: &str) -> Vec<String> {
74 let stripped = HTML_COMMENT_RE.replace_all(source, "");
75 let mut refs: Vec<String> = Vec::new();
76
77 for cap in SCRIPT_SRC_RE.captures_iter(&stripped) {
78 if let Some(m) = cap.get(1) {
79 let src = m.as_str().trim();
80 if !src.is_empty() && !is_remote_url(src) {
81 refs.push(src.to_string());
82 }
83 }
84 }
85
86 for cap in LINK_HREF_RE.captures_iter(&stripped) {
87 if let Some(m) = cap.get(2) {
88 let href = m.as_str().trim();
89 if !href.is_empty() && !is_remote_url(href) {
90 refs.push(href.to_string());
91 }
92 }
93 }
94 for cap in LINK_HREF_REVERSE_RE.captures_iter(&stripped) {
95 if let Some(m) = cap.get(1) {
96 let href = m.as_str().trim();
97 if !href.is_empty() && !is_remote_url(href) {
98 refs.push(href.to_string());
99 }
100 }
101 }
102
103 refs
104}
105
/// Test-only convenience wrapper around
/// [`parse_html_to_module_with_complexity`] that never requests complexity
/// metrics.
#[cfg(test)]
pub(crate) fn parse_html_to_module(file_id: FileId, source: &str, content_hash: u64) -> ModuleInfo {
    // Complexity metrics are not requested through this entry point.
    const NEED_COMPLEXITY: bool = false;
    parse_html_to_module_with_complexity(file_id, source, content_hash, NEED_COMPLEXITY)
}
111
112pub(crate) fn parse_html_to_module_with_complexity(
114 file_id: FileId,
115 source: &str,
116 content_hash: u64,
117 need_complexity: bool,
118) -> ModuleInfo {
119 let parsed_suppressions = crate::suppress::parse_suppressions_from_source(source);
120
121 let mut imports: Vec<ImportInfo> = collect_asset_refs(source)
124 .into_iter()
125 .map(|raw| ImportInfo {
126 source: normalize_asset_url(&raw),
127 imported_name: ImportedName::SideEffect,
128 local_name: String::new(),
129 is_type_only: false,
130 from_style: false,
131 span: Span::default(),
132 source_span: Span::default(),
133 })
134 .collect();
135
136 imports.sort_unstable_by(|a, b| a.source.cmp(&b.source));
139 imports.dedup_by(|a, b| a.source == b.source);
140
141 let template_refs = angular::collect_angular_template_refs(source);
154 let mut member_accesses: Vec<MemberAccess> = template_refs
155 .identifiers
156 .into_iter()
157 .map(|name| MemberAccess {
158 object: ANGULAR_TPL_SENTINEL.to_string(),
159 member: name,
160 })
161 .collect();
162 member_accesses.extend(template_refs.member_accesses);
163
164 let complexity = if need_complexity {
165 crate::template_complexity::compute_angular_template_complexity(source)
166 .into_iter()
167 .collect()
168 } else {
169 Vec::new()
170 };
171
172 ModuleInfo {
173 file_id,
174 exports: Vec::new(),
175 imports,
176 re_exports: Vec::new(),
177 dynamic_imports: Vec::new(),
178 dynamic_import_patterns: Vec::new(),
179 require_calls: Vec::new(),
180 member_accesses,
181 whole_object_uses: Vec::new(),
182 has_cjs_exports: false,
183 has_angular_component_template_url: false,
184 content_hash,
185 suppressions: parsed_suppressions.suppressions,
186 unknown_suppression_kinds: parsed_suppressions.unknown_kinds,
187 unused_import_bindings: Vec::new(),
188 type_referenced_import_bindings: Vec::new(),
189 value_referenced_import_bindings: Vec::new(),
190 line_offsets: fallow_types::extract::compute_line_offsets(source),
191 complexity,
192 flag_uses: Vec::new(),
193 class_heritage: vec![],
194 local_type_declarations: Vec::new(),
195 public_signature_type_references: Vec::new(),
196 namespace_object_aliases: Vec::new(),
197 }
198}
199
#[cfg(test)]
mod tests {
    use super::*;

    // ── is_html_file ────────────────────────────────────────────────

    #[test]
    fn is_html_file_html() {
        assert!(is_html_file(Path::new("index.html")));
    }

    #[test]
    fn is_html_file_nested() {
        assert!(is_html_file(Path::new("pages/about.html")));
    }

    #[test]
    fn is_html_file_rejects_htm() {
        assert!(!is_html_file(Path::new("index.htm")));
    }

    #[test]
    fn is_html_file_rejects_js() {
        assert!(!is_html_file(Path::new("app.js")));
    }

    #[test]
    fn is_html_file_rejects_ts() {
        assert!(!is_html_file(Path::new("app.ts")));
    }

    #[test]
    fn is_html_file_rejects_vue() {
        assert!(!is_html_file(Path::new("App.vue")));
    }

    // ── is_remote_url ───────────────────────────────────────────────

    #[test]
    fn remote_url_http() {
        assert!(is_remote_url("http://example.com/script.js"));
    }

    #[test]
    fn remote_url_https() {
        assert!(is_remote_url("https://cdn.example.com/style.css"));
    }

    #[test]
    fn remote_url_protocol_relative() {
        assert!(is_remote_url("//cdn.example.com/lib.js"));
    }

    #[test]
    fn remote_url_data() {
        assert!(is_remote_url("data:text/javascript;base64,abc"));
    }

    #[test]
    fn local_relative_not_remote() {
        assert!(!is_remote_url("./src/entry.js"));
    }

    #[test]
    fn local_root_relative_not_remote() {
        assert!(!is_remote_url("/src/entry.js"));
    }

    // ── <script src> extraction ─────────────────────────────────────

    #[test]
    fn extracts_module_script_src() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script type="module" src="./src/entry.js"></script>"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/entry.js");
    }

    #[test]
    fn extracts_plain_script_src() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script src="./src/polyfills.js"></script>"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/polyfills.js");
    }

    #[test]
    fn extracts_multiple_scripts() {
        let info = parse_html_to_module(
            FileId(0),
            r#"
            <script type="module" src="./src/entry.js"></script>
            <script src="./src/polyfills.js"></script>
            "#,
            0,
        );
        assert_eq!(info.imports.len(), 2);
        // Imports are sorted by source, so the order is deterministic.
        assert_eq!(info.imports[0].source, "./src/entry.js");
        assert_eq!(info.imports[1].source, "./src/polyfills.js");
    }

    #[test]
    fn duplicate_asset_refs_are_deduplicated() {
        // The same file referenced twice must yield a single import.
        let info = parse_html_to_module(
            FileId(0),
            r#"<script src="./src/entry.js"></script>
            <script type="module" src="./src/entry.js"></script>"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/entry.js");
    }

    #[test]
    fn skips_inline_script() {
        let info = parse_html_to_module(FileId(0), r#"<script>console.log("hello");</script>"#, 0);
        assert!(info.imports.is_empty());
    }

    #[test]
    fn skips_remote_script() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script src="https://cdn.example.com/lib.js"></script>"#,
            0,
        );
        assert!(info.imports.is_empty());
    }

    #[test]
    fn skips_protocol_relative_script() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script src="//cdn.example.com/lib.js"></script>"#,
            0,
        );
        assert!(info.imports.is_empty());
    }

    // ── <link href> extraction ──────────────────────────────────────

    #[test]
    fn extracts_stylesheet_link() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link rel="stylesheet" href="./src/global.css" />"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/global.css");
    }

    #[test]
    fn extracts_modulepreload_link() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link rel="modulepreload" href="./src/vendor.js" />"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/vendor.js");
    }

    #[test]
    fn extracts_link_with_reversed_attrs() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link href="./src/global.css" rel="stylesheet" />"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/global.css");
    }

    // ── URL normalization ───────────────────────────────────────────
    // Bare references (no leading `./`, `../`, or `/`) are expected to come
    // back with a `./` prefix; already-relative and root-absolute references
    // are expected to be left untouched.

    #[test]
    fn bare_script_src_normalized_to_relative() {
        let info = parse_html_to_module(FileId(0), r#"<script src="app.js"></script>"#, 0);
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./app.js");
    }

    #[test]
    fn bare_module_script_src_normalized_to_relative() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script type="module" src="main.ts"></script>"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./main.ts");
    }

    #[test]
    fn bare_stylesheet_link_href_normalized_to_relative() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link rel="stylesheet" href="styles.css" />"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./styles.css");
    }

    #[test]
    fn bare_link_href_reversed_attrs_normalized_to_relative() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link href="styles.css" rel="stylesheet" />"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./styles.css");
    }

    #[test]
    fn bare_modulepreload_link_href_normalized_to_relative() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link rel="modulepreload" href="vendor.js" />"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./vendor.js");
    }

    #[test]
    fn bare_asset_with_subdir_normalized_to_relative() {
        let info = parse_html_to_module(FileId(0), r#"<script src="assets/app.js"></script>"#, 0);
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./assets/app.js");
    }

    #[test]
    fn root_absolute_script_src_unchanged() {
        let info = parse_html_to_module(FileId(0), r#"<script src="/src/main.ts"></script>"#, 0);
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "/src/main.ts");
    }

    #[test]
    fn parent_relative_script_src_unchanged() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script src="../shared/vendor.js"></script>"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "../shared/vendor.js");
    }

    // ── <link> tags that must be ignored ────────────────────────────

    #[test]
    fn skips_preload_link() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link rel="preload" href="./src/font.woff2" as="font" />"#,
            0,
        );
        assert!(info.imports.is_empty());
    }

    #[test]
    fn skips_icon_link() {
        let info =
            parse_html_to_module(FileId(0), r#"<link rel="icon" href="./favicon.ico" />"#, 0);
        assert!(info.imports.is_empty());
    }

    #[test]
    fn skips_remote_stylesheet() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link rel="stylesheet" href="https://fonts.googleapis.com/css" />"#,
            0,
        );
        assert!(info.imports.is_empty());
    }

    // ── HTML comments ───────────────────────────────────────────────

    #[test]
    fn skips_commented_out_script() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<!-- <script src="./old.js"></script> -->
            <script src="./new.js"></script>"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./new.js");
    }

    #[test]
    fn skips_commented_out_link() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<!-- <link rel="stylesheet" href="./old.css" /> -->
            <link rel="stylesheet" href="./new.css" />"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./new.css");
    }

    // ── Multi-line tags ─────────────────────────────────────────────

    #[test]
    fn handles_multiline_script_tag() {
        let info = parse_html_to_module(
            FileId(0),
            "<script\n  type=\"module\"\n  src=\"./src/entry.js\"\n></script>",
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/entry.js");
    }

    #[test]
    fn handles_multiline_link_tag() {
        let info = parse_html_to_module(
            FileId(0),
            "<link\n  rel=\"stylesheet\"\n  href=\"./src/global.css\"\n/>",
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/global.css");
    }

    // ── Whole-document scenario ─────────────────────────────────────

    #[test]
    fn full_vite_html() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<!doctype html>
<html>
  <head>
    <link rel="stylesheet" href="./src/global.css" />
    <link rel="icon" href="/favicon.ico" />
  </head>
  <body>
    <div id="app"></div>
    <script type="module" src="./src/entry.js"></script>
  </body>
</html>"#,
            0,
        );
        // The icon link is ignored; only the stylesheet and the script remain.
        assert_eq!(info.imports.len(), 2);
        let sources: Vec<&str> = info.imports.iter().map(|i| i.source.as_str()).collect();
        assert!(sources.contains(&"./src/global.css"));
        assert!(sources.contains(&"./src/entry.js"));
    }

    // ── Edge cases ──────────────────────────────────────────────────

    #[test]
    fn empty_html() {
        let info = parse_html_to_module(FileId(0), "", 0);
        assert!(info.imports.is_empty());
    }

    #[test]
    fn html_with_no_assets() {
        let info = parse_html_to_module(
            FileId(0),
            r"<!doctype html><html><body><h1>Hello</h1></body></html>",
            0,
        );
        assert!(info.imports.is_empty());
    }

    #[test]
    fn single_quoted_attributes() {
        let info = parse_html_to_module(FileId(0), r"<script src='./src/entry.js'></script>", 0);
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/entry.js");
    }

    #[test]
    fn all_imports_are_side_effect() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script src="./entry.js"></script>
            <link rel="stylesheet" href="./style.css" />"#,
            0,
        );
        for imp in &info.imports {
            assert!(matches!(imp.imported_name, ImportedName::SideEffect));
            assert!(imp.local_name.is_empty());
            assert!(!imp.is_type_only);
        }
    }

    #[test]
    fn suppression_comments_extracted() {
        let info = parse_html_to_module(
            FileId(0),
            "<!-- fallow-ignore-file -->\n<script src=\"./entry.js\"></script>",
            0,
        );
        // The suppression result itself is not asserted here; this pins that a
        // leading `fallow-ignore-file` comment neither panics nor interferes
        // with asset extraction.
        assert_eq!(info.imports.len(), 1);
    }

    #[test]
    fn complexity_not_computed_by_test_helper() {
        // `parse_html_to_module` passes `need_complexity = false`, which must
        // leave the complexity list empty even for template-like content.
        let info = parse_html_to_module(FileId(0), "<h1>{{ title() }}</h1>", 0);
        assert!(info.complexity.is_empty());
    }

    // ── Angular template references ─────────────────────────────────

    #[test]
    fn angular_template_extracts_member_refs() {
        let info = parse_html_to_module(
            FileId(0),
            "<h1>{{ title() }}</h1>\n\
             <p [class.highlighted]=\"isHighlighted\">{{ greeting() }}</p>\n\
             <button (click)=\"onButtonClick()\">Toggle</button>",
            0,
        );
        let names: rustc_hash::FxHashSet<&str> = info
            .member_accesses
            .iter()
            .filter(|a| a.object == ANGULAR_TPL_SENTINEL)
            .map(|a| a.member.as_str())
            .collect();
        assert!(names.contains("title"), "should contain 'title'");
        assert!(
            names.contains("isHighlighted"),
            "should contain 'isHighlighted'"
        );
        assert!(names.contains("greeting"), "should contain 'greeting'");
        assert!(
            names.contains("onButtonClick"),
            "should contain 'onButtonClick'"
        );
    }

    #[test]
    fn plain_html_no_angular_refs() {
        let info = parse_html_to_module(
            FileId(0),
            "<!doctype html><html><body><h1>Hello</h1></body></html>",
            0,
        );
        assert!(info.member_accesses.is_empty());
    }
}