1use std::path::Path;
12use std::sync::LazyLock;
13
14use oxc_span::Span;
15
16use crate::asset_url::normalize_asset_url;
17use crate::sfc_template::angular::{self, ANGULAR_TPL_SENTINEL};
18use crate::{ImportInfo, ImportedName, MemberAccess, ModuleInfo};
19use fallow_types::discover::FileId;
20
21static HTML_COMMENT_RE: LazyLock<regex::Regex> =
23 LazyLock::new(|| regex::Regex::new(r"(?s)<!--.*?-->").expect("valid regex"));
24
25static SCRIPT_SRC_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
29 regex::Regex::new(r#"(?si)<script\b(?:[^>"']|"[^"]*"|'[^']*')*?\bsrc\s*=\s*["']([^"']+)["']"#)
30 .expect("valid regex")
31});
32
33static LINK_HREF_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
37 regex::Regex::new(
38 r#"(?si)<link\b(?:[^>"']|"[^"]*"|'[^']*')*?\brel\s*=\s*["'](stylesheet|modulepreload)["'](?:[^>"']|"[^"]*"|'[^']*')*?\bhref\s*=\s*["']([^"']+)["']"#,
39 )
40 .expect("valid regex")
41});
42
43static LINK_HREF_REVERSE_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
45 regex::Regex::new(
46 r#"(?si)<link\b(?:[^>"']|"[^"]*"|'[^']*')*?\bhref\s*=\s*["']([^"']+)["'](?:[^>"']|"[^"]*"|'[^']*')*?\brel\s*=\s*["'](stylesheet|modulepreload)["']"#,
47 )
48 .expect("valid regex")
49});
50
/// Returns `true` when `path` has the extension `html`.
///
/// The comparison is exact and case-sensitive: `.htm`, `.HTML`, paths with no
/// extension, and extensions that are not valid UTF-8 all yield `false`.
pub(crate) fn is_html_file(path: &Path) -> bool {
    let extension = path.extension().and_then(|raw| raw.to_str());
    matches!(extension, Some("html"))
}
58
/// Returns `true` when `src` points outside the project and must not be
/// treated as a local asset.
///
/// Recognized prefixes are `http://`, `https://`, protocol-relative `//`,
/// and `data:` URIs. URI schemes are case-insensitive, so the prefixes are
/// compared ignoring ASCII case (`HTTPS://cdn...` is remote as well).
///
/// `src` is inspected as given; callers are expected to trim it first.
pub(crate) fn is_remote_url(src: &str) -> bool {
    const REMOTE_PREFIXES: [&str; 4] = ["http://", "https://", "//", "data:"];

    // Compare on bytes: every prefix is pure ASCII, and slicing bytes cannot
    // panic on a multi-byte character boundary the way slicing `str` could.
    let bytes = src.as_bytes();
    REMOTE_PREFIXES.iter().any(|prefix| {
        bytes
            .get(..prefix.len())
            .is_some_and(|head| head.eq_ignore_ascii_case(prefix.as_bytes()))
    })
}
66
67pub(crate) fn collect_asset_refs(source: &str) -> Vec<String> {
74 let stripped = HTML_COMMENT_RE.replace_all(source, "");
75 let mut refs: Vec<String> = Vec::new();
76
77 for cap in SCRIPT_SRC_RE.captures_iter(&stripped) {
78 if let Some(m) = cap.get(1) {
79 let src = m.as_str().trim();
80 if !src.is_empty() && !is_remote_url(src) {
81 refs.push(src.to_string());
82 }
83 }
84 }
85
86 for cap in LINK_HREF_RE.captures_iter(&stripped) {
87 if let Some(m) = cap.get(2) {
88 let href = m.as_str().trim();
89 if !href.is_empty() && !is_remote_url(href) {
90 refs.push(href.to_string());
91 }
92 }
93 }
94 for cap in LINK_HREF_REVERSE_RE.captures_iter(&stripped) {
95 if let Some(m) = cap.get(1) {
96 let href = m.as_str().trim();
97 if !href.is_empty() && !is_remote_url(href) {
98 refs.push(href.to_string());
99 }
100 }
101 }
102
103 refs
104}
105
/// Test-only shorthand for [`parse_html_to_module_with_complexity`] with
/// complexity collection switched off.
#[cfg(test)]
pub(crate) fn parse_html_to_module(file_id: FileId, source: &str, content_hash: u64) -> ModuleInfo {
    let need_complexity = false;
    parse_html_to_module_with_complexity(file_id, source, content_hash, need_complexity)
}
111
112pub(crate) fn parse_html_to_module_with_complexity(
114 file_id: FileId,
115 source: &str,
116 content_hash: u64,
117 need_complexity: bool,
118) -> ModuleInfo {
119 let suppressions = crate::suppress::parse_suppressions_from_source(source);
120
121 let mut imports: Vec<ImportInfo> = collect_asset_refs(source)
124 .into_iter()
125 .map(|raw| ImportInfo {
126 source: normalize_asset_url(&raw),
127 imported_name: ImportedName::SideEffect,
128 local_name: String::new(),
129 is_type_only: false,
130 from_style: false,
131 span: Span::default(),
132 source_span: Span::default(),
133 })
134 .collect();
135
136 imports.sort_unstable_by(|a, b| a.source.cmp(&b.source));
139 imports.dedup_by(|a, b| a.source == b.source);
140
141 let template_refs = angular::collect_angular_template_refs(source);
154 let mut member_accesses: Vec<MemberAccess> = template_refs
155 .identifiers
156 .into_iter()
157 .map(|name| MemberAccess {
158 object: ANGULAR_TPL_SENTINEL.to_string(),
159 member: name,
160 })
161 .collect();
162 member_accesses.extend(template_refs.member_accesses);
163
164 let complexity = if need_complexity {
165 crate::template_complexity::compute_angular_template_complexity(source)
166 .into_iter()
167 .collect()
168 } else {
169 Vec::new()
170 };
171
172 ModuleInfo {
173 file_id,
174 exports: Vec::new(),
175 imports,
176 re_exports: Vec::new(),
177 dynamic_imports: Vec::new(),
178 dynamic_import_patterns: Vec::new(),
179 require_calls: Vec::new(),
180 member_accesses,
181 whole_object_uses: Vec::new(),
182 has_cjs_exports: false,
183 content_hash,
184 suppressions,
185 unused_import_bindings: Vec::new(),
186 type_referenced_import_bindings: Vec::new(),
187 value_referenced_import_bindings: Vec::new(),
188 line_offsets: fallow_types::extract::compute_line_offsets(source),
189 complexity,
190 flag_uses: Vec::new(),
191 class_heritage: vec![],
192 local_type_declarations: Vec::new(),
193 public_signature_type_references: Vec::new(),
194 namespace_object_aliases: Vec::new(),
195 }
196}
197
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `html` with the fixture values shared by every test in this
    /// module: `FileId(0)` and a content hash of zero.
    fn parse(html: &str) -> ModuleInfo {
        parse_html_to_module(FileId(0), html, 0)
    }

    /// Asserts that `html` produces exactly one import whose source is `expected`.
    #[track_caller]
    fn assert_single_import(html: &str, expected: &str) {
        let module = parse(html);
        assert_eq!(module.imports.len(), 1);
        assert_eq!(module.imports[0].source, expected);
    }

    /// Asserts that `html` produces no imports.
    #[track_caller]
    fn assert_no_imports(html: &str) {
        assert!(parse(html).imports.is_empty());
    }

    /// Reports whether `module` has an import with exactly this source.
    fn has_import(module: &ModuleInfo, source: &str) -> bool {
        module
            .imports
            .iter()
            .any(|import| import.source.as_str() == source)
    }

    // ── is_html_file ─────────────────────────────────────────────

    #[test]
    fn is_html_file_html() {
        let path = Path::new("index.html");
        assert!(is_html_file(path));
    }

    #[test]
    fn is_html_file_nested() {
        let path = Path::new("pages/about.html");
        assert!(is_html_file(path));
    }

    #[test]
    fn is_html_file_rejects_htm() {
        let path = Path::new("index.htm");
        assert!(!is_html_file(path));
    }

    #[test]
    fn is_html_file_rejects_js() {
        let path = Path::new("app.js");
        assert!(!is_html_file(path));
    }

    #[test]
    fn is_html_file_rejects_ts() {
        let path = Path::new("app.ts");
        assert!(!is_html_file(path));
    }

    #[test]
    fn is_html_file_rejects_vue() {
        let path = Path::new("App.vue");
        assert!(!is_html_file(path));
    }

    // ── is_remote_url ────────────────────────────────────────────

    #[test]
    fn remote_url_http() {
        let url = "http://example.com/script.js";
        assert!(is_remote_url(url));
    }

    #[test]
    fn remote_url_https() {
        let url = "https://cdn.example.com/style.css";
        assert!(is_remote_url(url));
    }

    #[test]
    fn remote_url_protocol_relative() {
        let url = "//cdn.example.com/lib.js";
        assert!(is_remote_url(url));
    }

    #[test]
    fn remote_url_data() {
        let url = "data:text/javascript;base64,abc";
        assert!(is_remote_url(url));
    }

    #[test]
    fn local_relative_not_remote() {
        let url = "./src/entry.js";
        assert!(!is_remote_url(url));
    }

    #[test]
    fn local_root_relative_not_remote() {
        let url = "/src/entry.js";
        assert!(!is_remote_url(url));
    }

    // ── <script src> extraction ──────────────────────────────────

    #[test]
    fn extracts_module_script_src() {
        assert_single_import(
            r#"<script type="module" src="./src/entry.js"></script>"#,
            "./src/entry.js",
        );
    }

    #[test]
    fn extracts_plain_script_src() {
        assert_single_import(
            r#"<script src="./src/polyfills.js"></script>"#,
            "./src/polyfills.js",
        );
    }

    #[test]
    fn extracts_multiple_scripts() {
        let module = parse(
            r#"
            <script type="module" src="./src/entry.js"></script>
            <script src="./src/polyfills.js"></script>
            "#,
        );
        assert_eq!(module.imports.len(), 2);
    }

    #[test]
    fn skips_inline_script() {
        assert_no_imports(r#"<script>console.log("hello");</script>"#);
    }

    #[test]
    fn skips_remote_script() {
        assert_no_imports(r#"<script src="https://cdn.example.com/lib.js"></script>"#);
    }

    #[test]
    fn skips_protocol_relative_script() {
        assert_no_imports(r#"<script src="//cdn.example.com/lib.js"></script>"#);
    }

    // ── <link href> extraction ───────────────────────────────────

    #[test]
    fn extracts_stylesheet_link() {
        assert_single_import(
            r#"<link rel="stylesheet" href="./src/global.css" />"#,
            "./src/global.css",
        );
    }

    #[test]
    fn extracts_modulepreload_link() {
        assert_single_import(
            r#"<link rel="modulepreload" href="./src/vendor.js" />"#,
            "./src/vendor.js",
        );
    }

    #[test]
    fn extracts_link_with_reversed_attrs() {
        assert_single_import(
            r#"<link href="./src/global.css" rel="stylesheet" />"#,
            "./src/global.css",
        );
    }

    // ── normalization of bare / absolute / parent-relative paths ─

    #[test]
    fn bare_script_src_normalized_to_relative() {
        assert_single_import(r#"<script src="app.js"></script>"#, "./app.js");
    }

    #[test]
    fn bare_module_script_src_normalized_to_relative() {
        assert_single_import(
            r#"<script type="module" src="main.ts"></script>"#,
            "./main.ts",
        );
    }

    #[test]
    fn bare_stylesheet_link_href_normalized_to_relative() {
        assert_single_import(
            r#"<link rel="stylesheet" href="styles.css" />"#,
            "./styles.css",
        );
    }

    #[test]
    fn bare_link_href_reversed_attrs_normalized_to_relative() {
        assert_single_import(
            r#"<link href="styles.css" rel="stylesheet" />"#,
            "./styles.css",
        );
    }

    #[test]
    fn bare_modulepreload_link_href_normalized_to_relative() {
        assert_single_import(
            r#"<link rel="modulepreload" href="vendor.js" />"#,
            "./vendor.js",
        );
    }

    #[test]
    fn bare_asset_with_subdir_normalized_to_relative() {
        assert_single_import(
            r#"<script src="assets/app.js"></script>"#,
            "./assets/app.js",
        );
    }

    #[test]
    fn root_absolute_script_src_unchanged() {
        assert_single_import(r#"<script src="/src/main.ts"></script>"#, "/src/main.ts");
    }

    #[test]
    fn parent_relative_script_src_unchanged() {
        assert_single_import(
            r#"<script src="../shared/vendor.js"></script>"#,
            "../shared/vendor.js",
        );
    }

    // ── links that must be ignored ───────────────────────────────

    #[test]
    fn skips_preload_link() {
        assert_no_imports(r#"<link rel="preload" href="./src/font.woff2" as="font" />"#);
    }

    #[test]
    fn skips_icon_link() {
        assert_no_imports(r#"<link rel="icon" href="./favicon.ico" />"#);
    }

    #[test]
    fn skips_remote_stylesheet() {
        assert_no_imports(r#"<link rel="stylesheet" href="https://fonts.googleapis.com/css" />"#);
    }

    // ── HTML comments ────────────────────────────────────────────

    #[test]
    fn skips_commented_out_script() {
        assert_single_import(
            r#"<!-- <script src="./old.js"></script> -->
            <script src="./new.js"></script>"#,
            "./new.js",
        );
    }

    #[test]
    fn skips_commented_out_link() {
        assert_single_import(
            r#"<!-- <link rel="stylesheet" href="./old.css" /> -->
            <link rel="stylesheet" href="./new.css" />"#,
            "./new.css",
        );
    }

    // ── tags spread over several lines ───────────────────────────

    #[test]
    fn handles_multiline_script_tag() {
        assert_single_import(
            "<script\n  type=\"module\"\n  src=\"./src/entry.js\"\n></script>",
            "./src/entry.js",
        );
    }

    #[test]
    fn handles_multiline_link_tag() {
        assert_single_import(
            "<link\n  rel=\"stylesheet\"\n  href=\"./src/global.css\"\n/>",
            "./src/global.css",
        );
    }

    // ── whole documents ──────────────────────────────────────────

    #[test]
    fn full_vite_html() {
        let module = parse(
            r#"<!doctype html>
<html>
  <head>
    <link rel="stylesheet" href="./src/global.css" />
    <link rel="icon" href="/favicon.ico" />
  </head>
  <body>
    <div id="app"></div>
    <script type="module" src="./src/entry.js"></script>
  </body>
</html>"#,
        );
        assert_eq!(module.imports.len(), 2);
        assert!(has_import(&module, "./src/global.css"));
        assert!(has_import(&module, "./src/entry.js"));
    }

    // ── edge cases ───────────────────────────────────────────────

    #[test]
    fn empty_html() {
        assert_no_imports("");
    }

    #[test]
    fn html_with_no_assets() {
        assert_no_imports(r"<!doctype html><html><body><h1>Hello</h1></body></html>");
    }

    #[test]
    fn single_quoted_attributes() {
        assert_single_import(
            r"<script src='./src/entry.js'></script>",
            "./src/entry.js",
        );
    }

    #[test]
    fn all_imports_are_side_effect() {
        let module = parse(
            r#"<script src="./entry.js"></script>
            <link rel="stylesheet" href="./style.css" />"#,
        );
        for import in &module.imports {
            assert!(matches!(import.imported_name, ImportedName::SideEffect));
            assert!(import.local_name.is_empty());
            assert!(!import.is_type_only);
        }
    }

    #[test]
    fn suppression_comments_extracted() {
        // Only checks that the script import survives a leading
        // `fallow-ignore-file` comment; `suppressions` itself is not asserted.
        let module = parse("<!-- fallow-ignore-file -->\n<script src=\"./entry.js\"></script>");
        assert_eq!(module.imports.len(), 1);
    }

    // ── Angular template references ──────────────────────────────

    #[test]
    fn angular_template_extracts_member_refs() {
        let module = parse(
            "<h1>{{ title() }}</h1>\n\
             <p [class.highlighted]=\"isHighlighted\">{{ greeting() }}</p>\n\
             <button (click)=\"onButtonClick()\">Toggle</button>",
        );
        let names: rustc_hash::FxHashSet<&str> = module
            .member_accesses
            .iter()
            .filter(|access| access.object == ANGULAR_TPL_SENTINEL)
            .map(|access| access.member.as_str())
            .collect();

        for expected in ["title", "isHighlighted", "greeting", "onButtonClick"] {
            assert!(names.contains(expected), "should contain '{}'", expected);
        }
    }

    #[test]
    fn plain_html_no_angular_refs() {
        let module = parse("<!doctype html><html><body><h1>Hello</h1></body></html>");
        assert!(module.member_accesses.is_empty());
    }
}