1use std::path::Path;
12use std::sync::LazyLock;
13
14use oxc_span::Span;
15
16use crate::asset_url::normalize_asset_url;
17use crate::sfc_template::angular::{self, ANGULAR_TPL_SENTINEL};
18use crate::{ImportInfo, ImportedName, MemberAccess, ModuleInfo};
19use fallow_types::discover::FileId;
20
/// Matches one HTML comment, from `<!--` up to the nearest following `-->`.
///
/// The `(?s)` flag lets `.` match newlines, so comments spanning several lines
/// are matched as a single unit; the lazy `.*?` stops at the first terminator.
/// The pattern is compiled once, on first use.
static HTML_COMMENT_RE: LazyLock<regex::Regex> =
    LazyLock::new(|| regex::Regex::new(r"(?s)<!--.*?-->").expect("valid regex"));
24
/// Matches a `<script ...>` opening tag that carries a quoted `src` attribute.
///
/// Capture group 1 is the attribute value (without the surrounding quotes).
/// Matching is case-insensitive and `.`-matches-newline (`(?si)`), so the tag
/// may be split across lines. Attributes preceding `src` are skipped with
/// `(?:[^>"']|"[^"]*"|'[^']*')*?`, which consumes quoted values whole so that a
/// `>` inside a quoted attribute value does not end the scan.
///
/// NOTE(review): `\bsrc` also matches right after a `-` (e.g. `data-src`),
/// because `-` is not a word character — confirm whether that is intended.
static SCRIPT_SRC_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
    regex::Regex::new(r#"(?si)<script\b(?:[^>"']|"[^"]*"|'[^']*')*?\bsrc\s*=\s*["']([^"']+)["']"#)
        .expect("valid regex")
});
32
/// Matches a `<link ...>` tag whose `rel` attribute appears *before* `href`.
///
/// Only `rel="stylesheet"` and `rel="modulepreload"` are accepted (compared
/// case-insensitively because of the `(?i)` flag). Capture group 1 is the `rel`
/// value and capture group 2 is the `href` value. Attributes between `<link`,
/// `rel` and `href` are skipped with the same quoted-value-aware sub-pattern
/// used for script tags, and `(?s)` allows the tag to span multiple lines.
static LINK_HREF_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
    regex::Regex::new(
        r#"(?si)<link\b(?:[^>"']|"[^"]*"|'[^']*')*?\brel\s*=\s*["'](stylesheet|modulepreload)["'](?:[^>"']|"[^"]*"|'[^']*')*?\bhref\s*=\s*["']([^"']+)["']"#,
    )
    .expect("valid regex")
});
42
/// Matches a `<link ...>` tag whose `href` attribute appears *before* `rel`.
///
/// This is the mirror image of the rel-first pattern: capture group 1 is the
/// `href` value and capture group 2 is the `rel` value, which must be
/// `stylesheet` or `modulepreload` (case-insensitive). A separate pattern is
/// needed because a single regex alternative fixes the attribute order.
static LINK_HREF_REVERSE_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
    regex::Regex::new(
        r#"(?si)<link\b(?:[^>"']|"[^"]*"|'[^']*')*?\bhref\s*=\s*["']([^"']+)["'](?:[^>"']|"[^"]*"|'[^']*')*?\brel\s*=\s*["'](stylesheet|modulepreload)["']"#,
    )
    .expect("valid regex")
});
50
/// Returns `true` when `path` has exactly the extension `html`.
///
/// The comparison is case-sensitive and exact: `.htm`, `.HTML`, paths without
/// an extension, and extensions that are not valid UTF-8 all yield `false`.
pub(crate) fn is_html_file(path: &Path) -> bool {
    let extension = path.extension().and_then(|raw| raw.to_str());
    matches!(extension, Some("html"))
}
58
/// Returns `true` when `src` points at something that is not a local file:
/// an `http://` or `https://` URL, a protocol-relative `//host/...` URL, or an
/// inline `data:` URI.
///
/// URI schemes are case-insensitive (RFC 3986 §3.1), so `HTTPS://cdn/...` and
/// `Data:...` are recognised exactly like their lowercase spellings. Everything
/// else (relative, root-relative, bare paths, the empty string) is local.
pub(crate) fn is_remote_url(src: &str) -> bool {
    /// Scheme prefixes that mark a reference as non-local.
    const REMOTE_SCHEMES: [&str; 3] = ["http://", "https://", "data:"];

    // `//host/path` inherits the page's scheme; there is no case to fold here.
    if src.starts_with("//") {
        return true;
    }

    REMOTE_SCHEMES.iter().any(|scheme| {
        // `str::get` yields `None` when `src` is shorter than the prefix or the
        // cut would land inside a multi-byte character, so this never panics.
        src.get(..scheme.len())
            .is_some_and(|prefix| prefix.eq_ignore_ascii_case(scheme))
    })
}
66
67pub(crate) fn collect_asset_refs(source: &str) -> Vec<String> {
74 let stripped = HTML_COMMENT_RE.replace_all(source, "");
75 let mut refs: Vec<String> = Vec::new();
76
77 for cap in SCRIPT_SRC_RE.captures_iter(&stripped) {
78 if let Some(m) = cap.get(1) {
79 let src = m.as_str().trim();
80 if !src.is_empty() && !is_remote_url(src) {
81 refs.push(src.to_string());
82 }
83 }
84 }
85
86 for cap in LINK_HREF_RE.captures_iter(&stripped) {
87 if let Some(m) = cap.get(2) {
88 let href = m.as_str().trim();
89 if !href.is_empty() && !is_remote_url(href) {
90 refs.push(href.to_string());
91 }
92 }
93 }
94 for cap in LINK_HREF_REVERSE_RE.captures_iter(&stripped) {
95 if let Some(m) = cap.get(1) {
96 let href = m.as_str().trim();
97 if !href.is_empty() && !is_remote_url(href) {
98 refs.push(href.to_string());
99 }
100 }
101 }
102
103 refs
104}
105
/// Test-only shorthand for [`parse_html_to_module_with_complexity`] that never
/// requests complexity metrics.
#[cfg(test)]
pub(crate) fn parse_html_to_module(file_id: FileId, source: &str, content_hash: u64) -> ModuleInfo {
    const NEED_COMPLEXITY: bool = false;
    parse_html_to_module_with_complexity(file_id, source, content_hash, NEED_COMPLEXITY)
}
111
112pub(crate) fn parse_html_to_module_with_complexity(
114 file_id: FileId,
115 source: &str,
116 content_hash: u64,
117 need_complexity: bool,
118) -> ModuleInfo {
119 let suppressions = crate::suppress::parse_suppressions_from_source(source);
120
121 let mut imports: Vec<ImportInfo> = collect_asset_refs(source)
124 .into_iter()
125 .map(|raw| ImportInfo {
126 source: normalize_asset_url(&raw),
127 imported_name: ImportedName::SideEffect,
128 local_name: String::new(),
129 is_type_only: false,
130 span: Span::default(),
131 source_span: Span::default(),
132 })
133 .collect();
134
135 imports.sort_unstable_by(|a, b| a.source.cmp(&b.source));
138 imports.dedup_by(|a, b| a.source == b.source);
139
140 let template_refs = angular::collect_angular_template_refs(source);
153 let mut member_accesses: Vec<MemberAccess> = template_refs
154 .identifiers
155 .into_iter()
156 .map(|name| MemberAccess {
157 object: ANGULAR_TPL_SENTINEL.to_string(),
158 member: name,
159 })
160 .collect();
161 member_accesses.extend(template_refs.member_accesses);
162
163 let complexity = if need_complexity {
164 crate::template_complexity::compute_angular_template_complexity(source)
165 .into_iter()
166 .collect()
167 } else {
168 Vec::new()
169 };
170
171 ModuleInfo {
172 file_id,
173 exports: Vec::new(),
174 imports,
175 re_exports: Vec::new(),
176 dynamic_imports: Vec::new(),
177 dynamic_import_patterns: Vec::new(),
178 require_calls: Vec::new(),
179 member_accesses,
180 whole_object_uses: Vec::new(),
181 has_cjs_exports: false,
182 content_hash,
183 suppressions,
184 unused_import_bindings: Vec::new(),
185 type_referenced_import_bindings: Vec::new(),
186 value_referenced_import_bindings: Vec::new(),
187 line_offsets: fallow_types::extract::compute_line_offsets(source),
188 complexity,
189 flag_uses: Vec::new(),
190 class_heritage: vec![],
191 }
192}
193
// Unit tests for HTML asset extraction. Apart from the two predicate groups
// at the top, every test goes through `parse_html_to_module`, i.e. the full
// pipeline with complexity collection disabled.
#[cfg(test)]
mod tests {
    use super::*;

    // ---- is_html_file: only the exact `.html` extension is accepted ----

    #[test]
    fn is_html_file_html() {
        assert!(is_html_file(Path::new("index.html")));
    }

    #[test]
    fn is_html_file_nested() {
        assert!(is_html_file(Path::new("pages/about.html")));
    }

    #[test]
    fn is_html_file_rejects_htm() {
        assert!(!is_html_file(Path::new("index.htm")));
    }

    #[test]
    fn is_html_file_rejects_js() {
        assert!(!is_html_file(Path::new("app.js")));
    }

    #[test]
    fn is_html_file_rejects_ts() {
        assert!(!is_html_file(Path::new("app.ts")));
    }

    #[test]
    fn is_html_file_rejects_vue() {
        assert!(!is_html_file(Path::new("App.vue")));
    }

    // ---- is_remote_url: http(s), protocol-relative and data URIs are remote ----

    #[test]
    fn remote_url_http() {
        assert!(is_remote_url("http://example.com/script.js"));
    }

    #[test]
    fn remote_url_https() {
        assert!(is_remote_url("https://cdn.example.com/style.css"));
    }

    #[test]
    fn remote_url_protocol_relative() {
        assert!(is_remote_url("//cdn.example.com/lib.js"));
    }

    #[test]
    fn remote_url_data() {
        assert!(is_remote_url("data:text/javascript;base64,abc"));
    }

    #[test]
    fn local_relative_not_remote() {
        assert!(!is_remote_url("./src/entry.js"));
    }

    #[test]
    fn local_root_relative_not_remote() {
        assert!(!is_remote_url("/src/entry.js"));
    }

    // ---- <script src>: local sources become imports, inline/remote are skipped ----

    #[test]
    fn extracts_module_script_src() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script type="module" src="./src/entry.js"></script>"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/entry.js");
    }

    #[test]
    fn extracts_plain_script_src() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script src="./src/polyfills.js"></script>"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/polyfills.js");
    }

    #[test]
    fn extracts_multiple_scripts() {
        let info = parse_html_to_module(
            FileId(0),
            r#"
            <script type="module" src="./src/entry.js"></script>
            <script src="./src/polyfills.js"></script>
            "#,
            0,
        );
        assert_eq!(info.imports.len(), 2);
    }

    #[test]
    fn skips_inline_script() {
        let info = parse_html_to_module(FileId(0), r#"<script>console.log("hello");</script>"#, 0);
        assert!(info.imports.is_empty());
    }

    #[test]
    fn skips_remote_script() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script src="https://cdn.example.com/lib.js"></script>"#,
            0,
        );
        assert!(info.imports.is_empty());
    }

    #[test]
    fn skips_protocol_relative_script() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script src="//cdn.example.com/lib.js"></script>"#,
            0,
        );
        assert!(info.imports.is_empty());
    }

    // ---- <link href>: stylesheet / modulepreload, in either attribute order ----

    #[test]
    fn extracts_stylesheet_link() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link rel="stylesheet" href="./src/global.css" />"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/global.css");
    }

    #[test]
    fn extracts_modulepreload_link() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link rel="modulepreload" href="./src/vendor.js" />"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/vendor.js");
    }

    #[test]
    fn extracts_link_with_reversed_attrs() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link href="./src/global.css" rel="stylesheet" />"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/global.css");
    }

    // ---- URL normalization: bare paths gain a `./` prefix, while paths that
    // ---- already start with `/` or `../` are expected to pass through unchanged

    #[test]
    fn bare_script_src_normalized_to_relative() {
        let info = parse_html_to_module(FileId(0), r#"<script src="app.js"></script>"#, 0);
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./app.js");
    }

    #[test]
    fn bare_module_script_src_normalized_to_relative() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script type="module" src="main.ts"></script>"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./main.ts");
    }

    #[test]
    fn bare_stylesheet_link_href_normalized_to_relative() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link rel="stylesheet" href="styles.css" />"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./styles.css");
    }

    #[test]
    fn bare_link_href_reversed_attrs_normalized_to_relative() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link href="styles.css" rel="stylesheet" />"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./styles.css");
    }

    #[test]
    fn bare_modulepreload_link_href_normalized_to_relative() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link rel="modulepreload" href="vendor.js" />"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./vendor.js");
    }

    #[test]
    fn bare_asset_with_subdir_normalized_to_relative() {
        let info = parse_html_to_module(FileId(0), r#"<script src="assets/app.js"></script>"#, 0);
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./assets/app.js");
    }

    #[test]
    fn root_absolute_script_src_unchanged() {
        let info = parse_html_to_module(FileId(0), r#"<script src="/src/main.ts"></script>"#, 0);
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "/src/main.ts");
    }

    #[test]
    fn parent_relative_script_src_unchanged() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script src="../shared/vendor.js"></script>"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "../shared/vendor.js");
    }

    // ---- <link> tags that must NOT produce imports ----

    #[test]
    fn skips_preload_link() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link rel="preload" href="./src/font.woff2" as="font" />"#,
            0,
        );
        assert!(info.imports.is_empty());
    }

    #[test]
    fn skips_icon_link() {
        let info =
            parse_html_to_module(FileId(0), r#"<link rel="icon" href="./favicon.ico" />"#, 0);
        assert!(info.imports.is_empty());
    }

    #[test]
    fn skips_remote_stylesheet() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<link rel="stylesheet" href="https://fonts.googleapis.com/css" />"#,
            0,
        );
        assert!(info.imports.is_empty());
    }

    // ---- HTML comments: tags inside `<!-- -->` are ignored ----

    #[test]
    fn skips_commented_out_script() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<!-- <script src="./old.js"></script> -->
            <script src="./new.js"></script>"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./new.js");
    }

    #[test]
    fn skips_commented_out_link() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<!-- <link rel="stylesheet" href="./old.css" /> -->
            <link rel="stylesheet" href="./new.css" />"#,
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./new.css");
    }

    // ---- Tags whose attributes are spread over several lines ----

    #[test]
    fn handles_multiline_script_tag() {
        let info = parse_html_to_module(
            FileId(0),
            "<script\n  type=\"module\"\n  src=\"./src/entry.js\"\n></script>",
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/entry.js");
    }

    #[test]
    fn handles_multiline_link_tag() {
        let info = parse_html_to_module(
            FileId(0),
            "<link\n  rel=\"stylesheet\"\n  href=\"./src/global.css\"\n/>",
            0,
        );
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/global.css");
    }

    // ---- Whole-document example: stylesheet + module script are kept,
    // ---- the icon link is not ----

    #[test]
    fn full_vite_html() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<!doctype html>
<html>
  <head>
    <link rel="stylesheet" href="./src/global.css" />
    <link rel="icon" href="/favicon.ico" />
  </head>
  <body>
    <div id="app"></div>
    <script type="module" src="./src/entry.js"></script>
  </body>
</html>"#,
            0,
        );
        assert_eq!(info.imports.len(), 2);
        let sources: Vec<&str> = info.imports.iter().map(|i| i.source.as_str()).collect();
        assert!(sources.contains(&"./src/global.css"));
        assert!(sources.contains(&"./src/entry.js"));
    }

    // ---- Edge cases and shape of the produced imports ----

    #[test]
    fn empty_html() {
        let info = parse_html_to_module(FileId(0), "", 0);
        assert!(info.imports.is_empty());
    }

    #[test]
    fn html_with_no_assets() {
        let info = parse_html_to_module(
            FileId(0),
            r"<!doctype html><html><body><h1>Hello</h1></body></html>",
            0,
        );
        assert!(info.imports.is_empty());
    }

    #[test]
    fn single_quoted_attributes() {
        let info = parse_html_to_module(FileId(0), r"<script src='./src/entry.js'></script>", 0);
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, "./src/entry.js");
    }

    #[test]
    fn all_imports_are_side_effect() {
        let info = parse_html_to_module(
            FileId(0),
            r#"<script src="./entry.js"></script>
            <link rel="stylesheet" href="./style.css" />"#,
            0,
        );
        for imp in &info.imports {
            assert!(matches!(imp.imported_name, ImportedName::SideEffect));
            assert!(imp.local_name.is_empty());
            assert!(!imp.is_type_only);
        }
    }

    // NOTE(review): despite the name, only the import count is asserted here;
    // the parsed `suppressions` are not inspected.
    #[test]
    fn suppression_comments_extracted() {
        let info = parse_html_to_module(
            FileId(0),
            "<!-- fallow-ignore-file -->\n<script src=\"./entry.js\"></script>",
            0,
        );
        assert_eq!(info.imports.len(), 1);
    }

    // ---- Angular template references surface as sentinel member accesses ----

    #[test]
    fn angular_template_extracts_member_refs() {
        let info = parse_html_to_module(
            FileId(0),
            "<h1>{{ title() }}</h1>\n\
             <p [class.highlighted]=\"isHighlighted\">{{ greeting() }}</p>\n\
             <button (click)=\"onButtonClick()\">Toggle</button>",
            0,
        );
        // Only accesses recorded against the Angular sentinel object are relevant.
        let names: rustc_hash::FxHashSet<&str> = info
            .member_accesses
            .iter()
            .filter(|a| a.object == ANGULAR_TPL_SENTINEL)
            .map(|a| a.member.as_str())
            .collect();
        assert!(names.contains("title"), "should contain 'title'");
        assert!(
            names.contains("isHighlighted"),
            "should contain 'isHighlighted'"
        );
        assert!(names.contains("greeting"), "should contain 'greeting'");
        assert!(
            names.contains("onButtonClick"),
            "should contain 'onButtonClick'"
        );
    }

    #[test]
    fn plain_html_no_angular_refs() {
        let info = parse_html_to_module(
            FileId(0),
            "<!doctype html><html><body><h1>Hello</h1></body></html>",
            0,
        );
        assert!(info.member_accesses.is_empty());
    }
}