1use std::path::Path;
12use std::sync::LazyLock;
13
14use oxc_span::Span;
15
16use crate::asset_url::normalize_asset_url;
17use crate::sfc_template::angular::{self, ANGULAR_TPL_SENTINEL};
18use crate::{ImportInfo, ImportedName, MemberAccess, ModuleInfo};
19use fallow_types::discover::FileId;
20
21static HTML_COMMENT_RE: LazyLock<regex::Regex> =
23 LazyLock::new(|| regex::Regex::new(r"(?s)<!--.*?-->").expect("valid regex"));
24
25static SCRIPT_SRC_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
29 regex::Regex::new(r#"(?si)<script\b(?:[^>"']|"[^"]*"|'[^']*')*?\bsrc\s*=\s*["']([^"']+)["']"#)
30 .expect("valid regex")
31});
32
33static LINK_HREF_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
37 regex::Regex::new(
38 r#"(?si)<link\b(?:[^>"']|"[^"]*"|'[^']*')*?\brel\s*=\s*["'](stylesheet|modulepreload)["'](?:[^>"']|"[^"]*"|'[^']*')*?\bhref\s*=\s*["']([^"']+)["']"#,
39 )
40 .expect("valid regex")
41});
42
43static LINK_HREF_REVERSE_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
45 regex::Regex::new(
46 r#"(?si)<link\b(?:[^>"']|"[^"]*"|'[^']*')*?\bhref\s*=\s*["']([^"']+)["'](?:[^>"']|"[^"]*"|'[^']*')*?\brel\s*=\s*["'](stylesheet|modulepreload)["']"#,
47 )
48 .expect("valid regex")
49});
50
/// Returns `true` when `path` has exactly the extension `html`.
///
/// The comparison is case-sensitive, and other HTML-ish extensions such as
/// `htm` are rejected. A path with no extension, or with an extension that is
/// not valid UTF-8, yields `false`.
pub(crate) fn is_html_file(path: &Path) -> bool {
    matches!(
        path.extension().and_then(|ext| ext.to_str()),
        Some("html")
    )
}
58
/// Returns `true` when `src` points outside the project and must not be
/// treated as a local asset reference.
///
/// A reference counts as remote when it is:
/// - protocol-relative (starts with `//`), or
/// - prefixed with `http://`, `https://`, or `data:`.
///
/// URI schemes are case-insensitive, so the scheme prefixes are compared
/// ASCII-case-insensitively (`HTTPS://cdn.example.com/x.js` is remote too).
pub(crate) fn is_remote_url(src: &str) -> bool {
    const REMOTE_PREFIXES: [&str; 3] = ["http://", "https://", "data:"];

    src.starts_with("//")
        || REMOTE_PREFIXES.iter().any(|prefix| {
            // `get` returns `None` (rather than panicking) when `src` is
            // shorter than the prefix or the cut is not on a char boundary.
            src.get(..prefix.len())
                .is_some_and(|head| head.eq_ignore_ascii_case(prefix))
        })
}
66
67pub(crate) fn collect_asset_refs(source: &str) -> Vec<String> {
74 let stripped = HTML_COMMENT_RE.replace_all(source, "");
75 let mut refs: Vec<String> = Vec::new();
76
77 for cap in SCRIPT_SRC_RE.captures_iter(&stripped) {
78 if let Some(m) = cap.get(1) {
79 let src = m.as_str().trim();
80 if !src.is_empty() && !is_remote_url(src) {
81 refs.push(src.to_string());
82 }
83 }
84 }
85
86 for cap in LINK_HREF_RE.captures_iter(&stripped) {
87 if let Some(m) = cap.get(2) {
88 let href = m.as_str().trim();
89 if !href.is_empty() && !is_remote_url(href) {
90 refs.push(href.to_string());
91 }
92 }
93 }
94 for cap in LINK_HREF_REVERSE_RE.captures_iter(&stripped) {
95 if let Some(m) = cap.get(1) {
96 let href = m.as_str().trim();
97 if !href.is_empty() && !is_remote_url(href) {
98 refs.push(href.to_string());
99 }
100 }
101 }
102
103 refs
104}
105
/// Test-only shorthand for [`parse_html_to_module_with_complexity`] with
/// complexity computation switched off.
#[cfg(test)]
pub(crate) fn parse_html_to_module(file_id: FileId, source: &str, content_hash: u64) -> ModuleInfo {
    let need_complexity = false;
    parse_html_to_module_with_complexity(file_id, source, content_hash, need_complexity)
}
111
112pub(crate) fn parse_html_to_module_with_complexity(
114 file_id: FileId,
115 source: &str,
116 content_hash: u64,
117 need_complexity: bool,
118) -> ModuleInfo {
119 let suppressions = crate::suppress::parse_suppressions_from_source(source);
120
121 let mut imports: Vec<ImportInfo> = collect_asset_refs(source)
124 .into_iter()
125 .map(|raw| ImportInfo {
126 source: normalize_asset_url(&raw),
127 imported_name: ImportedName::SideEffect,
128 local_name: String::new(),
129 is_type_only: false,
130 from_style: false,
131 span: Span::default(),
132 source_span: Span::default(),
133 })
134 .collect();
135
136 imports.sort_unstable_by(|a, b| a.source.cmp(&b.source));
139 imports.dedup_by(|a, b| a.source == b.source);
140
141 let template_refs = angular::collect_angular_template_refs(source);
154 let mut member_accesses: Vec<MemberAccess> = template_refs
155 .identifiers
156 .into_iter()
157 .map(|name| MemberAccess {
158 object: ANGULAR_TPL_SENTINEL.to_string(),
159 member: name,
160 })
161 .collect();
162 member_accesses.extend(template_refs.member_accesses);
163
164 let complexity = if need_complexity {
165 crate::template_complexity::compute_angular_template_complexity(source)
166 .into_iter()
167 .collect()
168 } else {
169 Vec::new()
170 };
171
172 ModuleInfo {
173 file_id,
174 exports: Vec::new(),
175 imports,
176 re_exports: Vec::new(),
177 dynamic_imports: Vec::new(),
178 dynamic_import_patterns: Vec::new(),
179 require_calls: Vec::new(),
180 member_accesses,
181 whole_object_uses: Vec::new(),
182 has_cjs_exports: false,
183 content_hash,
184 suppressions,
185 unused_import_bindings: Vec::new(),
186 type_referenced_import_bindings: Vec::new(),
187 value_referenced_import_bindings: Vec::new(),
188 line_offsets: fallow_types::extract::compute_line_offsets(source),
189 complexity,
190 flag_uses: Vec::new(),
191 class_heritage: vec![],
192 local_type_declarations: Vec::new(),
193 public_signature_type_references: Vec::new(),
194 }
195}
196
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `html` as file 0 with a zero content hash.
    fn parse(html: &str) -> ModuleInfo {
        parse_html_to_module(FileId(0), html, 0)
    }

    /// Parses `html` and returns the `source` of every import, in module order.
    fn import_sources(html: &str) -> Vec<String> {
        parse(html)
            .imports
            .iter()
            .map(|import| import.source.as_str().to_owned())
            .collect()
    }

    /// Runs `is_html_file` on a string path.
    fn html_path(path: &str) -> bool {
        is_html_file(Path::new(path))
    }

    // ── is_html_file ─────────────────────────────────────────────

    #[test]
    fn is_html_file_html() {
        assert!(html_path("index.html"));
    }

    #[test]
    fn is_html_file_nested() {
        assert!(html_path("pages/about.html"));
    }

    #[test]
    fn is_html_file_rejects_htm() {
        assert!(!html_path("index.htm"));
    }

    #[test]
    fn is_html_file_rejects_js() {
        assert!(!html_path("app.js"));
    }

    #[test]
    fn is_html_file_rejects_ts() {
        assert!(!html_path("app.ts"));
    }

    #[test]
    fn is_html_file_rejects_vue() {
        assert!(!html_path("App.vue"));
    }

    // ── is_remote_url ────────────────────────────────────────────

    #[test]
    fn remote_url_http() {
        let url = "http://example.com/script.js";
        assert!(is_remote_url(url));
    }

    #[test]
    fn remote_url_https() {
        let url = "https://cdn.example.com/style.css";
        assert!(is_remote_url(url));
    }

    #[test]
    fn remote_url_protocol_relative() {
        let url = "//cdn.example.com/lib.js";
        assert!(is_remote_url(url));
    }

    #[test]
    fn remote_url_data() {
        let url = "data:text/javascript;base64,abc";
        assert!(is_remote_url(url));
    }

    #[test]
    fn local_relative_not_remote() {
        let url = "./src/entry.js";
        assert!(!is_remote_url(url));
    }

    #[test]
    fn local_root_relative_not_remote() {
        let url = "/src/entry.js";
        assert!(!is_remote_url(url));
    }

    // ── <script src> extraction ──────────────────────────────────

    #[test]
    fn extracts_module_script_src() {
        let sources = import_sources(r#"<script type="module" src="./src/entry.js"></script>"#);
        assert_eq!(sources, ["./src/entry.js"]);
    }

    #[test]
    fn extracts_plain_script_src() {
        let sources = import_sources(r#"<script src="./src/polyfills.js"></script>"#);
        assert_eq!(sources, ["./src/polyfills.js"]);
    }

    #[test]
    fn extracts_multiple_scripts() {
        let sources = import_sources(
            r#"
            <script type="module" src="./src/entry.js"></script>
            <script src="./src/polyfills.js"></script>
            "#,
        );
        assert_eq!(sources.len(), 2);
    }

    #[test]
    fn skips_inline_script() {
        let sources = import_sources(r#"<script>console.log("hello");</script>"#);
        assert!(sources.is_empty());
    }

    #[test]
    fn skips_remote_script() {
        let sources = import_sources(r#"<script src="https://cdn.example.com/lib.js"></script>"#);
        assert!(sources.is_empty());
    }

    #[test]
    fn skips_protocol_relative_script() {
        let sources = import_sources(r#"<script src="//cdn.example.com/lib.js"></script>"#);
        assert!(sources.is_empty());
    }

    // ── <link href> extraction ───────────────────────────────────

    #[test]
    fn extracts_stylesheet_link() {
        let sources = import_sources(r#"<link rel="stylesheet" href="./src/global.css" />"#);
        assert_eq!(sources, ["./src/global.css"]);
    }

    #[test]
    fn extracts_modulepreload_link() {
        let sources = import_sources(r#"<link rel="modulepreload" href="./src/vendor.js" />"#);
        assert_eq!(sources, ["./src/vendor.js"]);
    }

    #[test]
    fn extracts_link_with_reversed_attrs() {
        let sources = import_sources(r#"<link href="./src/global.css" rel="stylesheet" />"#);
        assert_eq!(sources, ["./src/global.css"]);
    }

    // ── URL normalization ────────────────────────────────────────

    #[test]
    fn bare_script_src_normalized_to_relative() {
        let sources = import_sources(r#"<script src="app.js"></script>"#);
        assert_eq!(sources, ["./app.js"]);
    }

    #[test]
    fn bare_module_script_src_normalized_to_relative() {
        let sources = import_sources(r#"<script type="module" src="main.ts"></script>"#);
        assert_eq!(sources, ["./main.ts"]);
    }

    #[test]
    fn bare_stylesheet_link_href_normalized_to_relative() {
        let sources = import_sources(r#"<link rel="stylesheet" href="styles.css" />"#);
        assert_eq!(sources, ["./styles.css"]);
    }

    #[test]
    fn bare_link_href_reversed_attrs_normalized_to_relative() {
        let sources = import_sources(r#"<link href="styles.css" rel="stylesheet" />"#);
        assert_eq!(sources, ["./styles.css"]);
    }

    #[test]
    fn bare_modulepreload_link_href_normalized_to_relative() {
        let sources = import_sources(r#"<link rel="modulepreload" href="vendor.js" />"#);
        assert_eq!(sources, ["./vendor.js"]);
    }

    #[test]
    fn bare_asset_with_subdir_normalized_to_relative() {
        let sources = import_sources(r#"<script src="assets/app.js"></script>"#);
        assert_eq!(sources, ["./assets/app.js"]);
    }

    #[test]
    fn root_absolute_script_src_unchanged() {
        let sources = import_sources(r#"<script src="/src/main.ts"></script>"#);
        assert_eq!(sources, ["/src/main.ts"]);
    }

    #[test]
    fn parent_relative_script_src_unchanged() {
        let sources = import_sources(r#"<script src="../shared/vendor.js"></script>"#);
        assert_eq!(sources, ["../shared/vendor.js"]);
    }

    // ── ignored <link> kinds ─────────────────────────────────────

    #[test]
    fn skips_preload_link() {
        let sources = import_sources(r#"<link rel="preload" href="./src/font.woff2" as="font" />"#);
        assert!(sources.is_empty());
    }

    #[test]
    fn skips_icon_link() {
        let sources = import_sources(r#"<link rel="icon" href="./favicon.ico" />"#);
        assert!(sources.is_empty());
    }

    #[test]
    fn skips_remote_stylesheet() {
        let sources =
            import_sources(r#"<link rel="stylesheet" href="https://fonts.googleapis.com/css" />"#);
        assert!(sources.is_empty());
    }

    // ── HTML comments ────────────────────────────────────────────

    #[test]
    fn skips_commented_out_script() {
        let sources = import_sources(
            r#"<!-- <script src="./old.js"></script> -->
            <script src="./new.js"></script>"#,
        );
        assert_eq!(sources, ["./new.js"]);
    }

    #[test]
    fn skips_commented_out_link() {
        let sources = import_sources(
            r#"<!-- <link rel="stylesheet" href="./old.css" /> -->
            <link rel="stylesheet" href="./new.css" />"#,
        );
        assert_eq!(sources, ["./new.css"]);
    }

    // ── multi-line tags ──────────────────────────────────────────

    #[test]
    fn handles_multiline_script_tag() {
        let sources =
            import_sources("<script\n type=\"module\"\n src=\"./src/entry.js\"\n></script>");
        assert_eq!(sources, ["./src/entry.js"]);
    }

    #[test]
    fn handles_multiline_link_tag() {
        let sources =
            import_sources("<link\n rel=\"stylesheet\"\n href=\"./src/global.css\"\n/>");
        assert_eq!(sources, ["./src/global.css"]);
    }

    // ── whole documents ──────────────────────────────────────────

    #[test]
    fn full_vite_html() {
        let sources = import_sources(
            r#"<!doctype html>
<html>
  <head>
    <link rel="stylesheet" href="./src/global.css" />
    <link rel="icon" href="/favicon.ico" />
  </head>
  <body>
    <div id="app"></div>
    <script type="module" src="./src/entry.js"></script>
  </body>
</html>"#,
        );
        assert_eq!(sources.len(), 2);
        assert!(sources.iter().any(|source| source == "./src/global.css"));
        assert!(sources.iter().any(|source| source == "./src/entry.js"));
    }

    // ── edge cases ───────────────────────────────────────────────

    #[test]
    fn empty_html() {
        assert!(import_sources("").is_empty());
    }

    #[test]
    fn html_with_no_assets() {
        let sources = import_sources(r"<!doctype html><html><body><h1>Hello</h1></body></html>");
        assert!(sources.is_empty());
    }

    #[test]
    fn single_quoted_attributes() {
        let sources = import_sources(r"<script src='./src/entry.js'></script>");
        assert_eq!(sources, ["./src/entry.js"]);
    }

    #[test]
    fn all_imports_are_side_effect() {
        let module = parse(
            r#"<script src="./entry.js"></script>
            <link rel="stylesheet" href="./style.css" />"#,
        );
        for ImportInfo {
            imported_name,
            local_name,
            is_type_only,
            ..
        } in &module.imports
        {
            assert!(matches!(imported_name, ImportedName::SideEffect));
            assert!(local_name.is_empty());
            assert!(!*is_type_only);
        }
    }

    #[test]
    fn suppression_comments_extracted() {
        // Only checks that a leading suppression comment does not get in the
        // way of asset extraction; the parsed suppressions are not inspected.
        let sources =
            import_sources("<!-- fallow-ignore-file -->\n<script src=\"./entry.js\"></script>");
        assert_eq!(sources.len(), 1);
    }

    // ── Angular template references ──────────────────────────────

    #[test]
    fn angular_template_extracts_member_refs() {
        let module = parse(
            "<h1>{{ title() }}</h1>\n\
             <p [class.highlighted]=\"isHighlighted\">{{ greeting() }}</p>\n\
             <button (click)=\"onButtonClick()\">Toggle</button>",
        );
        let names: rustc_hash::FxHashSet<&str> = module
            .member_accesses
            .iter()
            .filter(|access| access.object == ANGULAR_TPL_SENTINEL)
            .map(|access| access.member.as_str())
            .collect();
        for expected in ["title", "isHighlighted", "greeting", "onButtonClick"] {
            assert!(names.contains(expected), "should contain '{expected}'");
        }
    }

    #[test]
    fn plain_html_no_angular_refs() {
        let module = parse("<!doctype html><html><body><h1>Hello</h1></body></html>");
        assert!(module.member_accesses.is_empty());
    }
}