1use std::path::Path;
12use std::sync::LazyLock;
13
14use oxc_span::Span;
15
16use crate::asset_url::normalize_asset_url;
17use crate::sfc_template::angular::{self, ANGULAR_TPL_SENTINEL};
18use crate::{ImportInfo, ImportedName, MemberAccess, ModuleInfo};
19use fallow_types::discover::FileId;
20
21static HTML_COMMENT_RE: LazyLock<regex::Regex> =
23 LazyLock::new(|| regex::Regex::new(r"(?s)<!--.*?-->").expect("valid regex"));
24
25static SCRIPT_SRC_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
29 regex::Regex::new(r#"(?si)<script\b(?:[^>"']|"[^"]*"|'[^']*')*?\bsrc\s*=\s*["']([^"']+)["']"#)
30 .expect("valid regex")
31});
32
33static LINK_HREF_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
37 regex::Regex::new(
38 r#"(?si)<link\b(?:[^>"']|"[^"]*"|'[^']*')*?\brel\s*=\s*["'](stylesheet|modulepreload)["'](?:[^>"']|"[^"]*"|'[^']*')*?\bhref\s*=\s*["']([^"']+)["']"#,
39 )
40 .expect("valid regex")
41});
42
43static LINK_HREF_REVERSE_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
45 regex::Regex::new(
46 r#"(?si)<link\b(?:[^>"']|"[^"]*"|'[^']*')*?\bhref\s*=\s*["']([^"']+)["'](?:[^>"']|"[^"]*"|'[^']*')*?\brel\s*=\s*["'](stylesheet|modulepreload)["']"#,
47 )
48 .expect("valid regex")
49});
50
/// Returns `true` when `path` has the extension `html`.
///
/// The comparison is exact and case-sensitive: `.htm`, `.HTML`, extension-less
/// paths and extensions that are not valid UTF-8 all yield `false`.
pub(crate) fn is_html_file(path: &Path) -> bool {
    let extension = path.extension().and_then(|raw| raw.to_str());
    matches!(extension, Some("html"))
}
58
/// Returns `true` when `src` refers to something that is not a local project file.
///
/// A reference counts as remote when it starts with `http://`, `https://`,
/// `//` (protocol-relative) or `data:`. URI schemes are case-insensitive, so the
/// prefix comparison ignores ASCII case: `HTTPS://cdn.example.com/x.js` and
/// `Data:text/plain,hi` are remote too.
///
/// The input is checked as given; callers are expected to trim surrounding
/// whitespace first.
pub(crate) fn is_remote_url(src: &str) -> bool {
    // Prefixes that mark a reference as external to the project.
    const REMOTE_PREFIXES: [&str; 4] = ["http://", "https://", "//", "data:"];

    // Compare on bytes: slicing the `str` itself at `prefix.len()` could land in
    // the middle of a multi-byte character and panic.
    let bytes = src.as_bytes();
    REMOTE_PREFIXES.iter().any(|prefix| {
        bytes
            .get(..prefix.len())
            .is_some_and(|head| head.eq_ignore_ascii_case(prefix.as_bytes()))
    })
}
66
67pub(crate) fn collect_asset_refs(source: &str) -> Vec<String> {
74 let stripped = HTML_COMMENT_RE.replace_all(source, "");
75 let mut refs: Vec<String> = Vec::new();
76
77 for cap in SCRIPT_SRC_RE.captures_iter(&stripped) {
78 if let Some(m) = cap.get(1) {
79 let src = m.as_str().trim();
80 if !src.is_empty() && !is_remote_url(src) {
81 refs.push(src.to_string());
82 }
83 }
84 }
85
86 for cap in LINK_HREF_RE.captures_iter(&stripped) {
87 if let Some(m) = cap.get(2) {
88 let href = m.as_str().trim();
89 if !href.is_empty() && !is_remote_url(href) {
90 refs.push(href.to_string());
91 }
92 }
93 }
94 for cap in LINK_HREF_REVERSE_RE.captures_iter(&stripped) {
95 if let Some(m) = cap.get(1) {
96 let href = m.as_str().trim();
97 if !href.is_empty() && !is_remote_url(href) {
98 refs.push(href.to_string());
99 }
100 }
101 }
102
103 refs
104}
105
/// Test-only shorthand for [`parse_html_to_module_with_complexity`] with
/// complexity computation turned off.
#[cfg(test)]
pub(crate) fn parse_html_to_module(file_id: FileId, source: &str, content_hash: u64) -> ModuleInfo {
    let need_complexity = false;
    parse_html_to_module_with_complexity(file_id, source, content_hash, need_complexity)
}
111
112pub(crate) fn parse_html_to_module_with_complexity(
114 file_id: FileId,
115 source: &str,
116 content_hash: u64,
117 need_complexity: bool,
118) -> ModuleInfo {
119 let suppressions = crate::suppress::parse_suppressions_from_source(source);
120
121 let mut imports: Vec<ImportInfo> = collect_asset_refs(source)
124 .into_iter()
125 .map(|raw| ImportInfo {
126 source: normalize_asset_url(&raw),
127 imported_name: ImportedName::SideEffect,
128 local_name: String::new(),
129 is_type_only: false,
130 from_style: false,
131 span: Span::default(),
132 source_span: Span::default(),
133 })
134 .collect();
135
136 imports.sort_unstable_by(|a, b| a.source.cmp(&b.source));
139 imports.dedup_by(|a, b| a.source == b.source);
140
141 let template_refs = angular::collect_angular_template_refs(source);
154 let mut member_accesses: Vec<MemberAccess> = template_refs
155 .identifiers
156 .into_iter()
157 .map(|name| MemberAccess {
158 object: ANGULAR_TPL_SENTINEL.to_string(),
159 member: name,
160 })
161 .collect();
162 member_accesses.extend(template_refs.member_accesses);
163
164 let complexity = if need_complexity {
165 crate::template_complexity::compute_angular_template_complexity(source)
166 .into_iter()
167 .collect()
168 } else {
169 Vec::new()
170 };
171
172 ModuleInfo {
173 file_id,
174 exports: Vec::new(),
175 imports,
176 re_exports: Vec::new(),
177 dynamic_imports: Vec::new(),
178 dynamic_import_patterns: Vec::new(),
179 require_calls: Vec::new(),
180 member_accesses,
181 whole_object_uses: Vec::new(),
182 has_cjs_exports: false,
183 has_angular_component_template_url: false,
184 content_hash,
185 suppressions,
186 unused_import_bindings: Vec::new(),
187 type_referenced_import_bindings: Vec::new(),
188 value_referenced_import_bindings: Vec::new(),
189 line_offsets: fallow_types::extract::compute_line_offsets(source),
190 complexity,
191 flag_uses: Vec::new(),
192 class_heritage: vec![],
193 local_type_declarations: Vec::new(),
194 public_signature_type_references: Vec::new(),
195 namespace_object_aliases: Vec::new(),
196 }
197}
198
#[cfg(test)]
mod tests {
    use super::*;

    // ── Shared helpers ──────────────────────────────────────────

    /// Parses `html` as file 0 with a zero content hash.
    fn parse(html: &str) -> ModuleInfo {
        parse_html_to_module(FileId(0), html, 0)
    }

    /// Asserts that `html` produces exactly one import whose source is `expected`.
    #[track_caller]
    fn assert_single_import(html: &str, expected: &str) {
        let info = parse(html);
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, expected);
    }

    /// Asserts that `html` produces no imports at all.
    #[track_caller]
    fn assert_no_imports(html: &str) {
        let info = parse(html);
        assert!(info.imports.is_empty());
    }

    /// Runs `is_html_file` on a path given as a string.
    fn html_path(path: &str) -> bool {
        is_html_file(Path::new(path))
    }

    // ── is_html_file ────────────────────────────────────────────

    #[test]
    fn is_html_file_html() {
        assert!(html_path("index.html"));
    }

    #[test]
    fn is_html_file_nested() {
        assert!(html_path("pages/about.html"));
    }

    #[test]
    fn is_html_file_rejects_htm() {
        assert!(!html_path("index.htm"));
    }

    #[test]
    fn is_html_file_rejects_js() {
        assert!(!html_path("app.js"));
    }

    #[test]
    fn is_html_file_rejects_ts() {
        assert!(!html_path("app.ts"));
    }

    #[test]
    fn is_html_file_rejects_vue() {
        assert!(!html_path("App.vue"));
    }

    // ── is_remote_url ───────────────────────────────────────────

    #[test]
    fn remote_url_http() {
        assert!(is_remote_url("http://example.com/script.js"));
    }

    #[test]
    fn remote_url_https() {
        assert!(is_remote_url("https://cdn.example.com/style.css"));
    }

    #[test]
    fn remote_url_protocol_relative() {
        assert!(is_remote_url("//cdn.example.com/lib.js"));
    }

    #[test]
    fn remote_url_data() {
        assert!(is_remote_url("data:text/javascript;base64,abc"));
    }

    #[test]
    fn local_relative_not_remote() {
        assert!(!is_remote_url("./src/entry.js"));
    }

    #[test]
    fn local_root_relative_not_remote() {
        assert!(!is_remote_url("/src/entry.js"));
    }

    // ── <script src> extraction ─────────────────────────────────

    #[test]
    fn extracts_module_script_src() {
        assert_single_import(
            r#"<script type="module" src="./src/entry.js"></script>"#,
            "./src/entry.js",
        );
    }

    #[test]
    fn extracts_plain_script_src() {
        assert_single_import(
            r#"<script src="./src/polyfills.js"></script>"#,
            "./src/polyfills.js",
        );
    }

    #[test]
    fn extracts_multiple_scripts() {
        let info = parse(
            r#"
            <script type="module" src="./src/entry.js"></script>
            <script src="./src/polyfills.js"></script>
            "#,
        );
        assert_eq!(info.imports.len(), 2);
    }

    #[test]
    fn skips_inline_script() {
        assert_no_imports(r#"<script>console.log("hello");</script>"#);
    }

    #[test]
    fn skips_remote_script() {
        assert_no_imports(r#"<script src="https://cdn.example.com/lib.js"></script>"#);
    }

    #[test]
    fn skips_protocol_relative_script() {
        assert_no_imports(r#"<script src="//cdn.example.com/lib.js"></script>"#);
    }

    // ── <link> extraction ───────────────────────────────────────

    #[test]
    fn extracts_stylesheet_link() {
        assert_single_import(
            r#"<link rel="stylesheet" href="./src/global.css" />"#,
            "./src/global.css",
        );
    }

    #[test]
    fn extracts_modulepreload_link() {
        assert_single_import(
            r#"<link rel="modulepreload" href="./src/vendor.js" />"#,
            "./src/vendor.js",
        );
    }

    #[test]
    fn extracts_link_with_reversed_attrs() {
        assert_single_import(
            r#"<link href="./src/global.css" rel="stylesheet" />"#,
            "./src/global.css",
        );
    }

    // ── URL normalization ───────────────────────────────────────

    #[test]
    fn bare_script_src_normalized_to_relative() {
        assert_single_import(r#"<script src="app.js"></script>"#, "./app.js");
    }

    #[test]
    fn bare_module_script_src_normalized_to_relative() {
        assert_single_import(
            r#"<script type="module" src="main.ts"></script>"#,
            "./main.ts",
        );
    }

    #[test]
    fn bare_stylesheet_link_href_normalized_to_relative() {
        assert_single_import(
            r#"<link rel="stylesheet" href="styles.css" />"#,
            "./styles.css",
        );
    }

    #[test]
    fn bare_link_href_reversed_attrs_normalized_to_relative() {
        assert_single_import(
            r#"<link href="styles.css" rel="stylesheet" />"#,
            "./styles.css",
        );
    }

    #[test]
    fn bare_modulepreload_link_href_normalized_to_relative() {
        assert_single_import(
            r#"<link rel="modulepreload" href="vendor.js" />"#,
            "./vendor.js",
        );
    }

    #[test]
    fn bare_asset_with_subdir_normalized_to_relative() {
        assert_single_import(
            r#"<script src="assets/app.js"></script>"#,
            "./assets/app.js",
        );
    }

    #[test]
    fn root_absolute_script_src_unchanged() {
        assert_single_import(r#"<script src="/src/main.ts"></script>"#, "/src/main.ts");
    }

    #[test]
    fn parent_relative_script_src_unchanged() {
        assert_single_import(
            r#"<script src="../shared/vendor.js"></script>"#,
            "../shared/vendor.js",
        );
    }

    // ── Links that must be ignored ──────────────────────────────

    #[test]
    fn skips_preload_link() {
        assert_no_imports(r#"<link rel="preload" href="./src/font.woff2" as="font" />"#);
    }

    #[test]
    fn skips_icon_link() {
        assert_no_imports(r#"<link rel="icon" href="./favicon.ico" />"#);
    }

    #[test]
    fn skips_remote_stylesheet() {
        assert_no_imports(r#"<link rel="stylesheet" href="https://fonts.googleapis.com/css" />"#);
    }

    // ── HTML comments ───────────────────────────────────────────

    #[test]
    fn skips_commented_out_script() {
        assert_single_import(
            r#"<!-- <script src="./old.js"></script> -->
            <script src="./new.js"></script>"#,
            "./new.js",
        );
    }

    #[test]
    fn skips_commented_out_link() {
        assert_single_import(
            r#"<!-- <link rel="stylesheet" href="./old.css" /> -->
            <link rel="stylesheet" href="./new.css" />"#,
            "./new.css",
        );
    }

    // ── Tags spread over several lines ──────────────────────────

    #[test]
    fn handles_multiline_script_tag() {
        assert_single_import(
            "<script\n type=\"module\"\n src=\"./src/entry.js\"\n></script>",
            "./src/entry.js",
        );
    }

    #[test]
    fn handles_multiline_link_tag() {
        assert_single_import(
            "<link\n rel=\"stylesheet\"\n href=\"./src/global.css\"\n/>",
            "./src/global.css",
        );
    }

    // ── Whole documents ─────────────────────────────────────────

    #[test]
    fn full_vite_html() {
        let info = parse(
            r#"<!doctype html>
<html>
  <head>
    <link rel="stylesheet" href="./src/global.css" />
    <link rel="icon" href="/favicon.ico" />
  </head>
  <body>
    <div id="app"></div>
    <script type="module" src="./src/entry.js"></script>
  </body>
</html>"#,
        );
        assert_eq!(info.imports.len(), 2);
        let sources: Vec<&str> = info.imports.iter().map(|imp| imp.source.as_str()).collect();
        assert!(sources.contains(&"./src/global.css"));
        assert!(sources.contains(&"./src/entry.js"));
    }

    // ── Edge cases ──────────────────────────────────────────────

    #[test]
    fn empty_html() {
        assert_no_imports("");
    }

    #[test]
    fn html_with_no_assets() {
        assert_no_imports(r"<!doctype html><html><body><h1>Hello</h1></body></html>");
    }

    #[test]
    fn single_quoted_attributes() {
        assert_single_import(r"<script src='./src/entry.js'></script>", "./src/entry.js");
    }

    #[test]
    fn all_imports_are_side_effect() {
        let info = parse(
            r#"<script src="./entry.js"></script>
            <link rel="stylesheet" href="./style.css" />"#,
        );
        for import in &info.imports {
            let is_side_effect = matches!(import.imported_name, ImportedName::SideEffect);
            assert!(is_side_effect);
            assert!(import.local_name.is_empty());
            assert!(!import.is_type_only);
        }
    }

    #[test]
    fn suppression_comments_extracted() {
        // Only the import count is checked here; the suppression list itself is not asserted.
        let info = parse("<!-- fallow-ignore-file -->\n<script src=\"./entry.js\"></script>");
        assert_eq!(info.imports.len(), 1);
    }

    // ── Angular template references ─────────────────────────────

    #[test]
    fn angular_template_extracts_member_refs() {
        let info = parse(
            "<h1>{{ title() }}</h1>\n\
             <p [class.highlighted]=\"isHighlighted\">{{ greeting() }}</p>\n\
             <button (click)=\"onButtonClick()\">Toggle</button>",
        );
        let names: rustc_hash::FxHashSet<&str> = info
            .member_accesses
            .iter()
            .filter(|access| access.object == ANGULAR_TPL_SENTINEL)
            .map(|access| access.member.as_str())
            .collect();
        for expected in ["title", "isHighlighted", "greeting", "onButtonClick"] {
            assert!(names.contains(expected), "should contain '{expected}'");
        }
    }

    #[test]
    fn plain_html_no_angular_refs() {
        let info = parse("<!doctype html><html><body><h1>Hello</h1></body></html>");
        assert!(info.member_accesses.is_empty());
    }
}