1use std::path::Path;
12use std::sync::LazyLock;
13
14use oxc_span::Span;
15
16use crate::asset_url::normalize_asset_url;
17use crate::sfc_template::angular::{self, ANGULAR_TPL_SENTINEL};
18use crate::{ImportInfo, ImportedName, MemberAccess, ModuleInfo};
19use fallow_types::discover::FileId;
20
21static HTML_COMMENT_RE: LazyLock<regex::Regex> =
23 LazyLock::new(|| regex::Regex::new(r"(?s)<!--.*?-->").expect("valid regex"));
24
25static SCRIPT_SRC_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
29 regex::Regex::new(r#"(?si)<script\b(?:[^>"']|"[^"]*"|'[^']*')*?\bsrc\s*=\s*["']([^"']+)["']"#)
30 .expect("valid regex")
31});
32
33static LINK_HREF_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
37 regex::Regex::new(
38 r#"(?si)<link\b(?:[^>"']|"[^"]*"|'[^']*')*?\brel\s*=\s*["'](stylesheet|modulepreload)["'](?:[^>"']|"[^"]*"|'[^']*')*?\bhref\s*=\s*["']([^"']+)["']"#,
39 )
40 .expect("valid regex")
41});
42
43static LINK_HREF_REVERSE_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
45 regex::Regex::new(
46 r#"(?si)<link\b(?:[^>"']|"[^"]*"|'[^']*')*?\bhref\s*=\s*["']([^"']+)["'](?:[^>"']|"[^"]*"|'[^']*')*?\brel\s*=\s*["'](stylesheet|modulepreload)["']"#,
47 )
48 .expect("valid regex")
49});
50
/// Returns `true` when `path` has the extension `html`.
///
/// The comparison is exact and case-sensitive: `.htm` and `.HTML` are not accepted, and a
/// path without an extension (or with a non-UTF-8 one) yields `false`.
pub(crate) fn is_html_file(path: &Path) -> bool {
    let extension = path.extension().and_then(|ext| ext.to_str());
    matches!(extension, Some("html"))
}
58
/// Returns `true` when `src` is not a local file reference.
///
/// A reference is treated as remote when it starts with `http://`, `https://`, the
/// protocol-relative marker `//`, or the `data:` scheme. URI schemes are case-insensitive,
/// so the prefixes are compared ignoring ASCII case (`HTTPS://…` and `Data:…` are remote
/// too). No trimming is performed; callers pass an already-trimmed value.
pub(crate) fn is_remote_url(src: &str) -> bool {
    const REMOTE_PREFIXES: [&str; 4] = ["http://", "https://", "//", "data:"];

    REMOTE_PREFIXES.iter().any(|prefix| {
        // `get` yields `None` when `src` is shorter than the prefix or the cut would split a
        // multi-byte character; neither case can match an all-ASCII prefix.
        src.get(..prefix.len())
            .is_some_and(|head| head.eq_ignore_ascii_case(prefix))
    })
}
66
67pub(crate) fn collect_asset_refs(source: &str) -> Vec<String> {
74 let stripped = HTML_COMMENT_RE.replace_all(source, "");
75 let mut refs: Vec<String> = Vec::new();
76
77 for cap in SCRIPT_SRC_RE.captures_iter(&stripped) {
78 if let Some(m) = cap.get(1) {
79 let src = m.as_str().trim();
80 if !src.is_empty() && !is_remote_url(src) {
81 refs.push(src.to_string());
82 }
83 }
84 }
85
86 for cap in LINK_HREF_RE.captures_iter(&stripped) {
87 if let Some(m) = cap.get(2) {
88 let href = m.as_str().trim();
89 if !href.is_empty() && !is_remote_url(href) {
90 refs.push(href.to_string());
91 }
92 }
93 }
94 for cap in LINK_HREF_REVERSE_RE.captures_iter(&stripped) {
95 if let Some(m) = cap.get(1) {
96 let href = m.as_str().trim();
97 if !href.is_empty() && !is_remote_url(href) {
98 refs.push(href.to_string());
99 }
100 }
101 }
102
103 refs
104}
105
106pub(crate) fn parse_html_to_module(file_id: FileId, source: &str, content_hash: u64) -> ModuleInfo {
108 let suppressions = crate::suppress::parse_suppressions_from_source(source);
109
110 let mut imports: Vec<ImportInfo> = collect_asset_refs(source)
113 .into_iter()
114 .map(|raw| ImportInfo {
115 source: normalize_asset_url(&raw),
116 imported_name: ImportedName::SideEffect,
117 local_name: String::new(),
118 is_type_only: false,
119 span: Span::default(),
120 source_span: Span::default(),
121 })
122 .collect();
123
124 imports.sort_unstable_by(|a, b| a.source.cmp(&b.source));
127 imports.dedup_by(|a, b| a.source == b.source);
128
129 let template_refs = angular::collect_angular_template_refs(source);
142 let mut member_accesses: Vec<MemberAccess> = template_refs
143 .identifiers
144 .into_iter()
145 .map(|name| MemberAccess {
146 object: ANGULAR_TPL_SENTINEL.to_string(),
147 member: name,
148 })
149 .collect();
150 member_accesses.extend(template_refs.member_accesses);
151
152 ModuleInfo {
153 file_id,
154 exports: Vec::new(),
155 imports,
156 re_exports: Vec::new(),
157 dynamic_imports: Vec::new(),
158 dynamic_import_patterns: Vec::new(),
159 require_calls: Vec::new(),
160 member_accesses,
161 whole_object_uses: Vec::new(),
162 has_cjs_exports: false,
163 content_hash,
164 suppressions,
165 unused_import_bindings: Vec::new(),
166 type_referenced_import_bindings: Vec::new(),
167 value_referenced_import_bindings: Vec::new(),
168 line_offsets: fallow_types::extract::compute_line_offsets(source),
169 complexity: Vec::new(),
170 flag_uses: Vec::new(),
171 class_heritage: vec![],
172 }
173}
174
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `html` as file 0 with a content hash of 0.
    fn parse(html: &str) -> ModuleInfo {
        parse_html_to_module(FileId(0), html, 0)
    }

    /// Asserts that `html` produces exactly one import whose source equals `expected`.
    #[track_caller]
    fn assert_single_import(html: &str, expected: &str) {
        let info = parse(html);
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, expected);
    }

    /// Asserts that `html` produces no imports at all.
    #[track_caller]
    fn assert_no_imports(html: &str) {
        assert!(parse(html).imports.is_empty());
    }

    /// Shorthand for calling `is_html_file` on a string path.
    fn html_path(path: &str) -> bool {
        is_html_file(Path::new(path))
    }

    // ---- is_html_file ----

    #[test]
    fn is_html_file_html() {
        assert!(html_path("index.html"));
    }

    #[test]
    fn is_html_file_nested() {
        assert!(html_path("pages/about.html"));
    }

    #[test]
    fn is_html_file_rejects_htm() {
        assert!(!html_path("index.htm"));
    }

    #[test]
    fn is_html_file_rejects_js() {
        assert!(!html_path("app.js"));
    }

    #[test]
    fn is_html_file_rejects_ts() {
        assert!(!html_path("app.ts"));
    }

    #[test]
    fn is_html_file_rejects_vue() {
        assert!(!html_path("App.vue"));
    }

    // ---- is_remote_url ----

    #[test]
    fn remote_url_http() {
        assert!(is_remote_url("http://example.com/script.js"));
    }

    #[test]
    fn remote_url_https() {
        assert!(is_remote_url("https://cdn.example.com/style.css"));
    }

    #[test]
    fn remote_url_protocol_relative() {
        assert!(is_remote_url("//cdn.example.com/lib.js"));
    }

    #[test]
    fn remote_url_data() {
        assert!(is_remote_url("data:text/javascript;base64,abc"));
    }

    #[test]
    fn local_relative_not_remote() {
        assert!(!is_remote_url("./src/entry.js"));
    }

    #[test]
    fn local_root_relative_not_remote() {
        assert!(!is_remote_url("/src/entry.js"));
    }

    // ---- <script src> extraction ----

    #[test]
    fn extracts_module_script_src() {
        assert_single_import(
            r#"<script type="module" src="./src/entry.js"></script>"#,
            "./src/entry.js",
        );
    }

    #[test]
    fn extracts_plain_script_src() {
        assert_single_import(
            r#"<script src="./src/polyfills.js"></script>"#,
            "./src/polyfills.js",
        );
    }

    #[test]
    fn extracts_multiple_scripts() {
        let info = parse(
            r#"
            <script type="module" src="./src/entry.js"></script>
            <script src="./src/polyfills.js"></script>
            "#,
        );
        assert_eq!(info.imports.len(), 2);
    }

    #[test]
    fn skips_inline_script() {
        assert_no_imports(r#"<script>console.log("hello");</script>"#);
    }

    #[test]
    fn skips_remote_script() {
        assert_no_imports(r#"<script src="https://cdn.example.com/lib.js"></script>"#);
    }

    #[test]
    fn skips_protocol_relative_script() {
        assert_no_imports(r#"<script src="//cdn.example.com/lib.js"></script>"#);
    }

    // ---- <link href> extraction ----

    #[test]
    fn extracts_stylesheet_link() {
        assert_single_import(
            r#"<link rel="stylesheet" href="./src/global.css" />"#,
            "./src/global.css",
        );
    }

    #[test]
    fn extracts_modulepreload_link() {
        assert_single_import(
            r#"<link rel="modulepreload" href="./src/vendor.js" />"#,
            "./src/vendor.js",
        );
    }

    #[test]
    fn extracts_link_with_reversed_attrs() {
        assert_single_import(
            r#"<link href="./src/global.css" rel="stylesheet" />"#,
            "./src/global.css",
        );
    }

    // ---- specifier normalization ----

    #[test]
    fn bare_script_src_normalized_to_relative() {
        assert_single_import(r#"<script src="app.js"></script>"#, "./app.js");
    }

    #[test]
    fn bare_module_script_src_normalized_to_relative() {
        assert_single_import(
            r#"<script type="module" src="main.ts"></script>"#,
            "./main.ts",
        );
    }

    #[test]
    fn bare_stylesheet_link_href_normalized_to_relative() {
        assert_single_import(
            r#"<link rel="stylesheet" href="styles.css" />"#,
            "./styles.css",
        );
    }

    #[test]
    fn bare_link_href_reversed_attrs_normalized_to_relative() {
        assert_single_import(
            r#"<link href="styles.css" rel="stylesheet" />"#,
            "./styles.css",
        );
    }

    #[test]
    fn bare_modulepreload_link_href_normalized_to_relative() {
        assert_single_import(
            r#"<link rel="modulepreload" href="vendor.js" />"#,
            "./vendor.js",
        );
    }

    #[test]
    fn bare_asset_with_subdir_normalized_to_relative() {
        assert_single_import(
            r#"<script src="assets/app.js"></script>"#,
            "./assets/app.js",
        );
    }

    #[test]
    fn root_absolute_script_src_unchanged() {
        assert_single_import(r#"<script src="/src/main.ts"></script>"#, "/src/main.ts");
    }

    #[test]
    fn parent_relative_script_src_unchanged() {
        assert_single_import(
            r#"<script src="../shared/vendor.js"></script>"#,
            "../shared/vendor.js",
        );
    }

    // ---- links that must be ignored ----

    #[test]
    fn skips_preload_link() {
        assert_no_imports(r#"<link rel="preload" href="./src/font.woff2" as="font" />"#);
    }

    #[test]
    fn skips_icon_link() {
        assert_no_imports(r#"<link rel="icon" href="./favicon.ico" />"#);
    }

    #[test]
    fn skips_remote_stylesheet() {
        assert_no_imports(r#"<link rel="stylesheet" href="https://fonts.googleapis.com/css" />"#);
    }

    // ---- HTML comments ----

    #[test]
    fn skips_commented_out_script() {
        assert_single_import(
            r#"<!-- <script src="./old.js"></script> -->
            <script src="./new.js"></script>"#,
            "./new.js",
        );
    }

    #[test]
    fn skips_commented_out_link() {
        assert_single_import(
            r#"<!-- <link rel="stylesheet" href="./old.css" /> -->
            <link rel="stylesheet" href="./new.css" />"#,
            "./new.css",
        );
    }

    // ---- tags spread over several lines ----

    #[test]
    fn handles_multiline_script_tag() {
        assert_single_import(
            "<script\n type=\"module\"\n src=\"./src/entry.js\"\n></script>",
            "./src/entry.js",
        );
    }

    #[test]
    fn handles_multiline_link_tag() {
        assert_single_import(
            "<link\n rel=\"stylesheet\"\n href=\"./src/global.css\"\n/>",
            "./src/global.css",
        );
    }

    // ---- whole documents ----

    #[test]
    fn full_vite_html() {
        let info = parse(
            r#"<!doctype html>
<html>
  <head>
    <link rel="stylesheet" href="./src/global.css" />
    <link rel="icon" href="/favicon.ico" />
  </head>
  <body>
    <div id="app"></div>
    <script type="module" src="./src/entry.js"></script>
  </body>
</html>"#,
        );
        assert_eq!(info.imports.len(), 2);
        for expected in ["./src/global.css", "./src/entry.js"] {
            assert!(
                info.imports
                    .iter()
                    .any(|import| import.source.as_str() == expected)
            );
        }
    }

    // ---- edge cases ----

    #[test]
    fn empty_html() {
        assert_no_imports("");
    }

    #[test]
    fn html_with_no_assets() {
        assert_no_imports(r"<!doctype html><html><body><h1>Hello</h1></body></html>");
    }

    #[test]
    fn single_quoted_attributes() {
        assert_single_import(r"<script src='./src/entry.js'></script>", "./src/entry.js");
    }

    #[test]
    fn all_imports_are_side_effect() {
        let info = parse(
            r#"<script src="./entry.js"></script>
            <link rel="stylesheet" href="./style.css" />"#,
        );
        for import in &info.imports {
            assert!(matches!(import.imported_name, ImportedName::SideEffect));
            assert!(import.local_name.is_empty());
            assert!(!import.is_type_only);
        }
    }

    #[test]
    fn suppression_comments_extracted() {
        let info = parse("<!-- fallow-ignore-file -->\n<script src=\"./entry.js\"></script>");
        // Only the import count is checked; this test makes no assertion about
        // `info.suppressions`.
        assert_eq!(info.imports.len(), 1);
    }

    // ---- Angular template references ----

    #[test]
    fn angular_template_extracts_member_refs() {
        let info = parse(
            "<h1>{{ title() }}</h1>\n\
             <p [class.highlighted]=\"isHighlighted\">{{ greeting() }}</p>\n\
             <button (click)=\"onButtonClick()\">Toggle</button>",
        );
        let names: rustc_hash::FxHashSet<&str> = info
            .member_accesses
            .iter()
            .filter(|access| access.object == ANGULAR_TPL_SENTINEL)
            .map(|access| access.member.as_str())
            .collect();
        for expected in ["title", "isHighlighted", "greeting", "onButtonClick"] {
            assert!(names.contains(expected), "should contain '{expected}'");
        }
    }

    #[test]
    fn plain_html_no_angular_refs() {
        let info = parse("<!doctype html><html><body><h1>Hello</h1></body></html>");
        assert!(info.member_accesses.is_empty());
    }
}