1use std::path::Path;
12use std::sync::LazyLock;
13
14use oxc_span::Span;
15
16use crate::asset_url::normalize_asset_url;
17use crate::sfc_template::angular::{self, ANGULAR_TPL_SENTINEL};
18use crate::{ImportInfo, ImportedName, MemberAccess, ModuleInfo};
19use fallow_types::discover::FileId;
20
21static HTML_COMMENT_RE: LazyLock<regex::Regex> =
23 LazyLock::new(|| regex::Regex::new(r"(?s)<!--.*?-->").expect("valid regex"));
24
25static SCRIPT_SRC_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
29 regex::Regex::new(r#"(?si)<script\b(?:[^>"']|"[^"]*"|'[^']*')*?\bsrc\s*=\s*["']([^"']+)["']"#)
30 .expect("valid regex")
31});
32
33static LINK_HREF_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
37 regex::Regex::new(
38 r#"(?si)<link\b(?:[^>"']|"[^"]*"|'[^']*')*?\brel\s*=\s*["'](stylesheet|modulepreload)["'](?:[^>"']|"[^"]*"|'[^']*')*?\bhref\s*=\s*["']([^"']+)["']"#,
39 )
40 .expect("valid regex")
41});
42
43static LINK_HREF_REVERSE_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
45 regex::Regex::new(
46 r#"(?si)<link\b(?:[^>"']|"[^"]*"|'[^']*')*?\bhref\s*=\s*["']([^"']+)["'](?:[^>"']|"[^"]*"|'[^']*')*?\brel\s*=\s*["'](stylesheet|modulepreload)["']"#,
47 )
48 .expect("valid regex")
49});
50
/// Returns `true` when `path` has the extension `html`.
///
/// The comparison is exact and case-sensitive: `.htm`, `.HTML`, extension-less
/// paths and non-UTF-8 extensions all yield `false`.
pub(crate) fn is_html_file(path: &Path) -> bool {
    let extension = path.extension().and_then(|ext| ext.to_str());
    matches!(extension, Some("html"))
}
58
/// Returns `true` when `src` points at something that is not a local file:
/// an `http://` or `https://` URL, a protocol-relative `//host/...` URL, or a
/// `data:` URI.
///
/// URI schemes are case-insensitive, so `HTTPS://cdn...` and `Data:...` are
/// recognised as well. The caller is expected to pass an already-trimmed value;
/// leading whitespace is not skipped here.
pub(crate) fn is_remote_url(src: &str) -> bool {
    // Prefixes whose scheme part must be compared without regard to ASCII case.
    const SCHEME_PREFIXES: [&str; 3] = ["http://", "https://", "data:"];

    // Byte-level comparison avoids slicing `src` at a non-char boundary when it
    // starts with multi-byte characters.
    let starts_with_ignore_case = |prefix: &str| {
        src.as_bytes()
            .get(..prefix.len())
            .is_some_and(|head| head.eq_ignore_ascii_case(prefix.as_bytes()))
    };

    src.starts_with("//")
        || SCHEME_PREFIXES
            .iter()
            .any(|prefix| starts_with_ignore_case(prefix))
}
66
67pub(crate) fn parse_html_to_module(file_id: FileId, source: &str, content_hash: u64) -> ModuleInfo {
69 let suppressions = crate::suppress::parse_suppressions_from_source(source);
70
71 let stripped = HTML_COMMENT_RE.replace_all(source, "");
73
74 let mut imports = Vec::new();
75
76 for cap in SCRIPT_SRC_RE.captures_iter(&stripped) {
80 if let Some(m) = cap.get(1) {
81 let src = m.as_str().trim();
82 if !src.is_empty() && !is_remote_url(src) {
83 imports.push(ImportInfo {
84 source: normalize_asset_url(src),
85 imported_name: ImportedName::SideEffect,
86 local_name: String::new(),
87 is_type_only: false,
88 span: Span::default(),
89 source_span: Span::default(),
90 });
91 }
92 }
93 }
94
95 for cap in LINK_HREF_RE.captures_iter(&stripped) {
99 if let Some(m) = cap.get(2) {
100 let href = m.as_str().trim();
101 if !href.is_empty() && !is_remote_url(href) {
102 imports.push(ImportInfo {
103 source: normalize_asset_url(href),
104 imported_name: ImportedName::SideEffect,
105 local_name: String::new(),
106 is_type_only: false,
107 span: Span::default(),
108 source_span: Span::default(),
109 });
110 }
111 }
112 }
113 for cap in LINK_HREF_REVERSE_RE.captures_iter(&stripped) {
114 if let Some(m) = cap.get(1) {
115 let href = m.as_str().trim();
116 if !href.is_empty() && !is_remote_url(href) {
117 imports.push(ImportInfo {
118 source: normalize_asset_url(href),
119 imported_name: ImportedName::SideEffect,
120 local_name: String::new(),
121 is_type_only: false,
122 span: Span::default(),
123 source_span: Span::default(),
124 });
125 }
126 }
127 }
128
129 imports.sort_unstable_by(|a, b| a.source.cmp(&b.source));
132 imports.dedup_by(|a, b| a.source == b.source);
133
134 let template_refs = angular::collect_angular_template_refs(source);
138 let member_accesses: Vec<MemberAccess> = template_refs
139 .into_iter()
140 .map(|name| MemberAccess {
141 object: ANGULAR_TPL_SENTINEL.to_string(),
142 member: name,
143 })
144 .collect();
145
146 ModuleInfo {
147 file_id,
148 exports: Vec::new(),
149 imports,
150 re_exports: Vec::new(),
151 dynamic_imports: Vec::new(),
152 dynamic_import_patterns: Vec::new(),
153 require_calls: Vec::new(),
154 member_accesses,
155 whole_object_uses: Vec::new(),
156 has_cjs_exports: false,
157 content_hash,
158 suppressions,
159 unused_import_bindings: Vec::new(),
160 line_offsets: fallow_types::extract::compute_line_offsets(source),
161 complexity: Vec::new(),
162 flag_uses: Vec::new(),
163 }
164}
165
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `html` with a fixed file id and content hash.
    fn parse(html: &str) -> ModuleInfo {
        parse_html_to_module(FileId(0), html, 0)
    }

    /// Asserts that `html` yields exactly one import whose source is `expected`.
    #[track_caller]
    fn assert_sole_import(html: &str, expected: &str) {
        let info = parse(html);
        assert_eq!(info.imports.len(), 1);
        assert_eq!(info.imports[0].source, expected);
    }

    /// Asserts that `html` yields no imports at all.
    #[track_caller]
    fn assert_no_imports(html: &str) {
        let info = parse(html);
        assert!(info.imports.is_empty());
    }

    // ---- is_html_file ----

    #[test]
    fn is_html_file_html() {
        assert!(is_html_file(Path::new("index.html")));
    }

    #[test]
    fn is_html_file_nested() {
        assert!(is_html_file(Path::new("pages/about.html")));
    }

    #[test]
    fn is_html_file_rejects_htm() {
        assert!(!is_html_file(Path::new("index.htm")));
    }

    #[test]
    fn is_html_file_rejects_js() {
        assert!(!is_html_file(Path::new("app.js")));
    }

    #[test]
    fn is_html_file_rejects_ts() {
        assert!(!is_html_file(Path::new("app.ts")));
    }

    #[test]
    fn is_html_file_rejects_vue() {
        assert!(!is_html_file(Path::new("App.vue")));
    }

    // ---- is_remote_url ----

    #[test]
    fn remote_url_http() {
        assert!(is_remote_url("http://example.com/script.js"));
    }

    #[test]
    fn remote_url_https() {
        assert!(is_remote_url("https://cdn.example.com/style.css"));
    }

    #[test]
    fn remote_url_protocol_relative() {
        assert!(is_remote_url("//cdn.example.com/lib.js"));
    }

    #[test]
    fn remote_url_data() {
        assert!(is_remote_url("data:text/javascript;base64,abc"));
    }

    #[test]
    fn local_relative_not_remote() {
        assert!(!is_remote_url("./src/entry.js"));
    }

    #[test]
    fn local_root_relative_not_remote() {
        assert!(!is_remote_url("/src/entry.js"));
    }

    // ---- script extraction ----

    #[test]
    fn extracts_module_script_src() {
        assert_sole_import(
            r#"<script type="module" src="./src/entry.js"></script>"#,
            "./src/entry.js",
        );
    }

    #[test]
    fn extracts_plain_script_src() {
        assert_sole_import(
            r#"<script src="./src/polyfills.js"></script>"#,
            "./src/polyfills.js",
        );
    }

    #[test]
    fn extracts_multiple_scripts() {
        let info = parse(
            r#"
            <script type="module" src="./src/entry.js"></script>
            <script src="./src/polyfills.js"></script>
            "#,
        );
        assert_eq!(info.imports.len(), 2);
    }

    #[test]
    fn skips_inline_script() {
        assert_no_imports(r#"<script>console.log("hello");</script>"#);
    }

    #[test]
    fn skips_remote_script() {
        assert_no_imports(r#"<script src="https://cdn.example.com/lib.js"></script>"#);
    }

    #[test]
    fn skips_protocol_relative_script() {
        assert_no_imports(r#"<script src="//cdn.example.com/lib.js"></script>"#);
    }

    // ---- link extraction ----

    #[test]
    fn extracts_stylesheet_link() {
        assert_sole_import(
            r#"<link rel="stylesheet" href="./src/global.css" />"#,
            "./src/global.css",
        );
    }

    #[test]
    fn extracts_modulepreload_link() {
        assert_sole_import(
            r#"<link rel="modulepreload" href="./src/vendor.js" />"#,
            "./src/vendor.js",
        );
    }

    #[test]
    fn extracts_link_with_reversed_attrs() {
        assert_sole_import(
            r#"<link href="./src/global.css" rel="stylesheet" />"#,
            "./src/global.css",
        );
    }

    // ---- URL normalization ----

    #[test]
    fn bare_script_src_normalized_to_relative() {
        assert_sole_import(r#"<script src="app.js"></script>"#, "./app.js");
    }

    #[test]
    fn bare_module_script_src_normalized_to_relative() {
        assert_sole_import(
            r#"<script type="module" src="main.ts"></script>"#,
            "./main.ts",
        );
    }

    #[test]
    fn bare_stylesheet_link_href_normalized_to_relative() {
        assert_sole_import(
            r#"<link rel="stylesheet" href="styles.css" />"#,
            "./styles.css",
        );
    }

    #[test]
    fn bare_link_href_reversed_attrs_normalized_to_relative() {
        assert_sole_import(
            r#"<link href="styles.css" rel="stylesheet" />"#,
            "./styles.css",
        );
    }

    #[test]
    fn bare_modulepreload_link_href_normalized_to_relative() {
        assert_sole_import(
            r#"<link rel="modulepreload" href="vendor.js" />"#,
            "./vendor.js",
        );
    }

    #[test]
    fn bare_asset_with_subdir_normalized_to_relative() {
        assert_sole_import(r#"<script src="assets/app.js"></script>"#, "./assets/app.js");
    }

    #[test]
    fn root_absolute_script_src_unchanged() {
        assert_sole_import(r#"<script src="/src/main.ts"></script>"#, "/src/main.ts");
    }

    #[test]
    fn parent_relative_script_src_unchanged() {
        assert_sole_import(
            r#"<script src="../shared/vendor.js"></script>"#,
            "../shared/vendor.js",
        );
    }

    // ---- links that must be ignored ----

    #[test]
    fn skips_preload_link() {
        assert_no_imports(r#"<link rel="preload" href="./src/font.woff2" as="font" />"#);
    }

    #[test]
    fn skips_icon_link() {
        assert_no_imports(r#"<link rel="icon" href="./favicon.ico" />"#);
    }

    #[test]
    fn skips_remote_stylesheet() {
        assert_no_imports(r#"<link rel="stylesheet" href="https://fonts.googleapis.com/css" />"#);
    }

    // ---- HTML comments ----

    #[test]
    fn skips_commented_out_script() {
        assert_sole_import(
            r#"<!-- <script src="./old.js"></script> -->
            <script src="./new.js"></script>"#,
            "./new.js",
        );
    }

    #[test]
    fn skips_commented_out_link() {
        assert_sole_import(
            r#"<!-- <link rel="stylesheet" href="./old.css" /> -->
            <link rel="stylesheet" href="./new.css" />"#,
            "./new.css",
        );
    }

    // ---- multi-line tags ----

    #[test]
    fn handles_multiline_script_tag() {
        assert_sole_import(
            "<script\n  type=\"module\"\n  src=\"./src/entry.js\"\n></script>",
            "./src/entry.js",
        );
    }

    #[test]
    fn handles_multiline_link_tag() {
        assert_sole_import(
            "<link\n  rel=\"stylesheet\"\n  href=\"./src/global.css\"\n/>",
            "./src/global.css",
        );
    }

    // ---- whole documents ----

    #[test]
    fn full_vite_html() {
        let info = parse(
            r#"<!doctype html>
<html>
  <head>
    <link rel="stylesheet" href="./src/global.css" />
    <link rel="icon" href="/favicon.ico" />
  </head>
  <body>
    <div id="app"></div>
    <script type="module" src="./src/entry.js"></script>
  </body>
</html>"#,
        );
        assert_eq!(info.imports.len(), 2);
        let has_source = |wanted: &str| info.imports.iter().any(|imp| imp.source == wanted);
        assert!(has_source("./src/global.css"));
        assert!(has_source("./src/entry.js"));
    }

    // ---- edge cases ----

    #[test]
    fn empty_html() {
        assert_no_imports("");
    }

    #[test]
    fn html_with_no_assets() {
        assert_no_imports(r"<!doctype html><html><body><h1>Hello</h1></body></html>");
    }

    #[test]
    fn single_quoted_attributes() {
        assert_sole_import(r"<script src='./src/entry.js'></script>", "./src/entry.js");
    }

    #[test]
    fn all_imports_are_side_effect() {
        let info = parse(
            r#"<script src="./entry.js"></script>
            <link rel="stylesheet" href="./style.css" />"#,
        );
        for import in &info.imports {
            assert!(matches!(import.imported_name, ImportedName::SideEffect));
            assert!(import.local_name.is_empty());
            assert!(!import.is_type_only);
        }
    }

    #[test]
    fn suppression_comments_extracted() {
        // Only the import count is checked here; the comment on the first line
        // must not prevent the script tag after it from being picked up.
        assert_eq!(
            parse("<!-- fallow-ignore-file -->\n<script src=\"./entry.js\"></script>")
                .imports
                .len(),
            1
        );
    }

    // ---- Angular templates ----

    #[test]
    fn angular_template_extracts_member_refs() {
        let info = parse(
            "<h1>{{ title() }}</h1>\n\
             <p [class.highlighted]=\"isHighlighted\">{{ greeting() }}</p>\n\
             <button (click)=\"onButtonClick()\">Toggle</button>",
        );
        let names: rustc_hash::FxHashSet<&str> = info
            .member_accesses
            .iter()
            .filter(|access| access.object == ANGULAR_TPL_SENTINEL)
            .map(|access| access.member.as_str())
            .collect();
        for expected in ["title", "isHighlighted", "greeting", "onButtonClick"] {
            assert!(names.contains(expected), "should contain '{expected}'");
        }
    }

    #[test]
    fn plain_html_no_angular_refs() {
        let info = parse("<!doctype html><html><body><h1>Hello</h1></body></html>");
        assert!(info.member_accesses.is_empty());
    }
}