1use std::path::Path;
12use std::sync::LazyLock;
13
14use oxc_span::Span;
15
16use crate::asset_url::normalize_asset_url;
17use crate::sfc_template::angular::{self, ANGULAR_TPL_SENTINEL};
18use crate::{ImportInfo, ImportedName, MemberAccess, ModuleInfo};
19use fallow_types::discover::FileId;
20
21static HTML_COMMENT_RE: LazyLock<regex::Regex> =
23 LazyLock::new(|| regex::Regex::new(r"(?s)<!--.*?-->").expect("valid regex"));
24
25static SCRIPT_SRC_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
29 regex::Regex::new(r#"(?si)<script\b(?:[^>"']|"[^"]*"|'[^']*')*?\bsrc\s*=\s*["']([^"']+)["']"#)
30 .expect("valid regex")
31});
32
33static LINK_HREF_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
37 regex::Regex::new(
38 r#"(?si)<link\b(?:[^>"']|"[^"]*"|'[^']*')*?\brel\s*=\s*["'](stylesheet|modulepreload)["'](?:[^>"']|"[^"]*"|'[^']*')*?\bhref\s*=\s*["']([^"']+)["']"#,
39 )
40 .expect("valid regex")
41});
42
43static LINK_HREF_REVERSE_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
45 regex::Regex::new(
46 r#"(?si)<link\b(?:[^>"']|"[^"]*"|'[^']*')*?\bhref\s*=\s*["']([^"']+)["'](?:[^>"']|"[^"]*"|'[^']*')*?\brel\s*=\s*["'](stylesheet|modulepreload)["']"#,
47 )
48 .expect("valid regex")
49});
50
/// Returns `true` when `path` has the exact extension `html`.
///
/// The comparison is case-sensitive, and paths with no extension, or with an
/// extension that is not valid UTF-8, yield `false`.
pub(crate) fn is_html_file(path: &Path) -> bool {
    matches!(
        path.extension().and_then(|ext| ext.to_str()),
        Some("html")
    )
}
58
/// Returns `true` when `src` starts with a remote or inline URL prefix.
///
/// Recognized prefixes are `http://`, `https://`, protocol-relative `//`, and
/// `data:`. URI schemes are case-insensitive (RFC 3986 §3.1), so the prefix is
/// compared ignoring ASCII case: `HTTPS://cdn.example.com/x.js` is remote.
pub(crate) fn is_remote_url(src: &str) -> bool {
    const REMOTE_PREFIXES: [&str; 4] = ["http://", "https://", "//", "data:"];

    REMOTE_PREFIXES.iter().any(|prefix| {
        // `get` yields `None` when `src` is shorter than the prefix or the cut
        // would split a multi-byte character; neither case can match an
        // all-ASCII prefix.
        src.get(..prefix.len())
            .is_some_and(|head| head.eq_ignore_ascii_case(prefix))
    })
}
66
67pub(crate) fn collect_asset_refs(source: &str) -> Vec<String> {
74 let stripped = HTML_COMMENT_RE.replace_all(source, "");
75 let mut refs: Vec<String> = Vec::new();
76
77 for cap in SCRIPT_SRC_RE.captures_iter(&stripped) {
78 if let Some(m) = cap.get(1) {
79 let src = m.as_str().trim();
80 if !src.is_empty() && !is_remote_url(src) {
81 refs.push(src.to_string());
82 }
83 }
84 }
85
86 for cap in LINK_HREF_RE.captures_iter(&stripped) {
87 if let Some(m) = cap.get(2) {
88 let href = m.as_str().trim();
89 if !href.is_empty() && !is_remote_url(href) {
90 refs.push(href.to_string());
91 }
92 }
93 }
94 for cap in LINK_HREF_REVERSE_RE.captures_iter(&stripped) {
95 if let Some(m) = cap.get(1) {
96 let href = m.as_str().trim();
97 if !href.is_empty() && !is_remote_url(href) {
98 refs.push(href.to_string());
99 }
100 }
101 }
102
103 refs
104}
105
106pub(crate) fn parse_html_to_module(file_id: FileId, source: &str, content_hash: u64) -> ModuleInfo {
108 let suppressions = crate::suppress::parse_suppressions_from_source(source);
109
110 let mut imports: Vec<ImportInfo> = collect_asset_refs(source)
113 .into_iter()
114 .map(|raw| ImportInfo {
115 source: normalize_asset_url(&raw),
116 imported_name: ImportedName::SideEffect,
117 local_name: String::new(),
118 is_type_only: false,
119 span: Span::default(),
120 source_span: Span::default(),
121 })
122 .collect();
123
124 imports.sort_unstable_by(|a, b| a.source.cmp(&b.source));
127 imports.dedup_by(|a, b| a.source == b.source);
128
129 let template_refs = angular::collect_angular_template_refs(source);
133 let member_accesses: Vec<MemberAccess> = template_refs
134 .into_iter()
135 .map(|name| MemberAccess {
136 object: ANGULAR_TPL_SENTINEL.to_string(),
137 member: name,
138 })
139 .collect();
140
141 ModuleInfo {
142 file_id,
143 exports: Vec::new(),
144 imports,
145 re_exports: Vec::new(),
146 dynamic_imports: Vec::new(),
147 dynamic_import_patterns: Vec::new(),
148 require_calls: Vec::new(),
149 member_accesses,
150 whole_object_uses: Vec::new(),
151 has_cjs_exports: false,
152 content_hash,
153 suppressions,
154 unused_import_bindings: Vec::new(),
155 line_offsets: fallow_types::extract::compute_line_offsets(source),
156 complexity: Vec::new(),
157 flag_uses: Vec::new(),
158 class_heritage: vec![],
159 }
160}
161
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `html` the way every test here does: file id 0, content hash 0.
    fn parse(html: &str) -> ModuleInfo {
        parse_html_to_module(FileId(0), html, 0)
    }

    /// Asserts that `html` yields exactly one import whose source is `expected`.
    #[track_caller]
    fn assert_single_import(html: &str, expected: &str) {
        let module = parse(html);
        assert_eq!(module.imports.len(), 1);
        assert_eq!(module.imports[0].source, expected);
    }

    /// Asserts that `html` yields no imports.
    #[track_caller]
    fn assert_no_imports(html: &str) {
        assert!(parse(html).imports.is_empty());
    }

    // ── is_html_file ────────────────────────────────────────────

    #[test]
    fn is_html_file_html() {
        let path = Path::new("index.html");
        assert!(is_html_file(path));
    }

    #[test]
    fn is_html_file_nested() {
        let path = Path::new("pages/about.html");
        assert!(is_html_file(path));
    }

    #[test]
    fn is_html_file_rejects_htm() {
        let path = Path::new("index.htm");
        assert!(!is_html_file(path));
    }

    #[test]
    fn is_html_file_rejects_js() {
        let path = Path::new("app.js");
        assert!(!is_html_file(path));
    }

    #[test]
    fn is_html_file_rejects_ts() {
        let path = Path::new("app.ts");
        assert!(!is_html_file(path));
    }

    #[test]
    fn is_html_file_rejects_vue() {
        let path = Path::new("App.vue");
        assert!(!is_html_file(path));
    }

    // ── is_remote_url ───────────────────────────────────────────

    #[test]
    fn remote_url_http() {
        assert!(is_remote_url("http://example.com/script.js"));
    }

    #[test]
    fn remote_url_https() {
        assert!(is_remote_url("https://cdn.example.com/style.css"));
    }

    #[test]
    fn remote_url_protocol_relative() {
        assert!(is_remote_url("//cdn.example.com/lib.js"));
    }

    #[test]
    fn remote_url_data() {
        assert!(is_remote_url("data:text/javascript;base64,abc"));
    }

    #[test]
    fn local_relative_not_remote() {
        assert!(!is_remote_url("./src/entry.js"));
    }

    #[test]
    fn local_root_relative_not_remote() {
        assert!(!is_remote_url("/src/entry.js"));
    }

    // ── <script src> extraction ─────────────────────────────────

    #[test]
    fn extracts_module_script_src() {
        assert_single_import(
            r#"<script type="module" src="./src/entry.js"></script>"#,
            "./src/entry.js",
        );
    }

    #[test]
    fn extracts_plain_script_src() {
        assert_single_import(
            r#"<script src="./src/polyfills.js"></script>"#,
            "./src/polyfills.js",
        );
    }

    #[test]
    fn extracts_multiple_scripts() {
        let module = parse(
            r#"
            <script type="module" src="./src/entry.js"></script>
            <script src="./src/polyfills.js"></script>
            "#,
        );
        assert_eq!(module.imports.len(), 2);
    }

    #[test]
    fn skips_inline_script() {
        assert_no_imports(r#"<script>console.log("hello");</script>"#);
    }

    #[test]
    fn skips_remote_script() {
        assert_no_imports(r#"<script src="https://cdn.example.com/lib.js"></script>"#);
    }

    #[test]
    fn skips_protocol_relative_script() {
        assert_no_imports(r#"<script src="//cdn.example.com/lib.js"></script>"#);
    }

    // ── <link> extraction ───────────────────────────────────────

    #[test]
    fn extracts_stylesheet_link() {
        assert_single_import(
            r#"<link rel="stylesheet" href="./src/global.css" />"#,
            "./src/global.css",
        );
    }

    #[test]
    fn extracts_modulepreload_link() {
        assert_single_import(
            r#"<link rel="modulepreload" href="./src/vendor.js" />"#,
            "./src/vendor.js",
        );
    }

    #[test]
    fn extracts_link_with_reversed_attrs() {
        assert_single_import(
            r#"<link href="./src/global.css" rel="stylesheet" />"#,
            "./src/global.css",
        );
    }

    // ── URL normalization ───────────────────────────────────────

    #[test]
    fn bare_script_src_normalized_to_relative() {
        assert_single_import(r#"<script src="app.js"></script>"#, "./app.js");
    }

    #[test]
    fn bare_module_script_src_normalized_to_relative() {
        assert_single_import(
            r#"<script type="module" src="main.ts"></script>"#,
            "./main.ts",
        );
    }

    #[test]
    fn bare_stylesheet_link_href_normalized_to_relative() {
        assert_single_import(
            r#"<link rel="stylesheet" href="styles.css" />"#,
            "./styles.css",
        );
    }

    #[test]
    fn bare_link_href_reversed_attrs_normalized_to_relative() {
        assert_single_import(
            r#"<link href="styles.css" rel="stylesheet" />"#,
            "./styles.css",
        );
    }

    #[test]
    fn bare_modulepreload_link_href_normalized_to_relative() {
        assert_single_import(
            r#"<link rel="modulepreload" href="vendor.js" />"#,
            "./vendor.js",
        );
    }

    #[test]
    fn bare_asset_with_subdir_normalized_to_relative() {
        assert_single_import(
            r#"<script src="assets/app.js"></script>"#,
            "./assets/app.js",
        );
    }

    #[test]
    fn root_absolute_script_src_unchanged() {
        assert_single_import(r#"<script src="/src/main.ts"></script>"#, "/src/main.ts");
    }

    #[test]
    fn parent_relative_script_src_unchanged() {
        assert_single_import(
            r#"<script src="../shared/vendor.js"></script>"#,
            "../shared/vendor.js",
        );
    }

    // ── ignored <link> kinds ────────────────────────────────────

    #[test]
    fn skips_preload_link() {
        assert_no_imports(r#"<link rel="preload" href="./src/font.woff2" as="font" />"#);
    }

    #[test]
    fn skips_icon_link() {
        assert_no_imports(r#"<link rel="icon" href="./favicon.ico" />"#);
    }

    #[test]
    fn skips_remote_stylesheet() {
        assert_no_imports(r#"<link rel="stylesheet" href="https://fonts.googleapis.com/css" />"#);
    }

    // ── HTML comments ───────────────────────────────────────────

    #[test]
    fn skips_commented_out_script() {
        assert_single_import(
            r#"<!-- <script src="./old.js"></script> -->
            <script src="./new.js"></script>"#,
            "./new.js",
        );
    }

    #[test]
    fn skips_commented_out_link() {
        assert_single_import(
            r#"<!-- <link rel="stylesheet" href="./old.css" /> -->
            <link rel="stylesheet" href="./new.css" />"#,
            "./new.css",
        );
    }

    // ── multi-line tags ─────────────────────────────────────────

    #[test]
    fn handles_multiline_script_tag() {
        assert_single_import(
            "<script\n  type=\"module\"\n  src=\"./src/entry.js\"\n></script>",
            "./src/entry.js",
        );
    }

    #[test]
    fn handles_multiline_link_tag() {
        assert_single_import(
            "<link\n  rel=\"stylesheet\"\n  href=\"./src/global.css\"\n/>",
            "./src/global.css",
        );
    }

    // ── whole documents ─────────────────────────────────────────

    #[test]
    fn full_vite_html() {
        let module = parse(
            r#"<!doctype html>
<html>
  <head>
    <link rel="stylesheet" href="./src/global.css" />
    <link rel="icon" href="/favicon.ico" />
  </head>
  <body>
    <div id="app"></div>
    <script type="module" src="./src/entry.js"></script>
  </body>
</html>"#,
        );
        assert_eq!(module.imports.len(), 2);
        for expected in ["./src/global.css", "./src/entry.js"] {
            assert!(
                module
                    .imports
                    .iter()
                    .any(|import| import.source.as_str() == expected)
            );
        }
    }

    // ── edge cases ──────────────────────────────────────────────

    #[test]
    fn empty_html() {
        assert_no_imports("");
    }

    #[test]
    fn html_with_no_assets() {
        assert_no_imports(r"<!doctype html><html><body><h1>Hello</h1></body></html>");
    }

    #[test]
    fn single_quoted_attributes() {
        assert_single_import(
            r"<script src='./src/entry.js'></script>",
            "./src/entry.js",
        );
    }

    #[test]
    fn all_imports_are_side_effect() {
        let module = parse(
            r#"<script src="./entry.js"></script>
            <link rel="stylesheet" href="./style.css" />"#,
        );
        for import in &module.imports {
            assert!(matches!(import.imported_name, ImportedName::SideEffect));
            assert!(import.local_name.is_empty());
            assert!(!import.is_type_only);
        }
    }

    #[test]
    fn suppression_comments_extracted() {
        let module = parse("<!-- fallow-ignore-file -->\n<script src=\"./entry.js\"></script>");
        assert_eq!(module.imports.len(), 1);
    }

    // ── Angular template references ─────────────────────────────

    #[test]
    fn angular_template_extracts_member_refs() {
        let module = parse(
            "<h1>{{ title() }}</h1>\n\
             <p [class.highlighted]=\"isHighlighted\">{{ greeting() }}</p>\n\
             <button (click)=\"onButtonClick()\">Toggle</button>",
        );
        let referenced: rustc_hash::FxHashSet<&str> = module
            .member_accesses
            .iter()
            .filter(|access| access.object == ANGULAR_TPL_SENTINEL)
            .map(|access| access.member.as_str())
            .collect();
        for expected in ["title", "isHighlighted", "greeting", "onButtonClick"] {
            assert!(
                referenced.contains(expected),
                "should contain '{expected}'"
            );
        }
    }

    #[test]
    fn plain_html_no_angular_refs() {
        let module = parse("<!doctype html><html><body><h1>Hello</h1></body></html>");
        assert!(module.member_accesses.is_empty());
    }
}