1use std::path::Path;
12use std::sync::LazyLock;
13
14use oxc_span::Span;
15
16use crate::asset_url::normalize_asset_url;
17use crate::sfc_template::angular::{self, ANGULAR_TPL_SENTINEL};
18use crate::{ImportInfo, ImportedName, MemberAccess, ModuleInfo};
19use fallow_types::discover::FileId;
20
21static HTML_COMMENT_RE: LazyLock<regex::Regex> =
23 LazyLock::new(|| regex::Regex::new(r"(?s)<!--.*?-->").expect("valid regex"));
24
25static SCRIPT_SRC_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
29 regex::Regex::new(r#"(?si)<script\b(?:[^>"']|"[^"]*"|'[^']*')*?\bsrc\s*=\s*["']([^"']+)["']"#)
30 .expect("valid regex")
31});
32
33static LINK_HREF_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
37 regex::Regex::new(
38 r#"(?si)<link\b(?:[^>"']|"[^"]*"|'[^']*')*?\brel\s*=\s*["'](stylesheet|modulepreload)["'](?:[^>"']|"[^"]*"|'[^']*')*?\bhref\s*=\s*["']([^"']+)["']"#,
39 )
40 .expect("valid regex")
41});
42
43static LINK_HREF_REVERSE_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
45 regex::Regex::new(
46 r#"(?si)<link\b(?:[^>"']|"[^"]*"|'[^']*')*?\bhref\s*=\s*["']([^"']+)["'](?:[^>"']|"[^"]*"|'[^']*')*?\brel\s*=\s*["'](stylesheet|modulepreload)["']"#,
47 )
48 .expect("valid regex")
49});
50
/// Returns `true` when `path` has exactly the extension `html`.
///
/// The comparison is case-sensitive and exact: `.htm`, `.HTML`, a missing
/// extension, or an extension that is not valid UTF-8 all yield `false`.
pub(crate) fn is_html_file(path: &Path) -> bool {
    matches!(
        path.extension().and_then(|ext| ext.to_str()),
        Some("html")
    )
}
58
/// Returns `true` when `src` points at something that is not a file in the
/// project: an `http://` / `https://` URL, a protocol-relative `//host/...`
/// URL, or an inline `data:` URI.
///
/// URI schemes are case-insensitive, so the prefix is compared ignoring ASCII
/// case (`HTTPS://cdn...` and `Data:...` are remote too). No trimming is done
/// here; callers are expected to pass an already-trimmed attribute value.
pub(crate) fn is_remote_url(src: &str) -> bool {
    const REMOTE_PREFIXES: [&str; 4] = ["http://", "https://", "//", "data:"];

    REMOTE_PREFIXES.iter().any(|prefix| {
        // `str::get` yields `None` when `src` is shorter than the prefix or
        // when the cut would land inside a multi-byte character; neither case
        // can match an all-ASCII prefix, and neither can panic.
        src.get(..prefix.len())
            .is_some_and(|head| head.eq_ignore_ascii_case(prefix))
    })
}
66
67pub(crate) fn collect_asset_refs(source: &str) -> Vec<String> {
74 let stripped = HTML_COMMENT_RE.replace_all(source, "");
75 let mut refs: Vec<String> = Vec::new();
76
77 for cap in SCRIPT_SRC_RE.captures_iter(&stripped) {
78 if let Some(m) = cap.get(1) {
79 let src = m.as_str().trim();
80 if !src.is_empty() && !is_remote_url(src) {
81 refs.push(src.to_string());
82 }
83 }
84 }
85
86 for cap in LINK_HREF_RE.captures_iter(&stripped) {
87 if let Some(m) = cap.get(2) {
88 let href = m.as_str().trim();
89 if !href.is_empty() && !is_remote_url(href) {
90 refs.push(href.to_string());
91 }
92 }
93 }
94 for cap in LINK_HREF_REVERSE_RE.captures_iter(&stripped) {
95 if let Some(m) = cap.get(1) {
96 let href = m.as_str().trim();
97 if !href.is_empty() && !is_remote_url(href) {
98 refs.push(href.to_string());
99 }
100 }
101 }
102
103 refs
104}
105
106pub(crate) fn parse_html_to_module(file_id: FileId, source: &str, content_hash: u64) -> ModuleInfo {
108 let suppressions = crate::suppress::parse_suppressions_from_source(source);
109
110 let mut imports: Vec<ImportInfo> = collect_asset_refs(source)
113 .into_iter()
114 .map(|raw| ImportInfo {
115 source: normalize_asset_url(&raw),
116 imported_name: ImportedName::SideEffect,
117 local_name: String::new(),
118 is_type_only: false,
119 span: Span::default(),
120 source_span: Span::default(),
121 })
122 .collect();
123
124 imports.sort_unstable_by(|a, b| a.source.cmp(&b.source));
127 imports.dedup_by(|a, b| a.source == b.source);
128
129 let template_refs = angular::collect_angular_template_refs(source);
133 let member_accesses: Vec<MemberAccess> = template_refs
134 .into_iter()
135 .map(|name| MemberAccess {
136 object: ANGULAR_TPL_SENTINEL.to_string(),
137 member: name,
138 })
139 .collect();
140
141 ModuleInfo {
142 file_id,
143 exports: Vec::new(),
144 imports,
145 re_exports: Vec::new(),
146 dynamic_imports: Vec::new(),
147 dynamic_import_patterns: Vec::new(),
148 require_calls: Vec::new(),
149 member_accesses,
150 whole_object_uses: Vec::new(),
151 has_cjs_exports: false,
152 content_hash,
153 suppressions,
154 unused_import_bindings: Vec::new(),
155 line_offsets: fallow_types::extract::compute_line_offsets(source),
156 complexity: Vec::new(),
157 flag_uses: Vec::new(),
158 }
159}
160
#[cfg(test)]
mod tests {
    use super::*;

    // ── helpers ──────────────────────────────────────────────────

    /// Parses `html` with a fixed file id and content hash.
    fn parse(html: &str) -> ModuleInfo {
        parse_html_to_module(FileId(0), html, 0)
    }

    /// Asserts that `html` yields exactly one import with specifier `expected`.
    #[track_caller]
    fn assert_single_import(html: &str, expected: &str) {
        let module = parse(html);
        assert_eq!(module.imports.len(), 1);
        assert_eq!(module.imports[0].source, expected);
    }

    /// Asserts that `html` yields no imports.
    #[track_caller]
    fn assert_no_imports(html: &str) {
        assert!(parse(html).imports.is_empty());
    }

    // ── is_html_file ─────────────────────────────────────────────

    #[test]
    fn is_html_file_html() {
        assert!(is_html_file(Path::new("index.html")));
    }

    #[test]
    fn is_html_file_nested() {
        assert!(is_html_file(Path::new("pages/about.html")));
    }

    #[test]
    fn is_html_file_rejects_htm() {
        assert!(!is_html_file(Path::new("index.htm")));
    }

    #[test]
    fn is_html_file_rejects_js() {
        assert!(!is_html_file(Path::new("app.js")));
    }

    #[test]
    fn is_html_file_rejects_ts() {
        assert!(!is_html_file(Path::new("app.ts")));
    }

    #[test]
    fn is_html_file_rejects_vue() {
        assert!(!is_html_file(Path::new("App.vue")));
    }

    // ── is_remote_url ────────────────────────────────────────────

    #[test]
    fn remote_url_http() {
        assert!(is_remote_url("http://example.com/script.js"));
    }

    #[test]
    fn remote_url_https() {
        assert!(is_remote_url("https://cdn.example.com/style.css"));
    }

    #[test]
    fn remote_url_protocol_relative() {
        assert!(is_remote_url("//cdn.example.com/lib.js"));
    }

    #[test]
    fn remote_url_data() {
        assert!(is_remote_url("data:text/javascript;base64,abc"));
    }

    #[test]
    fn local_relative_not_remote() {
        assert!(!is_remote_url("./src/entry.js"));
    }

    #[test]
    fn local_root_relative_not_remote() {
        assert!(!is_remote_url("/src/entry.js"));
    }

    // ── <script src> extraction ──────────────────────────────────

    #[test]
    fn extracts_module_script_src() {
        assert_single_import(
            r#"<script type="module" src="./src/entry.js"></script>"#,
            "./src/entry.js",
        );
    }

    #[test]
    fn extracts_plain_script_src() {
        assert_single_import(
            r#"<script src="./src/polyfills.js"></script>"#,
            "./src/polyfills.js",
        );
    }

    #[test]
    fn extracts_multiple_scripts() {
        let module = parse(
            r#"
            <script type="module" src="./src/entry.js"></script>
            <script src="./src/polyfills.js"></script>
            "#,
        );
        assert_eq!(module.imports.len(), 2);
    }

    #[test]
    fn skips_inline_script() {
        assert_no_imports(r#"<script>console.log("hello");</script>"#);
    }

    #[test]
    fn skips_remote_script() {
        assert_no_imports(r#"<script src="https://cdn.example.com/lib.js"></script>"#);
    }

    #[test]
    fn skips_protocol_relative_script() {
        assert_no_imports(r#"<script src="//cdn.example.com/lib.js"></script>"#);
    }

    // ── <link href> extraction ───────────────────────────────────

    #[test]
    fn extracts_stylesheet_link() {
        assert_single_import(
            r#"<link rel="stylesheet" href="./src/global.css" />"#,
            "./src/global.css",
        );
    }

    #[test]
    fn extracts_modulepreload_link() {
        assert_single_import(
            r#"<link rel="modulepreload" href="./src/vendor.js" />"#,
            "./src/vendor.js",
        );
    }

    #[test]
    fn extracts_link_with_reversed_attrs() {
        assert_single_import(
            r#"<link href="./src/global.css" rel="stylesheet" />"#,
            "./src/global.css",
        );
    }

    // ── specifier normalisation ──────────────────────────────────
    // Bare references gain a leading `./`; root-absolute and parent-relative
    // references are expected to come through unchanged.

    #[test]
    fn bare_script_src_normalized_to_relative() {
        assert_single_import(r#"<script src="app.js"></script>"#, "./app.js");
    }

    #[test]
    fn bare_module_script_src_normalized_to_relative() {
        assert_single_import(
            r#"<script type="module" src="main.ts"></script>"#,
            "./main.ts",
        );
    }

    #[test]
    fn bare_stylesheet_link_href_normalized_to_relative() {
        assert_single_import(
            r#"<link rel="stylesheet" href="styles.css" />"#,
            "./styles.css",
        );
    }

    #[test]
    fn bare_link_href_reversed_attrs_normalized_to_relative() {
        assert_single_import(
            r#"<link href="styles.css" rel="stylesheet" />"#,
            "./styles.css",
        );
    }

    #[test]
    fn bare_modulepreload_link_href_normalized_to_relative() {
        assert_single_import(
            r#"<link rel="modulepreload" href="vendor.js" />"#,
            "./vendor.js",
        );
    }

    #[test]
    fn bare_asset_with_subdir_normalized_to_relative() {
        assert_single_import(
            r#"<script src="assets/app.js"></script>"#,
            "./assets/app.js",
        );
    }

    #[test]
    fn root_absolute_script_src_unchanged() {
        assert_single_import(r#"<script src="/src/main.ts"></script>"#, "/src/main.ts");
    }

    #[test]
    fn parent_relative_script_src_unchanged() {
        assert_single_import(
            r#"<script src="../shared/vendor.js"></script>"#,
            "../shared/vendor.js",
        );
    }

    // ── ignored <link> kinds ─────────────────────────────────────

    #[test]
    fn skips_preload_link() {
        assert_no_imports(r#"<link rel="preload" href="./src/font.woff2" as="font" />"#);
    }

    #[test]
    fn skips_icon_link() {
        assert_no_imports(r#"<link rel="icon" href="./favicon.ico" />"#);
    }

    #[test]
    fn skips_remote_stylesheet() {
        assert_no_imports(r#"<link rel="stylesheet" href="https://fonts.googleapis.com/css" />"#);
    }

    // ── HTML comments ────────────────────────────────────────────

    #[test]
    fn skips_commented_out_script() {
        assert_single_import(
            r#"<!-- <script src="./old.js"></script> -->
            <script src="./new.js"></script>"#,
            "./new.js",
        );
    }

    #[test]
    fn skips_commented_out_link() {
        assert_single_import(
            r#"<!-- <link rel="stylesheet" href="./old.css" /> -->
            <link rel="stylesheet" href="./new.css" />"#,
            "./new.css",
        );
    }

    // ── tags spanning several lines ──────────────────────────────

    #[test]
    fn handles_multiline_script_tag() {
        assert_single_import(
            "<script\n type=\"module\"\n src=\"./src/entry.js\"\n></script>",
            "./src/entry.js",
        );
    }

    #[test]
    fn handles_multiline_link_tag() {
        assert_single_import(
            "<link\n rel=\"stylesheet\"\n href=\"./src/global.css\"\n/>",
            "./src/global.css",
        );
    }

    // ── whole documents ──────────────────────────────────────────

    #[test]
    fn full_vite_html() {
        let module = parse(
            r#"<!doctype html>
<html>
  <head>
    <link rel="stylesheet" href="./src/global.css" />
    <link rel="icon" href="/favicon.ico" />
  </head>
  <body>
    <div id="app"></div>
    <script type="module" src="./src/entry.js"></script>
  </body>
</html>"#,
        );
        // The icon link is not an import; only the stylesheet and the script are.
        assert_eq!(module.imports.len(), 2);
        for expected in ["./src/global.css", "./src/entry.js"] {
            assert!(
                module
                    .imports
                    .iter()
                    .any(|import| import.source.as_str() == expected)
            );
        }
    }

    // ── edge cases ───────────────────────────────────────────────

    #[test]
    fn empty_html() {
        assert_no_imports("");
    }

    #[test]
    fn html_with_no_assets() {
        assert_no_imports(r"<!doctype html><html><body><h1>Hello</h1></body></html>");
    }

    #[test]
    fn single_quoted_attributes() {
        assert_single_import(r"<script src='./src/entry.js'></script>", "./src/entry.js");
    }

    #[test]
    fn all_imports_are_side_effect() {
        let module = parse(
            r#"<script src="./entry.js"></script>
            <link rel="stylesheet" href="./style.css" />"#,
        );
        for import in &module.imports {
            assert!(matches!(import.imported_name, ImportedName::SideEffect));
            assert!(import.local_name.is_empty());
            assert!(!import.is_type_only);
        }
    }

    #[test]
    fn suppression_comments_extracted() {
        // Only the import count is checked here; `suppressions` itself is not
        // inspected by this test.
        let module = parse("<!-- fallow-ignore-file -->\n<script src=\"./entry.js\"></script>");
        assert_eq!(module.imports.len(), 1);
    }

    // ── Angular template references ──────────────────────────────

    #[test]
    fn angular_template_extracts_member_refs() {
        let module = parse(concat!(
            "<h1>{{ title() }}</h1>\n",
            "<p [class.highlighted]=\"isHighlighted\">{{ greeting() }}</p>\n",
            "<button (click)=\"onButtonClick()\">Toggle</button>",
        ));
        let names: rustc_hash::FxHashSet<&str> = module
            .member_accesses
            .iter()
            .filter(|access| access.object == ANGULAR_TPL_SENTINEL)
            .map(|access| access.member.as_str())
            .collect();
        for expected in ["title", "isHighlighted", "greeting", "onButtonClick"] {
            assert!(names.contains(expected), "should contain '{expected}'");
        }
    }

    #[test]
    fn plain_html_no_angular_refs() {
        let module = parse("<!doctype html><html><body><h1>Hello</h1></body></html>");
        assert!(module.member_accesses.is_empty());
    }
}