1use std::path::Path;
12use std::sync::LazyLock;
13
14use oxc_span::Span;
15
16use crate::sfc_template::angular::{self, ANGULAR_TPL_SENTINEL};
17use crate::{ImportInfo, ImportedName, MemberAccess, ModuleInfo};
18use fallow_types::discover::FileId;
19
/// Matches one HTML comment, from `<!--` up to the nearest following `-->`.
///
/// `(?s)` lets `.` match newlines so multi-line comments are covered, and the
/// lazy `.*?` stops at the first closing marker instead of swallowing
/// everything up to the last comment in the document.
static HTML_COMMENT_RE: LazyLock<regex::Regex> =
    LazyLock::new(|| regex::Regex::new(r"(?s)<!--.*?-->").expect("valid regex"));
23
/// Matches a `<script ...>` opening tag that carries a quoted `src` attribute.
///
/// Capture group 1 is the attribute value (without the surrounding quotes).
/// Matching is case-insensitive (`i`) and `.` spans newlines (`s`). The
/// attribute run before `src` is consumed either one non-`>`/non-quote
/// character at a time or as a whole quoted value, so a `>` inside a quoted
/// attribute does not end the tag early. Unquoted `src` values are not matched.
///
/// NOTE(review): `\b` also holds between `-` and `s`, so an attribute such as
/// `data-src="..."` appears to satisfy `\bsrc` as well; confirm whether that
/// is intended.
static SCRIPT_SRC_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
    regex::Regex::new(r#"(?si)<script\b(?:[^>"']|"[^"]*"|'[^']*')*?\bsrc\s*=\s*["']([^"']+)["']"#)
        .expect("valid regex")
});
31
/// Matches a `<link ...>` tag whose `rel` attribute appears before its `href`.
///
/// Only a `rel` value of exactly `stylesheet` or `modulepreload` matches
/// (compared case-insensitively because of the `i` flag).
/// Capture group 1 is the `rel` value; capture group 2 is the `href` value.
/// Both attributes must be quoted with `"` or `'`.
static LINK_HREF_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
    regex::Regex::new(
        r#"(?si)<link\b(?:[^>"']|"[^"]*"|'[^']*')*?\brel\s*=\s*["'](stylesheet|modulepreload)["'](?:[^>"']|"[^"]*"|'[^']*')*?\bhref\s*=\s*["']([^"']+)["']"#,
    )
    .expect("valid regex")
});
41
/// Matches a `<link ...>` tag whose `href` attribute appears before its `rel`.
///
/// This is the mirror image of the rel-first pattern: capture group 1 is the
/// `href` value and capture group 2 is the `rel` value, which must be exactly
/// `stylesheet` or `modulepreload` (case-insensitive via the `i` flag).
static LINK_HREF_REVERSE_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
    regex::Regex::new(
        r#"(?si)<link\b(?:[^>"']|"[^"]*"|'[^']*')*?\bhref\s*=\s*["']([^"']+)["'](?:[^>"']|"[^"]*"|'[^']*')*?\brel\s*=\s*["'](stylesheet|modulepreload)["']"#,
    )
    .expect("valid regex")
});
49
/// Returns `true` when `path` has the extension `html`.
///
/// The comparison is exact: `htm`, upper-case variants, paths without an
/// extension, and extensions that are not valid UTF-8 all yield `false`.
pub(crate) fn is_html_file(path: &Path) -> bool {
    let extension = path.extension().and_then(|raw| raw.to_str());
    matches!(extension, Some("html"))
}
57
/// Returns `true` when `src` points outside the local project.
///
/// A reference counts as remote when it starts with `http://`, `https://`,
/// the protocol-relative marker `//`, or the `data:` scheme. The prefixes are
/// compared ASCII-case-insensitively because URI schemes are case-insensitive,
/// so `HTTPS://cdn.example.com/x.js` is remote too.
fn is_remote_url(src: &str) -> bool {
    const REMOTE_PREFIXES: [&str; 4] = ["http://", "https://", "//", "data:"];

    REMOTE_PREFIXES.iter().any(|prefix| {
        // `get` yields `None` when `src` is shorter than the prefix or the cut
        // would land inside a multi-byte character, so this never panics.
        src.get(..prefix.len())
            .is_some_and(|head| head.eq_ignore_ascii_case(prefix))
    })
}
65
66pub(crate) fn parse_html_to_module(file_id: FileId, source: &str, content_hash: u64) -> ModuleInfo {
68 let suppressions = crate::suppress::parse_suppressions_from_source(source);
69
70 let stripped = HTML_COMMENT_RE.replace_all(source, "");
72
73 let mut imports = Vec::new();
74
75 for cap in SCRIPT_SRC_RE.captures_iter(&stripped) {
77 if let Some(m) = cap.get(1) {
78 let src = m.as_str().trim();
79 if !src.is_empty() && !is_remote_url(src) {
80 imports.push(ImportInfo {
81 source: src.to_string(),
82 imported_name: ImportedName::SideEffect,
83 local_name: String::new(),
84 is_type_only: false,
85 span: Span::default(),
86 source_span: Span::default(),
87 });
88 }
89 }
90 }
91
92 for cap in LINK_HREF_RE.captures_iter(&stripped) {
95 if let Some(m) = cap.get(2) {
96 let href = m.as_str().trim();
97 if !href.is_empty() && !is_remote_url(href) {
98 imports.push(ImportInfo {
99 source: href.to_string(),
100 imported_name: ImportedName::SideEffect,
101 local_name: String::new(),
102 is_type_only: false,
103 span: Span::default(),
104 source_span: Span::default(),
105 });
106 }
107 }
108 }
109 for cap in LINK_HREF_REVERSE_RE.captures_iter(&stripped) {
110 if let Some(m) = cap.get(1) {
111 let href = m.as_str().trim();
112 if !href.is_empty() && !is_remote_url(href) {
113 imports.push(ImportInfo {
114 source: href.to_string(),
115 imported_name: ImportedName::SideEffect,
116 local_name: String::new(),
117 is_type_only: false,
118 span: Span::default(),
119 source_span: Span::default(),
120 });
121 }
122 }
123 }
124
125 imports.sort_unstable_by(|a, b| a.source.cmp(&b.source));
128 imports.dedup_by(|a, b| a.source == b.source);
129
130 let template_refs = angular::collect_angular_template_refs(source);
134 let member_accesses: Vec<MemberAccess> = template_refs
135 .into_iter()
136 .map(|name| MemberAccess {
137 object: ANGULAR_TPL_SENTINEL.to_string(),
138 member: name,
139 })
140 .collect();
141
142 ModuleInfo {
143 file_id,
144 exports: Vec::new(),
145 imports,
146 re_exports: Vec::new(),
147 dynamic_imports: Vec::new(),
148 dynamic_import_patterns: Vec::new(),
149 require_calls: Vec::new(),
150 member_accesses,
151 whole_object_uses: Vec::new(),
152 has_cjs_exports: false,
153 content_hash,
154 suppressions,
155 unused_import_bindings: Vec::new(),
156 line_offsets: fallow_types::extract::compute_line_offsets(source),
157 complexity: Vec::new(),
158 flag_uses: Vec::new(),
159 }
160}
161
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `html` with a fixed file id and content hash; the tests only
    /// inspect what was extracted from the markup.
    fn parse(html: &str) -> ModuleInfo {
        parse_html_to_module(FileId(0), html, 0)
    }

    /// Import specifiers of `info`, in stored order.
    fn sources(info: &ModuleInfo) -> Vec<&str> {
        info.imports
            .iter()
            .map(|import| import.source.as_str())
            .collect()
    }

    // ── is_html_file ─────────────────────────────────────────────

    #[test]
    fn is_html_file_html() {
        let path = Path::new("index.html");
        assert!(is_html_file(path));
    }

    #[test]
    fn is_html_file_nested() {
        let path = Path::new("pages/about.html");
        assert!(is_html_file(path));
    }

    #[test]
    fn is_html_file_rejects_htm() {
        let path = Path::new("index.htm");
        assert!(!is_html_file(path));
    }

    #[test]
    fn is_html_file_rejects_js() {
        let path = Path::new("app.js");
        assert!(!is_html_file(path));
    }

    #[test]
    fn is_html_file_rejects_ts() {
        let path = Path::new("app.ts");
        assert!(!is_html_file(path));
    }

    #[test]
    fn is_html_file_rejects_vue() {
        let path = Path::new("App.vue");
        assert!(!is_html_file(path));
    }

    // ── is_remote_url ────────────────────────────────────────────

    #[test]
    fn remote_url_http() {
        let url = "http://example.com/script.js";
        assert!(is_remote_url(url));
    }

    #[test]
    fn remote_url_https() {
        let url = "https://cdn.example.com/style.css";
        assert!(is_remote_url(url));
    }

    #[test]
    fn remote_url_protocol_relative() {
        let url = "//cdn.example.com/lib.js";
        assert!(is_remote_url(url));
    }

    #[test]
    fn remote_url_data() {
        let url = "data:text/javascript;base64,abc";
        assert!(is_remote_url(url));
    }

    #[test]
    fn local_relative_not_remote() {
        let url = "./src/entry.js";
        assert!(!is_remote_url(url));
    }

    #[test]
    fn local_root_relative_not_remote() {
        let url = "/src/entry.js";
        assert!(!is_remote_url(url));
    }

    // ── script extraction ────────────────────────────────────────

    #[test]
    fn extracts_module_script_src() {
        let info = parse(r#"<script type="module" src="./src/entry.js"></script>"#);
        assert_eq!(sources(&info), ["./src/entry.js"]);
    }

    #[test]
    fn extracts_plain_script_src() {
        let info = parse(r#"<script src="./src/polyfills.js"></script>"#);
        assert_eq!(sources(&info), ["./src/polyfills.js"]);
    }

    #[test]
    fn extracts_multiple_scripts() {
        let info = parse(
            r#"
            <script type="module" src="./src/entry.js"></script>
            <script src="./src/polyfills.js"></script>
            "#,
        );
        assert_eq!(sources(&info).len(), 2);
    }

    #[test]
    fn skips_inline_script() {
        let info = parse(r#"<script>console.log("hello");</script>"#);
        assert!(sources(&info).is_empty());
    }

    #[test]
    fn skips_remote_script() {
        let info = parse(r#"<script src="https://cdn.example.com/lib.js"></script>"#);
        assert!(sources(&info).is_empty());
    }

    #[test]
    fn skips_protocol_relative_script() {
        let info = parse(r#"<script src="//cdn.example.com/lib.js"></script>"#);
        assert!(sources(&info).is_empty());
    }

    // ── link extraction ──────────────────────────────────────────

    #[test]
    fn extracts_stylesheet_link() {
        let info = parse(r#"<link rel="stylesheet" href="./src/global.css" />"#);
        assert_eq!(sources(&info), ["./src/global.css"]);
    }

    #[test]
    fn extracts_modulepreload_link() {
        let info = parse(r#"<link rel="modulepreload" href="./src/vendor.js" />"#);
        assert_eq!(sources(&info), ["./src/vendor.js"]);
    }

    #[test]
    fn extracts_link_with_reversed_attrs() {
        let info = parse(r#"<link href="./src/global.css" rel="stylesheet" />"#);
        assert_eq!(sources(&info), ["./src/global.css"]);
    }

    #[test]
    fn skips_preload_link() {
        let info = parse(r#"<link rel="preload" href="./src/font.woff2" as="font" />"#);
        assert!(sources(&info).is_empty());
    }

    #[test]
    fn skips_icon_link() {
        let info = parse(r#"<link rel="icon" href="./favicon.ico" />"#);
        assert!(sources(&info).is_empty());
    }

    #[test]
    fn skips_remote_stylesheet() {
        let info = parse(r#"<link rel="stylesheet" href="https://fonts.googleapis.com/css" />"#);
        assert!(sources(&info).is_empty());
    }

    // ── HTML comments ────────────────────────────────────────────

    #[test]
    fn skips_commented_out_script() {
        let info = parse(
            r#"<!-- <script src="./old.js"></script> -->
            <script src="./new.js"></script>"#,
        );
        assert_eq!(sources(&info), ["./new.js"]);
    }

    #[test]
    fn skips_commented_out_link() {
        let info = parse(
            r#"<!-- <link rel="stylesheet" href="./old.css" /> -->
            <link rel="stylesheet" href="./new.css" />"#,
        );
        assert_eq!(sources(&info), ["./new.css"]);
    }

    // ── multi-line tags ──────────────────────────────────────────

    #[test]
    fn handles_multiline_script_tag() {
        let info = parse("<script\n type=\"module\"\n src=\"./src/entry.js\"\n></script>");
        assert_eq!(sources(&info), ["./src/entry.js"]);
    }

    #[test]
    fn handles_multiline_link_tag() {
        let info = parse("<link\n rel=\"stylesheet\"\n href=\"./src/global.css\"\n/>");
        assert_eq!(sources(&info), ["./src/global.css"]);
    }

    // ── whole documents ──────────────────────────────────────────

    #[test]
    fn full_vite_html() {
        let info = parse(
            r#"<!doctype html>
<html>
  <head>
    <link rel="stylesheet" href="./src/global.css" />
    <link rel="icon" href="/favicon.ico" />
  </head>
  <body>
    <div id="app"></div>
    <script type="module" src="./src/entry.js"></script>
  </body>
</html>"#,
        );
        let found = sources(&info);
        assert_eq!(found.len(), 2);
        assert!(found.contains(&"./src/global.css"));
        assert!(found.contains(&"./src/entry.js"));
    }

    // ── edge cases ───────────────────────────────────────────────

    #[test]
    fn empty_html() {
        let info = parse("");
        assert!(sources(&info).is_empty());
    }

    #[test]
    fn html_with_no_assets() {
        let info = parse(r"<!doctype html><html><body><h1>Hello</h1></body></html>");
        assert!(sources(&info).is_empty());
    }

    #[test]
    fn single_quoted_attributes() {
        let info = parse(r"<script src='./src/entry.js'></script>");
        assert_eq!(sources(&info), ["./src/entry.js"]);
    }

    #[test]
    fn all_imports_are_side_effect() {
        let info = parse(
            r#"<script src="./entry.js"></script>
            <link rel="stylesheet" href="./style.css" />"#,
        );
        let every_import_is_side_effect = info.imports.iter().all(|import| {
            matches!(import.imported_name, ImportedName::SideEffect)
                && import.local_name.is_empty()
                && !import.is_type_only
        });
        assert!(every_import_is_side_effect);
    }

    #[test]
    fn suppression_comments_extracted() {
        // Only the import count is checked here: the suppression comment must
        // not prevent the script that follows it from being picked up.
        let info = parse("<!-- fallow-ignore-file -->\n<script src=\"./entry.js\"></script>");
        assert_eq!(sources(&info).len(), 1);
    }

    // ── Angular templates ────────────────────────────────────────

    #[test]
    fn angular_template_extracts_member_refs() {
        let info = parse(
            "<h1>{{ title() }}</h1>\n\
             <p [class.highlighted]=\"isHighlighted\">{{ greeting() }}</p>\n\
             <button (click)=\"onButtonClick()\">Toggle</button>",
        );
        let referenced: rustc_hash::FxHashSet<&str> = info
            .member_accesses
            .iter()
            .filter(|access| access.object == ANGULAR_TPL_SENTINEL)
            .map(|access| access.member.as_str())
            .collect();
        for expected in ["title", "isHighlighted", "greeting", "onButtonClick"] {
            assert!(referenced.contains(expected), "should contain '{expected}'");
        }
    }

    #[test]
    fn plain_html_no_angular_refs() {
        let info = parse("<!doctype html><html><body><h1>Hello</h1></body></html>");
        assert!(info.member_accesses.is_empty());
    }
}