1use std::path::Path;
12use std::sync::LazyLock;
13
14use oxc_span::Span;
15
16use crate::sfc_template::angular::{self, ANGULAR_TPL_SENTINEL};
17use crate::{ImportInfo, ImportedName, MemberAccess, ModuleInfo};
18use fallow_types::discover::FileId;
19
20static HTML_COMMENT_RE: LazyLock<regex::Regex> =
22 LazyLock::new(|| regex::Regex::new(r"(?s)<!--.*?-->").expect("valid regex"));
23
24static SCRIPT_SRC_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
28 regex::Regex::new(r#"(?si)<script\b(?:[^>"']|"[^"]*"|'[^']*')*?\bsrc\s*=\s*["']([^"']+)["']"#)
29 .expect("valid regex")
30});
31
32static LINK_HREF_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
36 regex::Regex::new(
37 r#"(?si)<link\b(?:[^>"']|"[^"]*"|'[^']*')*?\brel\s*=\s*["'](stylesheet|modulepreload)["'](?:[^>"']|"[^"]*"|'[^']*')*?\bhref\s*=\s*["']([^"']+)["']"#,
38 )
39 .expect("valid regex")
40});
41
42static LINK_HREF_REVERSE_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
44 regex::Regex::new(
45 r#"(?si)<link\b(?:[^>"']|"[^"]*"|'[^']*')*?\bhref\s*=\s*["']([^"']+)["'](?:[^>"']|"[^"]*"|'[^']*')*?\brel\s*=\s*["'](stylesheet|modulepreload)["']"#,
46 )
47 .expect("valid regex")
48});
49
/// Returns `true` when `path` has the extension `html`.
///
/// The comparison is exact and case-sensitive, so `.htm` and `.HTML` are rejected, as
/// are paths with no extension or with an extension that is not valid UTF-8.
pub(crate) fn is_html_file(path: &Path) -> bool {
    let extension = path.extension().and_then(|ext| ext.to_str());
    matches!(extension, Some("html"))
}
57
/// Returns `true` when `src` points outside the local project.
///
/// Remote references are `http://` and `https://` URLs, protocol-relative URLs (`//host/...`),
/// and inline `data:` URIs. Scheme prefixes are compared ASCII case-insensitively
/// because URI schemes are case-insensitive, so `HTTPS://cdn...` is remote too.
///
/// Relative (`./a.js`) and root-relative (`/a.js`) paths, and the empty string, are not remote.
fn is_remote_url(src: &str) -> bool {
    const REMOTE_SCHEME_PREFIXES: [&str; 3] = ["http://", "https://", "data:"];

    if src.starts_with("//") {
        return true;
    }

    REMOTE_SCHEME_PREFIXES.iter().any(|prefix| {
        // `str::get` yields `None` when `src` is shorter than the prefix or the cut would
        // land inside a multi-byte character, so this never panics on non-ASCII paths.
        src.get(..prefix.len())
            .is_some_and(|head| head.eq_ignore_ascii_case(prefix))
    })
}
65
66pub(crate) fn parse_html_to_module(file_id: FileId, source: &str, content_hash: u64) -> ModuleInfo {
68 let suppressions = crate::suppress::parse_suppressions_from_source(source);
69
70 let stripped = HTML_COMMENT_RE.replace_all(source, "");
72
73 let mut imports = Vec::new();
74
75 for cap in SCRIPT_SRC_RE.captures_iter(&stripped) {
77 if let Some(m) = cap.get(1) {
78 let src = m.as_str().trim();
79 if !src.is_empty() && !is_remote_url(src) {
80 imports.push(ImportInfo {
81 source: src.to_string(),
82 imported_name: ImportedName::SideEffect,
83 local_name: String::new(),
84 is_type_only: false,
85 span: Span::default(),
86 source_span: Span::default(),
87 });
88 }
89 }
90 }
91
92 for cap in LINK_HREF_RE.captures_iter(&stripped) {
95 if let Some(m) = cap.get(2) {
96 let href = m.as_str().trim();
97 if !href.is_empty() && !is_remote_url(href) {
98 imports.push(ImportInfo {
99 source: href.to_string(),
100 imported_name: ImportedName::SideEffect,
101 local_name: String::new(),
102 is_type_only: false,
103 span: Span::default(),
104 source_span: Span::default(),
105 });
106 }
107 }
108 }
109 for cap in LINK_HREF_REVERSE_RE.captures_iter(&stripped) {
110 if let Some(m) = cap.get(1) {
111 let href = m.as_str().trim();
112 if !href.is_empty() && !is_remote_url(href) {
113 imports.push(ImportInfo {
114 source: href.to_string(),
115 imported_name: ImportedName::SideEffect,
116 local_name: String::new(),
117 is_type_only: false,
118 span: Span::default(),
119 source_span: Span::default(),
120 });
121 }
122 }
123 }
124
125 imports.sort_unstable_by(|a, b| a.source.cmp(&b.source));
128 imports.dedup_by(|a, b| a.source == b.source);
129
130 let template_refs = angular::collect_angular_template_refs(source);
134 let member_accesses: Vec<MemberAccess> = template_refs
135 .into_iter()
136 .map(|name| MemberAccess {
137 object: ANGULAR_TPL_SENTINEL.to_string(),
138 member: name,
139 })
140 .collect();
141
142 ModuleInfo {
143 file_id,
144 exports: Vec::new(),
145 imports,
146 re_exports: Vec::new(),
147 dynamic_imports: Vec::new(),
148 dynamic_import_patterns: Vec::new(),
149 require_calls: Vec::new(),
150 member_accesses,
151 whole_object_uses: Vec::new(),
152 has_cjs_exports: false,
153 content_hash,
154 suppressions,
155 unused_import_bindings: Vec::new(),
156 line_offsets: fallow_types::extract::compute_line_offsets(source),
157 complexity: Vec::new(),
158 }
159}
160
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the parser on `html` with a placeholder file id and content hash.
    fn parse(html: &str) -> ModuleInfo {
        parse_html_to_module(FileId(0), html, 0)
    }

    /// Collects the import specifiers found in `html`, in the order the parser returns them.
    fn import_sources(html: &str) -> Vec<String> {
        parse(html)
            .imports
            .iter()
            .map(|import| import.source.clone())
            .collect()
    }

    // ── is_html_file ──

    #[test]
    fn is_html_file_html() {
        let path = Path::new("index.html");
        assert!(is_html_file(path));
    }

    #[test]
    fn is_html_file_nested() {
        let path = Path::new("pages/about.html");
        assert!(is_html_file(path));
    }

    #[test]
    fn is_html_file_rejects_htm() {
        let path = Path::new("index.htm");
        assert!(!is_html_file(path));
    }

    #[test]
    fn is_html_file_rejects_js() {
        let path = Path::new("app.js");
        assert!(!is_html_file(path));
    }

    #[test]
    fn is_html_file_rejects_ts() {
        let path = Path::new("app.ts");
        assert!(!is_html_file(path));
    }

    #[test]
    fn is_html_file_rejects_vue() {
        let path = Path::new("App.vue");
        assert!(!is_html_file(path));
    }

    // ── is_remote_url ──

    #[test]
    fn remote_url_http() {
        let url = "http://example.com/script.js";
        assert!(is_remote_url(url));
    }

    #[test]
    fn remote_url_https() {
        let url = "https://cdn.example.com/style.css";
        assert!(is_remote_url(url));
    }

    #[test]
    fn remote_url_protocol_relative() {
        let url = "//cdn.example.com/lib.js";
        assert!(is_remote_url(url));
    }

    #[test]
    fn remote_url_data() {
        let url = "data:text/javascript;base64,abc";
        assert!(is_remote_url(url));
    }

    #[test]
    fn local_relative_not_remote() {
        let path = "./src/entry.js";
        assert!(!is_remote_url(path));
    }

    #[test]
    fn local_root_relative_not_remote() {
        let path = "/src/entry.js";
        assert!(!is_remote_url(path));
    }

    // ── <script src> extraction ──

    #[test]
    fn extracts_module_script_src() {
        let sources = import_sources(r#"<script type="module" src="./src/entry.js"></script>"#);
        assert_eq!(sources, ["./src/entry.js"]);
    }

    #[test]
    fn extracts_plain_script_src() {
        let sources = import_sources(r#"<script src="./src/polyfills.js"></script>"#);
        assert_eq!(sources, ["./src/polyfills.js"]);
    }

    #[test]
    fn extracts_multiple_scripts() {
        let html = r#"
            <script type="module" src="./src/entry.js"></script>
            <script src="./src/polyfills.js"></script>
            "#;
        assert_eq!(import_sources(html).len(), 2);
    }

    #[test]
    fn skips_inline_script() {
        let sources = import_sources(r#"<script>console.log("hello");</script>"#);
        assert!(sources.is_empty());
    }

    #[test]
    fn skips_remote_script() {
        let sources = import_sources(r#"<script src="https://cdn.example.com/lib.js"></script>"#);
        assert!(sources.is_empty());
    }

    #[test]
    fn skips_protocol_relative_script() {
        let sources = import_sources(r#"<script src="//cdn.example.com/lib.js"></script>"#);
        assert!(sources.is_empty());
    }

    // ── <link> extraction ──

    #[test]
    fn extracts_stylesheet_link() {
        let sources = import_sources(r#"<link rel="stylesheet" href="./src/global.css" />"#);
        assert_eq!(sources, ["./src/global.css"]);
    }

    #[test]
    fn extracts_modulepreload_link() {
        let sources = import_sources(r#"<link rel="modulepreload" href="./src/vendor.js" />"#);
        assert_eq!(sources, ["./src/vendor.js"]);
    }

    #[test]
    fn extracts_link_with_reversed_attrs() {
        let sources = import_sources(r#"<link href="./src/global.css" rel="stylesheet" />"#);
        assert_eq!(sources, ["./src/global.css"]);
    }

    #[test]
    fn skips_preload_link() {
        let sources =
            import_sources(r#"<link rel="preload" href="./src/font.woff2" as="font" />"#);
        assert!(sources.is_empty());
    }

    #[test]
    fn skips_icon_link() {
        let sources = import_sources(r#"<link rel="icon" href="./favicon.ico" />"#);
        assert!(sources.is_empty());
    }

    #[test]
    fn skips_remote_stylesheet() {
        let sources =
            import_sources(r#"<link rel="stylesheet" href="https://fonts.googleapis.com/css" />"#);
        assert!(sources.is_empty());
    }

    // ── HTML comments ──

    #[test]
    fn skips_commented_out_script() {
        let html = r#"<!-- <script src="./old.js"></script> -->
            <script src="./new.js"></script>"#;
        assert_eq!(import_sources(html), ["./new.js"]);
    }

    #[test]
    fn skips_commented_out_link() {
        let html = r#"<!-- <link rel="stylesheet" href="./old.css" /> -->
            <link rel="stylesheet" href="./new.css" />"#;
        assert_eq!(import_sources(html), ["./new.css"]);
    }

    // ── multi-line tags ──

    #[test]
    fn handles_multiline_script_tag() {
        let html = "<script\n type=\"module\"\n src=\"./src/entry.js\"\n></script>";
        assert_eq!(import_sources(html), ["./src/entry.js"]);
    }

    #[test]
    fn handles_multiline_link_tag() {
        let html = "<link\n rel=\"stylesheet\"\n href=\"./src/global.css\"\n/>";
        assert_eq!(import_sources(html), ["./src/global.css"]);
    }

    // ── whole documents ──

    #[test]
    fn full_vite_html() {
        let html = r#"<!doctype html>
<html>
  <head>
    <link rel="stylesheet" href="./src/global.css" />
    <link rel="icon" href="/favicon.ico" />
  </head>
  <body>
    <div id="app"></div>
    <script type="module" src="./src/entry.js"></script>
  </body>
</html>"#;
        let sources = import_sources(html);
        assert_eq!(sources.len(), 2);
        for expected in ["./src/global.css", "./src/entry.js"] {
            assert!(sources.iter().any(|found| found == expected));
        }
    }

    // ── edge cases ──

    #[test]
    fn empty_html() {
        assert!(import_sources("").is_empty());
    }

    #[test]
    fn html_with_no_assets() {
        let sources = import_sources(r"<!doctype html><html><body><h1>Hello</h1></body></html>");
        assert!(sources.is_empty());
    }

    #[test]
    fn single_quoted_attributes() {
        let sources = import_sources(r"<script src='./src/entry.js'></script>");
        assert_eq!(sources, ["./src/entry.js"]);
    }

    #[test]
    fn all_imports_are_side_effect() {
        let module = parse(
            r#"<script src="./entry.js"></script>
            <link rel="stylesheet" href="./style.css" />"#,
        );
        for import in &module.imports {
            assert!(matches!(import.imported_name, ImportedName::SideEffect));
            assert!(import.local_name.is_empty());
            assert!(!import.is_type_only);
        }
    }

    #[test]
    fn suppression_comments_extracted() {
        // Only import extraction is asserted here; the parsed suppressions are not inspected.
        let html = "<!-- fallow-ignore-file -->\n<script src=\"./entry.js\"></script>";
        assert_eq!(import_sources(html).len(), 1);
    }

    // ── Angular template references ──

    #[test]
    fn angular_template_extracts_member_refs() {
        let module = parse(
            "<h1>{{ title() }}</h1>\n\
             <p [class.highlighted]=\"isHighlighted\">{{ greeting() }}</p>\n\
             <button (click)=\"onButtonClick()\">Toggle</button>",
        );
        let names: rustc_hash::FxHashSet<&str> = module
            .member_accesses
            .iter()
            .filter(|access| access.object == ANGULAR_TPL_SENTINEL)
            .map(|access| access.member.as_str())
            .collect();
        for expected in ["title", "isHighlighted", "greeting", "onButtonClick"] {
            assert!(names.contains(expected), "should contain '{expected}'");
        }
    }

    #[test]
    fn plain_html_no_angular_refs() {
        let module = parse("<!doctype html><html><body><h1>Hello</h1></body></html>");
        assert!(module.member_accesses.is_empty());
    }
}