1use std::path::Path;
8use std::sync::LazyLock;
9
10use oxc_span::Span;
11
12use crate::{ImportInfo, ImportedName, ModuleInfo};
13use fallow_types::discover::FileId;
14
15static HTML_COMMENT_RE: LazyLock<regex::Regex> =
17 LazyLock::new(|| regex::Regex::new(r"(?s)<!--.*?-->").expect("valid regex"));
18
19static SCRIPT_SRC_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
23 regex::Regex::new(r#"(?si)<script\b(?:[^>"']|"[^"]*"|'[^']*')*?\bsrc\s*=\s*["']([^"']+)["']"#)
24 .expect("valid regex")
25});
26
27static LINK_HREF_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
31 regex::Regex::new(
32 r#"(?si)<link\b(?:[^>"']|"[^"]*"|'[^']*')*?\brel\s*=\s*["'](stylesheet|modulepreload)["'](?:[^>"']|"[^"]*"|'[^']*')*?\bhref\s*=\s*["']([^"']+)["']"#,
33 )
34 .expect("valid regex")
35});
36
37static LINK_HREF_REVERSE_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
39 regex::Regex::new(
40 r#"(?si)<link\b(?:[^>"']|"[^"]*"|'[^']*')*?\bhref\s*=\s*["']([^"']+)["'](?:[^>"']|"[^"]*"|'[^']*')*?\brel\s*=\s*["'](stylesheet|modulepreload)["']"#,
41 )
42 .expect("valid regex")
43});
44
/// Returns `true` when `path` has the extension `html`.
///
/// The comparison is exact and case-sensitive: `.htm`, `.HTML`, paths without
/// an extension, and extensions that are not valid UTF-8 all yield `false`.
pub(crate) fn is_html_file(path: &Path) -> bool {
    let extension = path.extension().and_then(|raw| raw.to_str());
    matches!(extension, Some("html"))
}
52
/// Returns `true` when `src` refers to something that is not a file in the
/// project: an `http://` / `https://` URL, a protocol-relative `//host/...`
/// URL, or an inline `data:` URI.
///
/// URL schemes are case-insensitive, so the scheme prefixes are compared
/// without regard to ASCII case (`HTTPS://cdn...` is just as remote as
/// `https://cdn...`). Relative and root-relative paths return `false`.
fn is_remote_url(src: &str) -> bool {
    const REMOTE_SCHEME_PREFIXES: [&str; 3] = ["http://", "https://", "data:"];

    src.starts_with("//")
        || REMOTE_SCHEME_PREFIXES.iter().any(|prefix| {
            // `get` yields `None` when `src` is shorter than the prefix or the
            // cut would land inside a multi-byte character; neither can match.
            src.get(..prefix.len())
                .is_some_and(|head| head.eq_ignore_ascii_case(prefix))
        })
}
60
61pub(crate) fn parse_html_to_module(file_id: FileId, source: &str, content_hash: u64) -> ModuleInfo {
63 let suppressions = crate::suppress::parse_suppressions_from_source(source);
64
65 let stripped = HTML_COMMENT_RE.replace_all(source, "");
67
68 let mut imports = Vec::new();
69
70 for cap in SCRIPT_SRC_RE.captures_iter(&stripped) {
72 if let Some(m) = cap.get(1) {
73 let src = m.as_str().trim();
74 if !src.is_empty() && !is_remote_url(src) {
75 imports.push(ImportInfo {
76 source: src.to_string(),
77 imported_name: ImportedName::SideEffect,
78 local_name: String::new(),
79 is_type_only: false,
80 span: Span::default(),
81 source_span: Span::default(),
82 });
83 }
84 }
85 }
86
87 for cap in LINK_HREF_RE.captures_iter(&stripped) {
90 if let Some(m) = cap.get(2) {
91 let href = m.as_str().trim();
92 if !href.is_empty() && !is_remote_url(href) {
93 imports.push(ImportInfo {
94 source: href.to_string(),
95 imported_name: ImportedName::SideEffect,
96 local_name: String::new(),
97 is_type_only: false,
98 span: Span::default(),
99 source_span: Span::default(),
100 });
101 }
102 }
103 }
104 for cap in LINK_HREF_REVERSE_RE.captures_iter(&stripped) {
105 if let Some(m) = cap.get(1) {
106 let href = m.as_str().trim();
107 if !href.is_empty() && !is_remote_url(href) {
108 imports.push(ImportInfo {
109 source: href.to_string(),
110 imported_name: ImportedName::SideEffect,
111 local_name: String::new(),
112 is_type_only: false,
113 span: Span::default(),
114 source_span: Span::default(),
115 });
116 }
117 }
118 }
119
120 imports.sort_unstable_by(|a, b| a.source.cmp(&b.source));
123 imports.dedup_by(|a, b| a.source == b.source);
124
125 ModuleInfo {
126 file_id,
127 exports: Vec::new(),
128 imports,
129 re_exports: Vec::new(),
130 dynamic_imports: Vec::new(),
131 dynamic_import_patterns: Vec::new(),
132 require_calls: Vec::new(),
133 member_accesses: Vec::new(),
134 whole_object_uses: Vec::new(),
135 has_cjs_exports: false,
136 content_hash,
137 suppressions,
138 unused_import_bindings: Vec::new(),
139 line_offsets: fallow_types::extract::compute_line_offsets(source),
140 complexity: Vec::new(),
141 }
142}
143
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the parser with a fixed file id and content hash; the tests below
    /// only vary the markup.
    fn parse(html: &str) -> ModuleInfo {
        parse_html_to_module(FileId(0), html, 0)
    }

    /// Import specifiers extracted from `html`, in the order the parser
    /// returns them.
    fn import_sources(html: &str) -> Vec<String> {
        parse(html)
            .imports
            .iter()
            .map(|import| import.source.clone())
            .collect()
    }

    // --- is_html_file ---

    #[test]
    fn is_html_file_html() {
        let path = Path::new("index.html");
        assert!(is_html_file(path));
    }

    #[test]
    fn is_html_file_nested() {
        let path = Path::new("pages/about.html");
        assert!(is_html_file(path));
    }

    #[test]
    fn is_html_file_rejects_htm() {
        let path = Path::new("index.htm");
        assert!(!is_html_file(path));
    }

    #[test]
    fn is_html_file_rejects_js() {
        let path = Path::new("app.js");
        assert!(!is_html_file(path));
    }

    #[test]
    fn is_html_file_rejects_ts() {
        let path = Path::new("app.ts");
        assert!(!is_html_file(path));
    }

    #[test]
    fn is_html_file_rejects_vue() {
        let path = Path::new("App.vue");
        assert!(!is_html_file(path));
    }

    // --- is_remote_url ---

    #[test]
    fn remote_url_http() {
        let url = "http://example.com/script.js";
        assert!(is_remote_url(url));
    }

    #[test]
    fn remote_url_https() {
        let url = "https://cdn.example.com/style.css";
        assert!(is_remote_url(url));
    }

    #[test]
    fn remote_url_protocol_relative() {
        let url = "//cdn.example.com/lib.js";
        assert!(is_remote_url(url));
    }

    #[test]
    fn remote_url_data() {
        let url = "data:text/javascript;base64,abc";
        assert!(is_remote_url(url));
    }

    #[test]
    fn local_relative_not_remote() {
        let path = "./src/entry.js";
        assert!(!is_remote_url(path));
    }

    #[test]
    fn local_root_relative_not_remote() {
        let path = "/src/entry.js";
        assert!(!is_remote_url(path));
    }

    // --- <script src> extraction ---

    #[test]
    fn extracts_module_script_src() {
        let html = r#"<script type="module" src="./src/entry.js"></script>"#;
        assert_eq!(import_sources(html), ["./src/entry.js"]);
    }

    #[test]
    fn extracts_plain_script_src() {
        let html = r#"<script src="./src/polyfills.js"></script>"#;
        assert_eq!(import_sources(html), ["./src/polyfills.js"]);
    }

    #[test]
    fn extracts_multiple_scripts() {
        let html = r#"
            <script type="module" src="./src/entry.js"></script>
            <script src="./src/polyfills.js"></script>
            "#;
        assert_eq!(parse(html).imports.len(), 2);
    }

    #[test]
    fn skips_inline_script() {
        let html = r#"<script>console.log("hello");</script>"#;
        assert!(parse(html).imports.is_empty());
    }

    #[test]
    fn skips_remote_script() {
        let html = r#"<script src="https://cdn.example.com/lib.js"></script>"#;
        assert!(parse(html).imports.is_empty());
    }

    #[test]
    fn skips_protocol_relative_script() {
        let html = r#"<script src="//cdn.example.com/lib.js"></script>"#;
        assert!(parse(html).imports.is_empty());
    }

    // --- <link href> extraction ---

    #[test]
    fn extracts_stylesheet_link() {
        let html = r#"<link rel="stylesheet" href="./src/global.css" />"#;
        assert_eq!(import_sources(html), ["./src/global.css"]);
    }

    #[test]
    fn extracts_modulepreload_link() {
        let html = r#"<link rel="modulepreload" href="./src/vendor.js" />"#;
        assert_eq!(import_sources(html), ["./src/vendor.js"]);
    }

    #[test]
    fn extracts_link_with_reversed_attrs() {
        let html = r#"<link href="./src/global.css" rel="stylesheet" />"#;
        assert_eq!(import_sources(html), ["./src/global.css"]);
    }

    #[test]
    fn skips_preload_link() {
        let html = r#"<link rel="preload" href="./src/font.woff2" as="font" />"#;
        assert!(parse(html).imports.is_empty());
    }

    #[test]
    fn skips_icon_link() {
        let html = r#"<link rel="icon" href="./favicon.ico" />"#;
        assert!(parse(html).imports.is_empty());
    }

    #[test]
    fn skips_remote_stylesheet() {
        let html = r#"<link rel="stylesheet" href="https://fonts.googleapis.com/css" />"#;
        assert!(parse(html).imports.is_empty());
    }

    // --- HTML comments ---

    #[test]
    fn skips_commented_out_script() {
        let html = r#"<!-- <script src="./old.js"></script> -->
            <script src="./new.js"></script>"#;
        assert_eq!(import_sources(html), ["./new.js"]);
    }

    #[test]
    fn skips_commented_out_link() {
        let html = r#"<!-- <link rel="stylesheet" href="./old.css" /> -->
            <link rel="stylesheet" href="./new.css" />"#;
        assert_eq!(import_sources(html), ["./new.css"]);
    }

    // --- tags spread over several lines ---

    #[test]
    fn handles_multiline_script_tag() {
        let html = "<script\n type=\"module\"\n src=\"./src/entry.js\"\n></script>";
        assert_eq!(import_sources(html), ["./src/entry.js"]);
    }

    #[test]
    fn handles_multiline_link_tag() {
        let html = "<link\n rel=\"stylesheet\"\n href=\"./src/global.css\"\n/>";
        assert_eq!(import_sources(html), ["./src/global.css"]);
    }

    // --- whole documents ---

    #[test]
    fn full_vite_html() {
        let html = r#"<!doctype html>
<html>
  <head>
    <link rel="stylesheet" href="./src/global.css" />
    <link rel="icon" href="/favicon.ico" />
  </head>
  <body>
    <div id="app"></div>
    <script type="module" src="./src/entry.js"></script>
  </body>
</html>"#;
        let sources = import_sources(html);
        assert_eq!(sources.len(), 2);
        for expected in ["./src/global.css", "./src/entry.js"] {
            assert!(sources.iter().any(|source| source == expected));
        }
    }

    // --- edge cases ---

    #[test]
    fn empty_html() {
        assert!(parse("").imports.is_empty());
    }

    #[test]
    fn html_with_no_assets() {
        let html = r"<!doctype html><html><body><h1>Hello</h1></body></html>";
        assert!(parse(html).imports.is_empty());
    }

    #[test]
    fn single_quoted_attributes() {
        let html = r"<script src='./src/entry.js'></script>";
        assert_eq!(import_sources(html), ["./src/entry.js"]);
    }

    #[test]
    fn all_imports_are_side_effect() {
        let html = r#"<script src="./entry.js"></script>
            <link rel="stylesheet" href="./style.css" />"#;
        let info = parse(html);
        for import in &info.imports {
            assert!(matches!(import.imported_name, ImportedName::SideEffect));
            assert_eq!(import.local_name, "");
            assert!(!import.is_type_only);
        }
    }

    #[test]
    fn suppression_comments_extracted() {
        let html = "<!-- fallow-ignore-file -->\n<script src=\"./entry.js\"></script>";
        // Only checks that the script after the comment is still picked up.
        assert_eq!(parse(html).imports.len(), 1);
    }
}