/// A script fragment lifted out of a host document (HTML, Vue, Svelte or Astro).
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so blocks can be printed in
/// diagnostics, duplicated, and compared directly in assertions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ScriptBlock {
    /// Bytes of the script body. The extractors skip a single line break that
    /// immediately follows the opening tag or frontmatter fence.
    pub content: Vec<u8>,
    /// Language label for `content`; the extractors in this file produce either
    /// `"typescript"` or `"javascript"`.
    pub lang: &'static str,
    /// 1-based line number in the host document on which `content` starts.
    pub start_line: u32,
}
pub fn extract_script_blocks(source: &[u8], extension: &str) -> Vec<ScriptBlock> {
match extension {
"html" => extract_html_script_tags(source, "javascript"),
"vue" => extract_vue_scripts(source),
"svelte" => extract_svelte_scripts(source),
"astro" => extract_astro_scripts(source),
_ => Vec::new(),
}
}
/// Extracts the `<script>` blocks of a Vue single-file component.
///
/// Delegates to `extract_html_script_tags`; blocks without a recognised `lang`
/// attribute are labelled JavaScript.
fn extract_vue_scripts(source: &[u8]) -> Vec<ScriptBlock> {
    const DEFAULT_LANG: &str = "javascript";
    extract_html_script_tags(source, DEFAULT_LANG)
}
/// Extracts the `<script>` blocks of a Svelte component.
///
/// Delegates to `extract_html_script_tags`; blocks without a recognised `lang`
/// attribute are labelled JavaScript.
fn extract_svelte_scripts(source: &[u8]) -> Vec<ScriptBlock> {
    const DEFAULT_LANG: &str = "javascript";
    extract_html_script_tags(source, DEFAULT_LANG)
}
fn extract_astro_scripts(source: &[u8]) -> Vec<ScriptBlock> {
let mut blocks = Vec::new();
let text = String::from_utf8_lossy(source);
if let Some(first) = text.find("---") {
let after_first = first + 3;
if let Some(rest) = text.get(after_first..)
&& let Some(second) = rest.find("---")
{
let mut fm_start = after_first;
if text.as_bytes().get(fm_start) == Some(&b'\n') {
fm_start += 1;
} else if text.as_bytes().get(fm_start) == Some(&b'\r')
&& text.as_bytes().get(fm_start + 1) == Some(&b'\n')
{
fm_start += 2;
}
let fm_end = after_first + second;
if let Some(frontmatter) = text.get(fm_start..fm_end) {
let start_line = text.get(..fm_start).map(count_newlines_in).unwrap_or(0) + 1;
if !frontmatter.trim().is_empty() {
blocks.push(ScriptBlock {
content: frontmatter.as_bytes().to_vec(),
lang: "typescript",
start_line: start_line as u32,
});
}
}
}
}
blocks.extend(extract_html_script_tags(source, "typescript"));
blocks
}
/// Extracts the body of every `<script>` element found in `source`.
///
/// Tag names are matched ASCII case-insensitively. `<script` must be followed
/// by whitespace or `>` to count as a tag, so `<scripts>` is ignored. The
/// language of each block comes from `detect_script_lang`, with `default_lang`
/// used when the tag does not declare one. A single line break directly after
/// the opening tag is skipped, and `start_line` is the 1-based line on which
/// the remaining content begins. Elements whose content is empty or only
/// whitespace are not reported. Scanning stops at the first unterminated tag.
///
/// The end of the opening tag is located with `find_open_tag_end`, so a `>`
/// inside a quoted attribute value (for example
/// `generic="T extends Record<string, number>"`) does not cut the tag short.
fn extract_html_script_tags(source: &[u8], default_lang: &str) -> Vec<ScriptBlock> {
    const OPEN_TAG: &str = "<script";
    const CLOSE_TAG: &str = "</script";

    let mut blocks = Vec::new();
    let text = String::from_utf8_lossy(source);
    // ASCII lowercasing never changes byte lengths, so offsets found in
    // `text_lower` are valid offsets into `text` as well.
    let text_lower = text.to_ascii_lowercase();
    let mut search_from = 0;

    while let Some(rest) = text_lower.get(search_from..) {
        let Some(pos) = rest.find(OPEN_TAG) else {
            break;
        };
        let tag_start = search_from + pos;
        let after_script = tag_start + OPEN_TAG.len();
        let Some(&next_char) = text.as_bytes().get(after_script) else {
            break;
        };
        // Reject look-alikes such as `<scripts` or `<script-foo`.
        if !matches!(next_char, b' ' | b'\t' | b'\n' | b'\r' | b'>') {
            search_from = after_script;
            continue;
        }

        let Some(rest_after_script) = text.get(after_script..) else {
            break;
        };
        let Some(tag_len) = find_open_tag_end(rest_after_script) else {
            break;
        };
        let tag_close = after_script + tag_len;
        let Some(open_tag) = text.get(tag_start..=tag_close) else {
            break;
        };
        let lang = detect_script_lang(open_tag, default_lang);

        // Skip the line break that directly follows the opening tag.
        let mut content_start = tag_close + 1;
        if text.as_bytes().get(content_start) == Some(&b'\n') {
            content_start += 1;
        } else if text.as_bytes().get(content_start) == Some(&b'\r')
            && text.as_bytes().get(content_start + 1) == Some(&b'\n')
        {
            content_start += 2;
        }

        let Some(rest_content) = text_lower.get(content_start..) else {
            break;
        };
        let Some(close_pos) = rest_content.find(CLOSE_TAG) else {
            break;
        };
        let content_end = content_start + close_pos;
        let Some(content) = text.get(content_start..content_end) else {
            break;
        };

        let start_line = text
            .get(..content_start)
            .map(count_newlines_in)
            .unwrap_or(0)
            + 1;
        if !content.trim().is_empty() {
            blocks.push(ScriptBlock {
                content: content.as_bytes().to_vec(),
                lang,
                // Plain narrowing cast: a line count above `u32::MAX` would wrap.
                start_line: start_line as u32,
            });
        }

        // Resume scanning after the `>` that ends the closing tag.
        search_from = content_end;
        let Some(rest_close) = text_lower.get(search_from..) else {
            break;
        };
        match rest_close.find('>') {
            Some(pos) => search_from += pos + 1,
            None => break,
        }
    }
    blocks
}

/// Returns the byte offset, within `tag_rest`, of the `>` that ends an opening tag.
///
/// `tag_rest` is the text right after the tag name. A quote only opens an
/// attribute value when it follows `=` (optionally separated by whitespace),
/// and a `>` inside such a quoted value is skipped. If the quotes never
/// balance, the first `>` is used instead so malformed markup is still handled.
fn find_open_tag_end(tag_rest: &str) -> Option<usize> {
    let mut quote: Option<u8> = None;
    let mut value_may_start = false;

    // Every delimiter is ASCII, so scanning bytes cannot land inside a
    // multi-byte UTF-8 sequence.
    for (idx, byte) in tag_rest.bytes().enumerate() {
        if let Some(open) = quote {
            if byte == open {
                quote = None;
            }
            continue;
        }
        match byte {
            b'>' => return Some(idx),
            b'=' => value_may_start = true,
            b'"' | b'\'' if value_may_start => {
                quote = Some(byte);
                value_may_start = false;
            }
            // Whitespace between `=` and the value keeps the value pending.
            b' ' | b'\t' | b'\n' | b'\r' | 0x0C => {}
            _ => value_may_start = false,
        }
    }

    tag_rest.find('>')
}
/// Determines the language label of a `<script>` block from its opening tag.
///
/// Only a real `lang` attribute is consulted: look-alike names such as
/// `data-lang`, or the text `lang=` inside another attribute's value, are
/// ignored. The attribute name and value are compared ASCII
/// case-insensitively, and whitespace around `=` is accepted. A value starting
/// with `ts` or `typescript` yields `"typescript"`; a value starting with `js`
/// or `javascript` yields `"javascript"`. When the attribute is missing or its
/// value is not recognised, the result is `"typescript"` if `default_lang` is
/// exactly `"typescript"`, and `"javascript"` otherwise.
fn detect_script_lang(open_tag: &str, default_lang: &str) -> &'static str {
    if let Some(value) = find_lang_attr_value(open_tag) {
        let has_prefix = |prefix: &str| {
            value
                .as_bytes()
                .get(..prefix.len())
                .is_some_and(|head| head.eq_ignore_ascii_case(prefix.as_bytes()))
        };
        if has_prefix("ts") || has_prefix("typescript") {
            return "typescript";
        }
        if has_prefix("js") || has_prefix("javascript") {
            return "javascript";
        }
    }

    if default_lang == "typescript" {
        "typescript"
    } else {
        "javascript"
    }
}

/// Returns the unquoted value of the first `lang` attribute in `open_tag`.
///
/// `open_tag` is expected to look like `<script ...>`; when it starts with `<`
/// the tag name is skipped, otherwise the whole string is read as an attribute
/// list. A `lang` attribute without a value yields `Some("")`. Returns `None`
/// when the tag has no `lang` attribute.
fn find_lang_attr_value(open_tag: &str) -> Option<&str> {
    let mut rest = match open_tag.strip_prefix('<') {
        Some(after_lt) => {
            after_lt.trim_start_matches(|c: char| !c.is_ascii_whitespace() && c != '>')
        }
        None => open_tag,
    };

    loop {
        // Skip separators between attributes, including the `/` of `/>`.
        rest = rest.trim_start_matches(|c: char| c.is_ascii_whitespace() || c == '/');
        if rest.is_empty() || rest.starts_with('>') {
            return None;
        }

        let name_len = rest
            .find(|c: char| c.is_ascii_whitespace() || matches!(c, '=' | '>' | '/'))
            .unwrap_or(rest.len());
        let (name, after_name) = rest.split_at(name_len);
        let after_name = after_name.trim_start_matches(|c: char| c.is_ascii_whitespace());

        let (value, remainder) = match after_name.strip_prefix('=') {
            // Attribute without a value, e.g. `setup`.
            None => ("", after_name),
            Some(after_eq) => {
                let after_eq = after_eq.trim_start_matches(|c: char| c.is_ascii_whitespace());
                if let Some(body) = after_eq.strip_prefix('"') {
                    // An unterminated quote swallows the rest of the tag.
                    body.split_once('"').unwrap_or((body, ""))
                } else if let Some(body) = after_eq.strip_prefix('\'') {
                    body.split_once('\'').unwrap_or((body, ""))
                } else {
                    let end = after_eq
                        .find(|c: char| c.is_ascii_whitespace() || c == '>')
                        .unwrap_or(after_eq.len());
                    after_eq.split_at(end)
                }
            }
        };

        if name.eq_ignore_ascii_case("lang") {
            return Some(value);
        }
        rest = remainder;
    }
}
/// Counts the line-feed (`\n`) characters in `s`.
fn count_newlines_in(s: &str) -> usize {
    s.matches('\n').count()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Decodes a block's bytes so tests can make substring assertions.
    fn text_of(block: &ScriptBlock) -> String {
        String::from_utf8_lossy(&block.content).into_owned()
    }

    /// Reduces blocks to `(lang, start_line)` pairs; comparing the whole list
    /// also pins the number of blocks.
    fn summary(blocks: &[ScriptBlock]) -> Vec<(&'static str, u32)> {
        blocks
            .iter()
            .map(|block| (block.lang, block.start_line))
            .collect()
    }

    #[test]
    fn test_vue_basic() {
        let sfc = b"<template>\n <div>hello</div>\n</template>\n\n<script setup lang=\"ts\">\nimport { ref } from 'vue'\nconst msg = ref('hi')\n</script>\n";
        let found = extract_script_blocks(sfc, "vue");
        assert_eq!(summary(&found), [("typescript", 6)]);
        let body = text_of(&found[0]);
        for needle in ["import { ref }", "const msg"] {
            assert!(body.contains(needle));
        }
    }

    #[test]
    fn test_vue_two_scripts() {
        let sfc = b"<script>\nexport default { name: 'Foo' }\n</script>\n\n<script setup lang=\"ts\">\nconst x = 1\n</script>\n";
        let found = extract_script_blocks(sfc, "vue");
        assert_eq!(summary(&found), [("javascript", 2), ("typescript", 6)]);
    }

    #[test]
    fn test_vue_no_script() {
        let sfc = b"<template>\n <div>hello</div>\n</template>\n";
        assert!(extract_script_blocks(sfc, "vue").is_empty());
    }

    #[test]
    fn test_astro_frontmatter() {
        let page = b"---\nimport Layout from './Layout.astro'\nconst title = 'Hello'\n---\n\n<Layout title={title}>\n <h1>Hello</h1>\n</Layout>\n";
        let found = extract_script_blocks(page, "astro");
        assert_eq!(summary(&found), [("typescript", 2)]);
        assert!(text_of(&found[0]).contains("import Layout"));
    }

    #[test]
    fn test_svelte_basic() {
        let component = b"<script lang=\"ts\">\n let count = 0\n function inc() { count++ }\n</script>\n\n<button on:click={inc}>{count}</button>\n";
        let found = extract_script_blocks(component, "svelte");
        assert_eq!(summary(&found), [("typescript", 2)]);
    }

    #[test]
    fn test_html_script_extraction() {
        let page = b"<!DOCTYPE html>\n<html>\n<head>\n<script>\nfunction greet(name) {\n return 'Hello ' + name;\n}\n</script>\n</head>\n<body></body>\n</html>\n";
        let found = extract_script_blocks(page, "html");
        assert_eq!(summary(&found), [("javascript", 5)]);
        assert!(text_of(&found[0]).contains("function greet"));
    }

    #[test]
    fn test_html_no_script() {
        let page = b"<!DOCTYPE html>\n<html><body><p>Hello</p></body></html>\n";
        assert!(extract_script_blocks(page, "html").is_empty());
    }

    #[test]
    fn test_html_typescript_script() {
        let page = b"<html>\n<body>\n<script lang=\"ts\">\nconst x: number = 42;\n</script>\n</body>\n</html>\n";
        let found = extract_script_blocks(page, "html");
        let langs: Vec<&str> = found.iter().map(|block| block.lang).collect();
        assert_eq!(langs, ["typescript"]);
    }

    #[test]
    fn test_detect_lang_ts() {
        let typescript_tags = [
            "<script lang=\"ts\">",
            "<script lang='typescript'>",
            "<script setup lang=\"ts\">",
        ];
        for tag in typescript_tags {
            assert_eq!(detect_script_lang(tag, "javascript"), "typescript");
        }
    }

    #[test]
    fn test_detect_lang_default() {
        // (opening tag, default language, expected label)
        let cases = [
            ("<script>", "javascript", "javascript"),
            ("<script setup>", "javascript", "javascript"),
            ("<script>", "typescript", "typescript"),
        ];
        for (tag, default_lang, expected) in cases {
            assert_eq!(detect_script_lang(tag, default_lang), expected);
        }
    }
}