use crate::parser::SemanticExtractor;
use crate::types::{FunctionInfo, SemanticAnalysis};
use regex::Regex;
use std::sync::LazyLock;
use tracing;
/// Pattern for a line that opens with a CSS class (`.name`) or id (`#name`) selector.
///
/// The match is anchored at the start of the text it is given: a leading `.` or `#`,
/// then one or more word characters or hyphens, then exactly one of whitespace, `,`,
/// `:` or `{`. A selector name that is the last thing in the text therefore does not
/// match, because the trailing character is mandatory.
///
/// Built on first access; the `expect` only fires if the hard-coded pattern is invalid.
static CSS_SELECTOR: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"^[.#][\w-]+[\s,:{]").expect("valid CSS selector pattern"));
/// Pattern for a YAML mapping key that starts in column 0 (no indentation).
///
/// Capture group 1 is the key: a word character followed by any number of word
/// characters or hyphens. The colon must be followed by whitespace *or* the end of
/// the text, so both `name: value` and a bare block opener such as `services:` are
/// recognised, while `http://example` style text is not.
///
/// Intended to be applied to one line at a time (the pattern is not multi-line, so
/// `$` means "end of the given text").
///
/// Built on first access; the `expect` only fires if the hard-coded pattern is invalid.
static YAML_TOP_KEY: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r"^(\w[\w-]*):(?:\s|$)").expect("valid YAML top-level key pattern")
});
/// Pattern for a JSON object key that sits at most two whitespace characters from
/// the start of the line.
///
/// Capture group 1 is the key without its quotes. Only keys made entirely of word
/// characters match, and the closing quote must be followed immediately by `:`.
/// The shallow-indentation limit is what stands in for "first level" here; the
/// pattern does not track object nesting.
///
/// Built on first access; the `expect` only fires if the hard-coded pattern is invalid.
static JSON_FIRST_KEY: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r#"^\s{0,2}"(\w+)":"#).expect("valid JSON first-level key pattern")
});
/// Pattern for a TOML table header that starts in column 0.
///
/// Capture group 1 is the text between the brackets. Both plain tables
/// (`[package]`) and array-of-tables headers (`[[bin]]`) are accepted; the optional
/// second `[` is consumed outside the capture group so that `[[bin]]` yields `bin`
/// rather than `[bin`.
///
/// Built on first access; the `expect` only fires if the hard-coded pattern is invalid.
static TOML_SECTION: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r"^\[\[?([^\]]+)\]").expect("valid TOML section header pattern")
});
/// Regex fallback for CSS: reports each class/id selector line as a [`FunctionInfo`].
///
/// Every line is stripped of leading whitespace and tested against `CSS_SELECTOR`.
/// For a matching line the reported `name` is the selector text only: everything
/// from the first `{` onwards is discarded, then trailing `,`, `:` and whitespace
/// are removed. `line` and `end_line` are both the 1-based number of that line;
/// `parameters` is empty and `return_type` is `None`.
///
/// All other fields of the returned [`SemanticAnalysis`] keep their default values.
pub fn extract_css(source: &str) -> SemanticAnalysis {
    let mut functions = Vec::new();
    for (idx, line) in source.lines().enumerate() {
        let trimmed = line.trim_start();
        if !CSS_SELECTOR.is_match(trimmed) {
            continue;
        }
        // A rule written on one line (`.btn { color: red; }`) or one whose first
        // declaration shares the selector line must not leak its body into the name,
        // so cut at the opening brace before trimming separators.
        let selector = trimmed
            .split_once('{')
            .map_or(trimmed, |(before_brace, _)| before_brace);
        let name = selector.trim_end_matches(|c: char| c == ',' || c == ':' || c.is_whitespace());
        if name.is_empty() {
            continue;
        }
        let line_no = idx + 1;
        functions.push(FunctionInfo {
            name: name.to_string(),
            line: line_no,
            end_line: line_no,
            parameters: Vec::new(),
            return_type: None,
        });
    }
    SemanticAnalysis {
        functions,
        ..Default::default()
    }
}
pub fn extract_yaml(source: &str) -> SemanticAnalysis {
let mut functions = Vec::new();
for (idx, line) in source.lines().enumerate() {
if let Some(caps) = YAML_TOP_KEY.captures(line) {
let name = caps[1].to_string();
let line_no = idx + 1;
functions.push(FunctionInfo {
name,
line: line_no,
end_line: line_no,
parameters: Vec::new(),
return_type: None,
});
}
}
SemanticAnalysis {
functions,
..Default::default()
}
}
/// Regex fallback for JSON: reports each line matched by `JSON_FIRST_KEY` as a
/// [`FunctionInfo`].
///
/// Lines are tested as-is. The entry's `name` is capture group 1 of the match (the
/// key without quotes); `line` and `end_line` are both the 1-based line number;
/// `parameters` is empty and `return_type` is `None`. Every other field of the
/// returned [`SemanticAnalysis`] keeps its default value.
pub fn extract_json(source: &str) -> SemanticAnalysis {
    let mut functions = Vec::new();

    // Pair each line with its 1-based number up front instead of adding 1 later.
    for (line_no, text) in (1usize..).zip(source.lines()) {
        let Some(caps) = JSON_FIRST_KEY.captures(text) else {
            continue;
        };
        let key = caps[1].to_string();
        functions.push(FunctionInfo {
            name: key,
            line: line_no,
            end_line: line_no,
            parameters: Vec::new(),
            return_type: None,
        });
    }

    SemanticAnalysis {
        functions,
        ..Default::default()
    }
}
/// Regex fallback for TOML: reports each line matched by `TOML_SECTION` as a
/// [`FunctionInfo`].
///
/// Lines are tested as-is. The entry's `name` is capture group 1 of the match;
/// `line` and `end_line` are both the 1-based line number; `parameters` is empty
/// and `return_type` is `None`. Every other field of the returned
/// [`SemanticAnalysis`] keeps its default value.
pub fn extract_toml(source: &str) -> SemanticAnalysis {
    let headers = source.lines().enumerate().filter_map(|(idx, text)| {
        TOML_SECTION.captures(text).map(|caps| {
            let line_no = idx + 1;
            FunctionInfo {
                name: caps[1].to_string(),
                line: line_no,
                end_line: line_no,
                parameters: Vec::new(),
                return_type: None,
            }
        })
    });

    SemanticAnalysis {
        functions: headers.collect(),
        ..Default::default()
    }
}
/// Analyses an Astro component by handing its frontmatter block to the TypeScript
/// extractor.
///
/// The frontmatter is located with `extract_frontmatter`. When none is found, an
/// empty (default) [`SemanticAnalysis`] is returned. Otherwise the block is passed
/// to [`SemanticExtractor::extract`] with the language `"typescript"` and `None`
/// for the two remaining arguments. An extractor error is logged as a warning and
/// replaced by a default analysis, so this function never fails.
pub fn extract_astro(source: &str) -> SemanticAnalysis {
    match extract_frontmatter(source) {
        None => SemanticAnalysis::default(),
        Some(frontmatter) => {
            match SemanticExtractor::extract(&frontmatter, "typescript", None, None) {
                Ok(analysis) => analysis,
                Err(err) => {
                    tracing::warn!(error = %err, "astro TypeScript extractor failed; returning empty analysis");
                    SemanticAnalysis::default()
                }
            }
        }
    }
}
/// Returns the text between the first two lines that start with `---`.
///
/// The fence lines themselves are excluded and the lines in between are joined
/// with `\n` (no trailing newline). Returns `None` when `source` contains fewer
/// than two such fence lines. Lines before the first fence are ignored.
fn extract_frontmatter(source: &str) -> Option<String> {
    let is_fence = |line: &str| line.starts_with("---");

    // Skip everything ahead of the opening fence, then consume the fence itself.
    let mut remaining = source.lines().skip_while(|line| !is_fence(line));
    remaining.next()?;

    let mut body: Vec<&str> = Vec::new();
    for line in remaining {
        if is_fence(line) {
            // Closing fence reached: everything gathered so far is the block.
            return Some(body.join("\n"));
        }
        body.push(line);
    }

    // Ran out of input without a closing fence.
    None
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Borrows the `name` of every extracted entry, in extraction order.
    fn function_names(analysis: &SemanticAnalysis) -> Vec<&str> {
        analysis.functions.iter().map(|f| f.name.as_str()).collect()
    }

    /// Class and id selectors that open a rule are both reported.
    #[test]
    fn test_regex_fallback_css_basic() {
        let analysis =
            extract_css(".container {\n color: red;\n}\n#header {\n font-size: 16px;\n}\n");
        let names = function_names(&analysis);
        assert!(
            names.contains(&".container"),
            "expected .container in {names:?}"
        );
        assert!(names.contains(&"#header"), "expected #header in {names:?}");
    }

    /// Unindented keys are reported; an indented key is not.
    #[test]
    fn test_regex_fallback_yaml_basic() {
        let analysis = extract_yaml("name: my-project\nversion: 1.0\n nested: value\n");
        let names = function_names(&analysis);
        assert!(names.contains(&"name"), "expected name in {names:?}");
        assert!(names.contains(&"version"), "expected version in {names:?}");
        assert!(
            !names.contains(&"nested"),
            "nested must not appear in {names:?}"
        );
    }

    /// Shallowly indented object keys are reported without their quotes.
    #[test]
    fn test_regex_fallback_json_basic() {
        let analysis = extract_json("{\n \"name\": \"project\",\n \"version\": \"1.0\"\n}\n");
        let names = function_names(&analysis);
        assert!(names.contains(&"name"), "expected name in {names:?}");
        assert!(names.contains(&"version"), "expected version in {names:?}");
    }

    /// Table headers are reported without their brackets.
    #[test]
    fn test_regex_fallback_toml_basic() {
        let analysis =
            extract_toml("[package]\nname = \"my-crate\"\n\n[dependencies]\nregex = \"1\"\n");
        let names = function_names(&analysis);
        assert!(names.contains(&"package"), "expected package in {names:?}");
        assert!(
            names.contains(&"dependencies"),
            "expected dependencies in {names:?}"
        );
    }

    /// Frontmatter is forwarded to the TypeScript extractor and yields something.
    #[cfg(feature = "lang-typescript")]
    #[test]
    fn test_regex_fallback_astro_basic() {
        let analysis = extract_astro(
            "---\nimport Foo from './Foo.astro';\nconst title = 'Hello';\n---\n<h1>{title}</h1>\n",
        );
        let found_something = !analysis.imports.is_empty() || !analysis.functions.is_empty();
        assert!(
            found_something,
            "expected imports or functions from frontmatter; got empty result"
        );
    }

    /// A document without fences produces an empty analysis.
    #[test]
    fn test_regex_fallback_astro_no_frontmatter() {
        let analysis = extract_astro("<h1>Hello World</h1>\n<p>No frontmatter here.</p>\n");
        assert!(analysis.functions.is_empty());
        assert!(analysis.imports.is_empty());
    }

    /// Every extractor tolerates empty input.
    #[test]
    fn test_regex_fallback_empty_file() {
        let empty = "";
        assert!(extract_css(empty).functions.is_empty());
        assert!(extract_yaml(empty).functions.is_empty());
        assert!(extract_json(empty).functions.is_empty());
        assert!(extract_toml(empty).functions.is_empty());
        assert!(extract_astro(empty).functions.is_empty());
    }
}