use super::analysis::extract_file_imports;
use super::ast::{
find_class_body, get_node_name, is_class_node, is_constant_node, is_function_node,
};
use super::extract::{extract_class, extract_constant, extract_function};
use super::language::get_tree_sitter_language;
use super::types::{CodeUnit, Language, UnitType};
use std::path::Path;
use tree_sitter::{Node, Parser};
/// A contiguous piece of text lifted out of an HTML document.
///
/// Used both for inline `<script>` bodies and for the remaining non-script
/// markup of a file.
#[derive(Debug, Clone, PartialEq, Eq)]
struct HtmlBlock {
    /// The extracted text, with lines joined by `\n`.
    content: String,
    /// Zero-based index, within the original document, of the line on which
    /// `content` begins.
    start_line: usize,
}
/// Collects the inline JavaScript found between `<script>` and `</script>` tags.
///
/// The scan is line based rather than a real HTML parse:
///
/// * An opening tag is recognised only when the trimmed line starts with
///   `<script`, contains no `src=` (external scripts have no inline body) and
///   the tag's closing `>` is on that same line.
/// * Text following the opening tag on the same line is part of the script.
/// * A script that is opened and closed on a single line
///   (`<script>init()</script>`) yields its own block and does not leave the
///   scanner inside a script; only the first script on such a line is read.
/// * A script that is never closed produces no block.
///
/// Each returned block's `start_line` is the zero-based index of the source
/// line holding the first line of `content`, so adding it to a one-based line
/// number inside the script gives the one-based line number in the document.
fn extract_script_blocks(source: &str) -> Vec<HtmlBlock> {
    let mut blocks = Vec::new();
    let mut in_script = false;
    let mut script_start_line = 0;
    let mut content_lines: Vec<&str> = Vec::new();

    for (i, line) in source.lines().enumerate() {
        let trimmed = line.trim();

        if in_script {
            if trimmed.starts_with("</script") || trimmed.contains("</script>") {
                // Keep any code that precedes the closing tag on this line.
                if let Some(pos) = line.find("</script") {
                    let before_tag = &line[..pos];
                    if !before_tag.trim().is_empty() {
                        content_lines.push(before_tag);
                    }
                }
                if !content_lines.is_empty() {
                    blocks.push(HtmlBlock {
                        content: content_lines.join("\n"),
                        start_line: script_start_line,
                    });
                }
                in_script = false;
                content_lines.clear();
            } else {
                content_lines.push(line);
            }
            continue;
        }

        if !trimmed.starts_with("<script") || trimmed.contains("src=") {
            continue;
        }
        // `>` is ASCII, so slicing one byte past it stays on a char boundary.
        let after_tag = match trimmed.find('>') {
            Some(pos) => &trimmed[pos + 1..],
            None => continue,
        };
        let after_trimmed = after_tag.trim();

        // The script is closed on the line that opened it: emit it right away
        // instead of entering the in-script state, which would otherwise
        // swallow the following markup up to the next `</script`.
        if after_trimmed.starts_with("</script") || after_tag.contains("</script>") {
            if let Some(close) = after_tag.find("</script") {
                let inline = &after_tag[..close];
                if !inline.trim().is_empty() {
                    blocks.push(HtmlBlock {
                        content: inline.to_string(),
                        start_line: i,
                    });
                }
            }
            continue;
        }

        in_script = true;
        content_lines.clear();
        if after_trimmed.is_empty() {
            // The body starts on the next line.
            script_start_line = i + 1;
        } else {
            // The body starts right after the opening tag.
            content_lines.push(after_tag);
            script_start_line = i;
        }
    }

    blocks
}
/// Builds a block holding the document's markup with inline script bodies removed.
///
/// Kept lines are: every non-blank line outside an inline script, plus the
/// lines carrying the opening and closing tags of each inline script. Blank
/// lines and script bodies are dropped, so only `start_line` (the zero-based
/// index of the first kept line) maps back to the original document; later
/// lines of `content` are not aligned with source line numbers.
///
/// Inline scripts are recognised with the same line-based rules as
/// `extract_script_blocks`: the trimmed line starts with `<script`, has no
/// `src=`, and contains `>`. A script closed on the line that opened it does
/// not switch the scanner into the in-script state, and opening tags are not
/// looked for while already inside a script.
///
/// Returns `None` when no line is kept.
fn extract_html_structure(source: &str) -> Option<HtmlBlock> {
    let mut structure_lines: Vec<(usize, &str)> = Vec::new();
    let mut in_script = false;

    for (i, line) in source.lines().enumerate() {
        let trimmed = line.trim();

        if in_script {
            // Inside a script only the closing-tag line belongs to the structure.
            if trimmed.starts_with("</script") || trimmed.contains("</script>") {
                structure_lines.push((i, line));
                in_script = false;
            }
            continue;
        }

        if trimmed.starts_with("<script") && !trimmed.contains("src=") {
            if let Some(pos) = trimmed.find('>') {
                // `>` is ASCII, so `pos + 1` is a valid char boundary.
                let after_tag = &trimmed[pos + 1..];
                let closes_here = after_tag.trim_start().starts_with("</script")
                    || after_tag.contains("</script>");
                structure_lines.push((i, line));
                // Only a script left open on this line hides the lines after it.
                in_script = !closes_here;
                continue;
            }
        }

        if !trimmed.is_empty() {
            structure_lines.push((i, line));
        }
    }

    let start_line = structure_lines.first()?.0;
    let content = structure_lines
        .iter()
        .map(|(_, text)| *text)
        .collect::<Vec<_>>()
        .join("\n");

    Some(HtmlBlock {
        content,
        start_line,
    })
}
/// Wraps the script-free markup of `html` in a single `html_structure` unit.
///
/// The unit is tagged `Language::Html` / `UnitType::RawCode`, carries the whole
/// block text as its code, and uses the first non-blank line (trimmed) as its
/// signature. All analysis fields are left at neutral values.
fn create_html_structure_unit(path: &Path, html: &HtmlBlock) -> CodeUnit {
    // `start_line` is shifted by one to obtain the unit's first line.
    let first_line = html.start_line + 1;
    // NOTE(review): the end line is derived from the number of lines in
    // `content`; if the block omits lines of the original file this may not be
    // the real last line — confirm against the producer of `HtmlBlock`.
    let line_count = html.content.lines().count();
    let last_line = first_line + line_count.saturating_sub(1);

    let signature = html
        .content
        .lines()
        .map(str::trim)
        .find(|text| !text.is_empty())
        .map(str::to_string)
        .unwrap_or_default();

    CodeUnit {
        // Identity and location.
        name: "html_structure".to_string(),
        qualified_name: format!("{}::html_structure", path.display()),
        file: path.to_path_buf(),
        line: first_line,
        end_line: last_line,
        language: Language::Html,
        unit_type: UnitType::RawCode,
        signature,
        // Nothing is analysed for raw markup.
        docstring: None,
        parameters: Vec::new(),
        return_type: None,
        extends: None,
        parent_class: None,
        calls: Vec::new(),
        called_by: Vec::new(),
        complexity: 1,
        has_loops: false,
        has_branches: false,
        has_error_handling: false,
        variables: Vec::new(),
        imports: Vec::new(),
        code: html.content.clone(),
    }
}
pub fn extract_html_units(path: &Path, source: &str) -> Vec<CodeUnit> {
let mut units = Vec::new();
for script in extract_script_blocks(source) {
let (mut script_units, depth_limit_hit) = parse_script_content(path, &script.content);
if depth_limit_hit {
return Vec::new();
}
for unit in &mut script_units {
unit.line += script.start_line;
unit.end_line += script.start_line;
}
units.extend(script_units);
}
if let Some(html) = extract_html_structure(source) {
units.push(create_html_structure_unit(path, &html));
}
units
}
/// Parses one inline script as JavaScript and extracts its code units.
///
/// Returns `(units, depth_limit_hit)`:
///
/// * `(units, false)` on success, with every unit relabelled as
///   `Language::Html`;
/// * `(empty, false)` when the grammar cannot be set or the parser yields no tree;
/// * `(empty, true)` when the walk went deeper than
///   `super::max_recursion_depth()`; a warning naming `path` is printed to stderr.
fn parse_script_content(path: &Path, script_source: &str) -> (Vec<CodeUnit>, bool) {
    let lang = Language::JavaScript;
    let max_depth = super::max_recursion_depth();

    let mut parser = Parser::new();
    let grammar = get_tree_sitter_language(lang);
    if parser.set_language(&grammar).is_err() {
        return (Vec::new(), false);
    }

    let tree = if let Some(parsed) = parser.parse(script_source, None) {
        parsed
    } else {
        return (Vec::new(), false);
    };

    let source_lines: Vec<&str> = script_source.lines().collect();
    let source_bytes = script_source.as_bytes();
    let file_imports = extract_file_imports(tree.root_node(), source_bytes, lang);

    let mut units = Vec::new();
    let mut depth_limit_hit = false;
    extract_from_node(
        tree.root_node(),
        path,
        &source_lines,
        source_bytes,
        lang,
        &mut units,
        None,
        &file_imports,
        0,
        max_depth,
        &mut depth_limit_hit,
    );

    if depth_limit_hit {
        eprintln!(
            "⚠️ Skipping {} (AST nesting exceeded max depth: {})",
            path.display(),
            max_depth
        );
        return (Vec::new(), true);
    }

    // The units were extracted with the JavaScript grammar but belong to an HTML file.
    units
        .iter_mut()
        .for_each(|unit| unit.language = Language::Html);

    (units, false)
}
/// Recursively walks `node`, appending every extracted unit to `units`.
///
/// * Function nodes are extracted, then their children are still visited.
/// * Class nodes that have a name are extracted and only the children of their
///   class body are visited, with `parent_class` set to the class name. Class
///   nodes without a name are walked like any other node.
/// * Constant nodes are extracted only outside a class, and are not descended into.
/// * Every other node is simply descended into.
///
/// `depth` is the current recursion depth. Once it exceeds `max_depth`,
/// `*depth_limit_hit` is set and this call, along with every later one that
/// sees the flag, returns without doing further work.
// The walk threads its whole context through plain parameters, hence the long list.
#[allow(clippy::too_many_arguments)]
fn extract_from_node(
    node: Node,
    path: &Path,
    lines: &[&str],
    bytes: &[u8],
    lang: Language,
    units: &mut Vec<CodeUnit>,
    parent_class: Option<&str>,
    file_imports: &[String],
    depth: usize,
    max_depth: usize,
    depth_limit_hit: &mut bool,
) {
    // Raise the flag first; a flag raised here or by an earlier call stops the walk.
    if depth > max_depth {
        *depth_limit_hit = true;
    }
    if *depth_limit_hit {
        return;
    }

    let kind = node.kind();

    if is_function_node(kind, lang) {
        // `extend` with an `Option` pushes the unit only when one was produced.
        units.extend(extract_function(
            node,
            path,
            lines,
            bytes,
            lang,
            parent_class,
            file_imports,
        ));
    } else if is_class_node(kind, lang) {
        if let Some(class_name) = get_node_name(node, bytes, lang) {
            units.extend(extract_class(node, path, lines, bytes, lang, file_imports));

            if let Some(body) = find_class_body(node, lang) {
                let mut body_cursor = body.walk();
                for member in body.children(&mut body_cursor) {
                    extract_from_node(
                        member,
                        path,
                        lines,
                        bytes,
                        lang,
                        units,
                        Some(&class_name),
                        file_imports,
                        depth + 1,
                        max_depth,
                        depth_limit_hit,
                    );
                }
            }
            // The class body has been handled; do not run the generic descent as well.
            return;
        }
    } else if parent_class.is_none() && is_constant_node(kind, lang) {
        units.extend(extract_constant(
            node,
            path,
            lines,
            bytes,
            lang,
            file_imports,
        ));
        return;
    }

    // Generic descent: functions, unnamed classes and all remaining node kinds.
    let mut cursor = node.walk();
    for child in node.children(&mut cursor) {
        extract_from_node(
            child,
            path,
            lines,
            bytes,
            lang,
            units,
            parent_class,
            file_imports,
            depth + 1,
            max_depth,
            depth_limit_hit,
        );
    }
}