use tree_sitter::{Node, Parser};
use crate::error::{CodegraphError, Result};
use crate::graph::types::{
ByteSpan, FileFacts, RefRole, Reference, Scope, ScopeKind, Symbol, SymbolKind, Visibility,
};
use crate::lang::Language;
use crate::symbol::Descriptor;
use super::{
ExtractCtx, Extractor, attach_reference_scopes, definition_bindings, make_symbol, push_scope,
};
/// Stateless extractor that turns HCL source text into [`FileFacts`] through
/// its [`Extractor`] implementation.
pub struct HclExtractor;
impl Extractor for HclExtractor {
    /// Language tag reported by this extractor.
    fn lang(&self) -> Language {
        Language::Hcl
    }

    /// Parses `source` with the HCL grammar and assembles the facts for `file`:
    /// block symbols plus the file-level module symbol, references with their
    /// scopes attached, the scope list, and the definition bindings.
    ///
    /// # Errors
    /// Returns [`CodegraphError::Parse`] (carrying `file`) when the grammar
    /// cannot be installed on the parser or when parsing yields no tree.
    fn extract(&self, source: &str, file: &str) -> Result<FileFacts> {
        // Both failure paths report the same error value.
        let parse_failure = || CodegraphError::Parse {
            path: file.to_owned(),
        };

        let grammar = crate::grammar::hcl();
        let mut parser = Parser::new();
        parser.set_language(&grammar).map_err(|_| parse_failure())?;
        let tree = parser.parse(source, None).ok_or_else(parse_failure)?;
        let root = tree.root_node();

        let ctx = ExtractCtx {
            bytes: source.as_bytes(),
            file,
            lang: Language::Hcl,
        };

        // Bindings are derived from the block definitions only, so they must
        // be computed before the file-level module symbol is appended.
        let mut symbols = collect_symbols(&root, &ctx);
        let bindings = definition_bindings(&symbols);
        symbols.push(super::module_symbol(Language::Hcl, &[], file, source.len()));

        let mut references = collect_references(&root, ctx.bytes, ctx.file);
        let scopes = collect_scopes(source.len());
        attach_reference_scopes(&mut references, &scopes);

        Ok(FileFacts {
            file: file.to_owned(),
            lang: Language::Hcl.as_str().to_owned(),
            symbols,
            references,
            scopes,
            bindings,
            ffi_exports: Vec::new(),
        })
    }
}
/// Builds the scope list for a file: one parentless `Module` scope is pushed,
/// spanning bytes `0..source_len`.
fn collect_scopes(source_len: usize) -> Vec<Scope> {
    let whole_file = ByteSpan {
        start: 0,
        end: source_len,
    };
    let mut scopes = Vec::<Scope>::new();
    push_scope(&mut scopes, None, whole_file, ScopeKind::Module);
    scopes
}
/// Reads the leading type keyword of a block node and the labels after it.
///
/// Returns `None` when the block has no named children or when its first
/// named child is not an `identifier`. Otherwise returns the identifier text
/// and the labels in source order: `string_lit` labels are passed through
/// `unquote`, `identifier` labels are taken verbatim, and any other node kind
/// is skipped. Label collection stops at the first block or object delimiter
/// or `body` node.
fn block_type_and_labels(block: &Node, bytes: &[u8]) -> Option<(String, Vec<String>)> {
    let mut cursor = block.walk();
    let children: Vec<Node> = block.named_children(&mut cursor).collect();

    let (head, rest) = children.split_first()?;
    if head.kind() != "identifier" {
        return None;
    }

    let labels: Vec<String> = rest
        .iter()
        .take_while(|child| {
            !matches!(
                child.kind(),
                "block_start" | "body" | "block_end" | "object_start" | "object_end"
            )
        })
        .filter_map(|child| match child.kind() {
            "string_lit" => Some(super::unquote(super::node_text(child, bytes)).to_owned()),
            "identifier" => Some(super::node_text(child, bytes).to_owned()),
            _ => None,
        })
        .collect();

    Some((super::node_text(head, bytes).to_owned(), labels))
}
/// Collects one symbol per recognised top-level block.
///
/// Only `block` nodes that are direct named children of the root's `body`
/// node are considered; blocks for which `extract_block_symbol` yields
/// nothing are dropped. A tree without a `body` node produces an empty list.
fn collect_symbols(root: &Node, ctx: &ExtractCtx) -> Vec<Symbol> {
    let mut root_cursor = root.walk();
    let Some(body) = root
        .named_children(&mut root_cursor)
        .find(|child| child.kind() == "body")
    else {
        return Vec::new();
    };

    let mut body_cursor = body.walk();
    let mut symbols = Vec::new();
    symbols.extend(
        body.named_children(&mut body_cursor)
            .filter(|child| child.kind() == "block")
            .filter_map(|block| extract_block_symbol(&block, ctx)),
    );
    symbols
}
/// Converts a single block node into a symbol, when its type is recognised.
///
/// Supported shapes (extra labels are ignored):
/// - `resource "<type>" "<name>"`: `Resource` named `<name>`, with
///   descriptors `Namespace(<type>)`, `Type(<name>)`.
/// - `data "<type>" "<name>"`: `Resource` named `<name>`, with descriptors
///   `Namespace("data")`, `Namespace(<type>)`, `Type(<name>)`.
/// - `module "<name>"`: `Module` named `<name>`, with descriptors
///   `Namespace("module")`, `Type(<name>)`.
///
/// Returns `None` for any other block type, for a block with too few labels,
/// or when the block header cannot be read. Every emitted symbol is `Public`.
fn extract_block_symbol(block: &Node, ctx: &ExtractCtx) -> Option<Symbol> {
    let (block_type, labels) = block_type_and_labels(block, ctx.bytes)?;
    let sig = super::one_line_signature(super::node_text(block, ctx.bytes), &['{']);

    // Pick name, kind and descriptor path from the block type and its labels.
    let (name, kind, descriptors) = match (block_type.as_str(), labels.as_slice()) {
        ("resource", [res_type, res_name, ..]) => (
            res_name,
            SymbolKind::Resource,
            vec![
                Descriptor::Namespace(res_type.clone()),
                Descriptor::Type(res_name.clone()),
            ],
        ),
        ("data", [src_type, src_name, ..]) => (
            src_name,
            SymbolKind::Resource,
            vec![
                Descriptor::Namespace("data".to_owned()),
                Descriptor::Namespace(src_type.clone()),
                Descriptor::Type(src_name.clone()),
            ],
        ),
        ("module", [mod_name, ..]) => (
            mod_name,
            SymbolKind::Module,
            vec![
                Descriptor::Namespace("module".to_owned()),
                Descriptor::Type(mod_name.clone()),
            ],
        ),
        _ => return None,
    };

    Some(make_symbol(
        ctx,
        block,
        name.clone(),
        kind,
        Visibility::Public,
        descriptors,
        sig,
    ))
}
/// Gathers the references found in the subtree rooted at `root` into a fresh
/// vector, in the order the recursive walk emits them.
fn collect_references(root: &Node, bytes: &[u8], file: &str) -> Vec<Reference> {
    let mut found = Vec::new();
    collect_references_recursive(root, bytes, file, &mut found);
    found
}
/// Walks the subtree rooted at `node` depth-first and appends one reference
/// to `out` for every `expression` node that starts a qualified traversal
/// (see [`traversal_reference`]).
///
/// Children are always visited, even below an expression that already
/// produced a reference: later segments of a traversal can hold nested
/// expressions (for example the `local.i` in `aws_instance.web[local.i].id`),
/// and those must be reported too.
fn collect_references_recursive(node: &Node, bytes: &[u8], file: &str, out: &mut Vec<Reference>) {
    if node.kind() == "expression" {
        if let Some(reference) = traversal_reference(node, bytes, file) {
            out.push(reference);
        }
    }

    let mut cursor = node.walk();
    for child in node.children(&mut cursor) {
        collect_references_recursive(&child, bytes, file, out);
    }
}

/// Builds the `TypeRef` reference for an `expression` node shaped like
/// `<seg0>.<seg1>...`.
///
/// The expression's first named child must be a `variable_expr`, and the
/// expression must have a `get_attr` child. `seg0` is the identifier of the
/// `variable_expr` and becomes the qualifier; `seg1` is the identifier of the
/// first `get_attr` and becomes the name. The occurrence is taken from the
/// `variable_expr` node. Returns `None` when any part is missing or either
/// segment is empty.
fn traversal_reference(expr: &Node, bytes: &[u8], file: &str) -> Option<Reference> {
    let named: Vec<Node> = {
        let mut cursor = expr.walk();
        expr.named_children(&mut cursor).collect()
    };

    let head = named.first().filter(|n| n.kind() == "variable_expr")?;
    let get_attr = named.iter().find(|n| n.kind() == "get_attr")?;

    let seg0_node = head
        .named_children(&mut head.walk())
        .find(|n| n.kind() == "identifier")?;
    let seg1_node = get_attr
        .named_children(&mut get_attr.walk())
        .find(|n| n.kind() == "identifier")?;

    let seg0 = super::node_text(&seg0_node, bytes);
    let seg1 = super::node_text(&seg1_node, bytes);
    if seg0.is_empty() || seg1.is_empty() {
        return None;
    }

    Some(Reference {
        name: seg1.to_owned(),
        qualifier: Some(seg0.to_owned()),
        role: RefRole::TypeRef,
        occ: super::node_occurrence(head, file),
        source_module: None,
        from_path: None,
        scope: None,
        type_ref_ctx: None,
    })
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::extract::extract_path;
    use crate::graph::types::SymbolKind;

    /// File path used by the tests that do not depend on the file name.
    const MAIN_TF: &str = "infra/main.tf";

    /// Runs the HCL extractor over `src` as `infra/main.tf`.
    fn extract_main(src: &str) -> FileFacts {
        HclExtractor.extract(src, MAIN_TF).unwrap()
    }

    /// SCIP rendering of a symbol's id.
    fn scip(sym: &Symbol) -> String {
        sym.id.to_scip_string()
    }

    /// First symbol with the given name, of any kind.
    fn find_by_name<'a>(symbols: &'a [Symbol], name: &str) -> Option<&'a Symbol> {
        symbols.iter().find(|s| s.name == name)
    }

    /// First `Module` symbol with the given name.
    fn find_module<'a>(symbols: &'a [Symbol], name: &str) -> Option<&'a Symbol> {
        symbols
            .iter()
            .find(|s| s.kind == SymbolKind::Module && s.name == name)
    }

    /// Whether any `Module` symbol was emitted.
    fn has_module_symbol(facts: &FileFacts) -> bool {
        facts.symbols.iter().any(|s| s.kind == SymbolKind::Module)
    }

    /// All `TypeRef` references whose name equals `name`.
    fn typerefs_named<'a>(facts: &'a FileFacts, name: &str) -> Vec<&'a Reference> {
        facts
            .references
            .iter()
            .filter(|r| r.role == RefRole::TypeRef && r.name == name)
            .collect()
    }

    /// Asserts that path-based dispatch picks the HCL extractor.
    fn assert_routed_to_hcl(path: &str, src: &str) {
        let facts = extract_path(path, src).unwrap();
        assert_eq!(facts.lang, "hcl");
    }

    /// Asserts that `src` yields a `Definition` binding called `name` in scope 0.
    fn assert_definition_binding(src: &str, name: &str, missing: &str, wrong_scope: &str) {
        use crate::graph::types::{BindingKind, BindingTarget};

        let facts = extract_main(src);
        let binding = facts
            .bindings
            .iter()
            .find(|b| b.name == name)
            .expect(missing);
        assert_eq!(
            binding.kind,
            BindingKind::Definition,
            "binding kind should be Definition"
        );
        assert_eq!(binding.scope, 0, "{}", wrong_scope);
        assert!(
            matches!(binding.target, BindingTarget::Def(_)),
            "target should be BindingTarget::Def(_); got {:?}",
            binding.target
        );
    }

    #[test]
    fn hcl_emits_module_symbol() {
        let facts = extract_main(r#"resource "aws_instance" "web" {}"#);
        assert_eq!(facts.lang, "hcl");

        let mod_sym =
            find_module(&facts.symbols, "main").expect("expected a Module symbol named 'main'");
        let rendered = mod_sym.id.to_scip_string();
        assert!(
            rendered.contains("main"),
            "module symbol SCIP string should contain 'main'; got: {}",
            rendered
        );
    }

    #[test]
    fn dispatch_routes_tf_extension() {
        assert_routed_to_hcl("infra/main.tf", r#"resource "aws_instance" "web" {}"#);
    }

    #[test]
    fn dispatch_routes_hcl_extension() {
        assert_routed_to_hcl(
            "infra/vars.hcl",
            r#"variable "region" { default = "us-east-1" }"#,
        );
    }

    #[test]
    fn dispatch_routes_tfvars_extension() {
        assert_routed_to_hcl("infra/prod.tfvars", r#"region = "us-east-1""#);
    }

    #[test]
    fn resource_block_emits_resource_symbol() {
        let facts = extract_main(r#"resource "aws_instance" "web" {}"#);
        let sym = find_by_name(&facts.symbols, "web").expect("expected 'web' Resource symbol");
        assert_eq!(sym.kind, SymbolKind::Resource);

        let id = scip(sym);
        assert!(
            id.ends_with("aws_instance/web#"),
            "resource SCIP should end with 'aws_instance/web#'; got: {}",
            id
        );
    }

    #[test]
    fn data_block_emits_resource_symbol_with_data_namespace() {
        let facts = extract_main(r#"data "aws_ami" "ubuntu" {}"#);
        let sym =
            find_by_name(&facts.symbols, "ubuntu").expect("expected 'ubuntu' Resource symbol");
        assert_eq!(sym.kind, SymbolKind::Resource);

        let id = scip(sym);
        assert!(
            id.ends_with("data/aws_ami/ubuntu#"),
            "data SCIP should end with 'data/aws_ami/ubuntu#'; got: {}",
            id
        );
    }

    #[test]
    fn module_block_emits_module_symbol() {
        let facts = extract_main(r#"module "vpc" { source = "./vpc" }"#);
        let sym =
            find_module(&facts.symbols, "vpc").expect("expected a Module symbol named 'vpc'");

        let id = scip(sym);
        assert!(
            id.ends_with("module/vpc#"),
            "module SCIP should end with 'module/vpc#'; got: {}",
            id
        );
    }

    #[test]
    fn variable_block_alone_emits_no_block_symbol() {
        let facts = HclExtractor
            .extract(r#"variable "region" {}"#, "infra/vars.tf")
            .unwrap();

        let block_syms: Vec<&Symbol> = facts
            .symbols
            .iter()
            .filter(|s| s.name == "region")
            .collect();
        assert!(
            block_syms.is_empty(),
            "variable block should produce no symbol in v1; got: {:?}",
            block_syms
        );
        assert!(
            has_module_symbol(&facts),
            "the file module symbol should still be present"
        );
    }

    #[test]
    fn multi_block_file_emits_all_three_symbols() {
        let src = r#"
resource "aws_instance" "web" {}
data "aws_ami" "ubuntu" {}
module "vpc" { source = "./vpc" }
"#;
        let facts = extract_main(src);

        let web = find_by_name(&facts.symbols, "web").expect("expected 'web'");
        assert_eq!(web.kind, SymbolKind::Resource);
        let web_id = scip(web);
        assert!(web_id.ends_with("aws_instance/web#"), "got: {}", web_id);

        let ubuntu = find_by_name(&facts.symbols, "ubuntu").expect("expected 'ubuntu'");
        assert_eq!(ubuntu.kind, SymbolKind::Resource);
        let ubuntu_id = scip(ubuntu);
        assert!(
            ubuntu_id.ends_with("data/aws_ami/ubuntu#"),
            "got: {}",
            ubuntu_id
        );

        let vpc = find_module(&facts.symbols, "vpc").expect("expected 'vpc'");
        let vpc_id = scip(vpc);
        assert!(vpc_id.ends_with("module/vpc#"), "got: {}", vpc_id);
    }

    #[test]
    fn empty_hcl_does_not_panic_and_returns_module_symbol() {
        let facts = HclExtractor.extract("", "infra/empty.tf").unwrap();
        assert!(
            has_module_symbol(&facts),
            "empty HCL should still produce the module symbol"
        );
        assert!(
            facts.references.is_empty(),
            "empty HCL should emit no references"
        );
    }

    #[test]
    fn malformed_hcl_does_not_panic() {
        let facts = HclExtractor
            .extract("THIS IS NOT VALID HCL !!!", "infra/bad.tf")
            .unwrap();
        assert!(
            has_module_symbol(&facts),
            "malformed HCL should still return Ok with the module symbol"
        );
    }

    #[test]
    fn resource_attr_traversal_emits_typeref_ref() {
        let facts =
            extract_main(r#"resource "aws_instance" "web" { subnet_id = aws_subnet.main.id }"#);
        let refs = typerefs_named(&facts, "main");
        assert_eq!(
            refs.len(),
            1,
            "expected exactly one TypeRef ref named 'main', got: {:?}",
            facts.references
        );
        assert_eq!(
            refs[0].qualifier,
            Some("aws_subnet".to_owned()),
            "qualifier should be 'aws_subnet', got: {:?}",
            refs[0].qualifier
        );
    }

    #[test]
    fn module_traversal_in_resource_body_emits_typeref_ref() {
        let facts = extract_main(r#"resource "aws_instance" "web" { x = module.vpc.id }"#);
        let refs = typerefs_named(&facts, "vpc");
        assert_eq!(
            refs.len(),
            1,
            "expected exactly one TypeRef ref named 'vpc', got: {:?}",
            facts.references
        );
        assert_eq!(
            refs[0].qualifier,
            Some("module".to_owned()),
            "qualifier should be 'module', got: {:?}",
            refs[0].qualifier
        );
    }

    #[test]
    fn traversal_inside_interpolation_emits_typeref_ref() {
        let facts = extract_main(
            r#"resource "aws_instance" "web" { subnet_id = "${aws_subnet.main.id}" }"#,
        );
        let refs = typerefs_named(&facts, "main");
        assert_eq!(
            refs.len(),
            1,
            "expected one TypeRef ref 'main' from interpolated traversal; \
             if this fails, the grammar wraps interpolations differently — \
             document as v1 boundary. Got: {:?}",
            facts.references
        );
        assert_eq!(refs[0].qualifier, Some("aws_subnet".to_owned()));
    }

    #[test]
    fn single_segment_variable_expr_emits_no_ref() {
        let facts = extract_main(r#"resource "aws_instance" "web" { count = each }"#);
        let bare_refs = typerefs_named(&facts, "each");
        assert!(
            bare_refs.is_empty(),
            "bare single-segment variable should produce no TypeRef ref; got: {:?}",
            bare_refs
        );
    }

    #[test]
    fn tier_b_one_module_scope_spans_file() {
        use crate::graph::types::ScopeKind;

        let src = r#"module "vpc" { source = "./vpc" }"#;
        let facts = extract_main(src);
        assert_eq!(facts.scopes.len(), 1, "expected exactly one scope");

        let root_scope = &facts.scopes[0];
        assert_eq!(
            root_scope.kind,
            ScopeKind::Module,
            "scope kind should be Module"
        );
        assert_eq!(root_scope.parent, None, "root scope has no parent");
        assert_eq!(root_scope.span.start, 0, "scope should start at 0");
        assert_eq!(
            root_scope.span.end,
            src.len(),
            "scope should end at source length"
        );
    }

    #[test]
    fn tier_b_module_block_yields_definition_binding() {
        assert_definition_binding(
            r#"module "vpc" { source = "./vpc" }"#,
            "vpc",
            "expected a binding named 'vpc'",
            "module binding must live in scope 0",
        );
    }

    #[test]
    fn tier_b_resource_block_yields_definition_binding() {
        assert_definition_binding(
            r#"resource "aws_instance" "web" {}"#,
            "web",
            "expected a binding named 'web'",
            "resource binding must live in scope 0",
        );
    }

    #[test]
    fn tier_b_reference_scope_is_attached() {
        let facts = extract_main(r#"resource "aws_instance" "web" { x = module.vpc.id }"#);
        let vpc_ref = typerefs_named(&facts, "vpc")
            .into_iter()
            .next()
            .expect("expected a TypeRef ref named 'vpc'");
        assert!(
            vpc_ref.scope.is_some(),
            "reference scope should be attached (Some(_)); got None"
        );
    }

    #[test]
    fn tier_b_e2e_qualified_module_ref_resolves_exact() {
        use crate::graph::types::{Confidence, RefRole};
        use crate::resolve::{Resolver, ScopeGraphResolver};

        let src = "module \"vpc\" {\n source = \"./vpc\"\n}\n\nresource \"aws_instance\" \"web\" {\n vpc_id = module.vpc.id\n}\n";
        let facts = extract_main(src);
        let graph = ScopeGraphResolver.resolve(&[facts]);

        let typeref_edges: Vec<_> = graph
            .edges
            .iter()
            .filter(|e| e.role == RefRole::TypeRef && e.to.to_scip_string().contains("module/vpc"))
            .collect();
        assert_eq!(
            typeref_edges.len(),
            1,
            "expected exactly one TypeRef edge targeting module/vpc, got: {:?}",
            typeref_edges
                .iter()
                .map(|e| format!(
                    "{} → {} ({:?})",
                    e.from.to_scip_string(),
                    e.to.to_scip_string(),
                    e.confidence
                ))
                .collect::<Vec<_>>()
        );
        assert_eq!(
            typeref_edges[0].confidence,
            Confidence::Exact,
            "qualified module reference should resolve with Exact confidence"
        );
    }
}