// leankg 0.16.7
//
// Lightweight Knowledge Graph for AI-Assisted Development
// Documentation
use crate::db::models::{CodeElement, Relationship};
use regex::Regex;

/// Regex-based extractor that turns one Terraform file into knowledge-graph elements.
///
/// Build it with [`TerraformExtractor::new`] and call [`TerraformExtractor::extract`].
#[derive(Debug, Clone)]
pub struct TerraformExtractor {
    /// Raw bytes of the Terraform file; decoded lossily as UTF-8 at extraction time.
    source: Vec<u8>,
    /// Path of the file, used verbatim as the file element's qualified name and as the
    /// prefix of every extracted block's qualified name.
    file_path: String,
}

/// Describes one kind of top-level Terraform block and how it maps to a `CodeElement`.
struct BlockKind {
    /// Multi-line regex matching the block header at the very start of a line.
    /// The last capture group is the block name; for two-label blocks the first
    /// capture group is the type label.
    pattern: &'static str,
    /// Value stored in `CodeElement::element_type`.
    element_type: &'static str,
    /// Text inserted between `"<file_path>::"` and the block name in the qualified name.
    name_prefix: &'static str,
    /// For two-label blocks, the metadata key under which the first label is stored.
    type_key: Option<&'static str>,
}

impl TerraformExtractor {
    /// Block kinds in the order their elements are emitted by [`Self::extract`].
    const BLOCK_KINDS: [BlockKind; 6] = [
        BlockKind {
            pattern: r#"(?m)^resource\s+"([^"]+)"\s+"([^"]+)""#,
            element_type: "terraform_resource",
            name_prefix: "",
            type_key: Some("resource_type"),
        },
        BlockKind {
            pattern: r#"(?m)^data\s+"([^"]+)"\s+"([^"]+)""#,
            element_type: "terraform_data",
            name_prefix: "",
            type_key: Some("data_type"),
        },
        BlockKind {
            pattern: r#"(?m)^variable\s+"([^"]+)""#,
            element_type: "terraform_variable",
            name_prefix: "var.",
            type_key: None,
        },
        BlockKind {
            pattern: r#"(?m)^output\s+"([^"]+)""#,
            element_type: "terraform_output",
            name_prefix: "out.",
            type_key: None,
        },
        BlockKind {
            pattern: r#"(?m)^module\s+"([^"]+)""#,
            element_type: "terraform_module",
            name_prefix: "module.",
            type_key: None,
        },
        BlockKind {
            pattern: r#"(?m)^provider\s+"([^"]+)""#,
            element_type: "terraform_provider",
            name_prefix: "provider.",
            type_key: None,
        },
    ];

    /// Creates an extractor for a single Terraform file.
    ///
    /// `source` is copied into the extractor. `file_path` is stored verbatim; it becomes
    /// the file element's qualified name and the parent of every extracted block.
    pub fn new(source: &[u8], file_path: &str) -> Self {
        Self {
            source: source.to_vec(),
            file_path: file_path.to_string(),
        }
    }

    /// Extracts the file element and all top-level Terraform blocks.
    ///
    /// The source is decoded with `String::from_utf8_lossy`. The first returned element
    /// always describes the file itself; it is followed by resources, data sources,
    /// variables, outputs, modules and providers, each group in source order.
    ///
    /// Only block headers that start in the first column of a line are recognised.
    /// Every block element has `line_end == line_start` (the 1-based header line); the
    /// end of the block body is not located.
    ///
    /// NOTE(review): resource and data qualified names are `"<file_path>::<name>"` and
    /// do not include the type label, so two such blocks with the same name in one file
    /// share a qualified name (their type is only available in `metadata`).
    ///
    /// The returned relationship list is always empty.
    pub fn extract(&self) -> (Vec<CodeElement>, Vec<Relationship>) {
        let content = String::from_utf8_lossy(&self.source);
        let source_str = content.as_ref();

        let mut elements = vec![self.file_element(source_str)];
        for kind in Self::BLOCK_KINDS.iter() {
            self.collect_blocks(kind, source_str, &mut elements);
        }

        (elements, Vec::new())
    }

    /// Builds the element that represents the whole file.
    fn file_element(&self, source_str: &str) -> CodeElement {
        // Display name is the last `/`-separated path component.
        let file_name = self
            .file_path
            .rsplit('/')
            .next()
            .unwrap_or(&self.file_path);

        CodeElement {
            qualified_name: self.file_path.clone(),
            element_type: "terraform".to_string(),
            name: file_name.to_string(),
            file_path: self.file_path.clone(),
            line_start: 1,
            line_end: source_str.lines().count() as u32,
            language: "terraform".to_string(),
            parent_qualified: None,
            metadata: serde_json::json!({}),
            ..Default::default()
        }
    }

    /// Appends one element to `elements` for every header of `kind` found in `source_str`.
    fn collect_blocks(&self, kind: &BlockKind, source_str: &str, elements: &mut Vec<CodeElement>) {
        // The patterns are constants; if one ever failed to compile the extractor would
        // skip that block kind instead of panicking.
        let re = match Regex::new(kind.pattern) {
            Ok(re) => re,
            Err(_) => return,
        };

        // Two-label blocks carry the name in group 2, single-label blocks in group 1.
        let name_group = if kind.type_key.is_some() { 2 } else { 1 };

        for cap in re.captures_iter(source_str) {
            let (header, name) = match (cap.get(0), cap.get(name_group)) {
                (Some(header), Some(name)) => (header, name.as_str()),
                _ => continue,
            };

            let mut metadata = serde_json::Map::new();
            if let Some(key) = kind.type_key {
                match cap.get(1) {
                    Some(type_label) => {
                        metadata.insert(
                            key.to_string(),
                            serde_json::Value::from(type_label.as_str()),
                        );
                    }
                    None => continue,
                }
            }
            metadata.insert("name".to_string(), serde_json::Value::from(name));

            // Headers are anchored at a line start, so the text before the match consists
            // of complete lines; their count plus one is the 1-based header line.
            let line = source_str[..header.start()].lines().count() as u32 + 1;

            elements.push(CodeElement {
                qualified_name: format!("{}::{}{}", self.file_path, kind.name_prefix, name),
                element_type: kind.element_type.to_string(),
                name: name.to_string(),
                file_path: self.file_path.clone(),
                line_start: line,
                line_end: line,
                language: "terraform".to_string(),
                parent_qualified: Some(self.file_path.clone()),
                metadata: serde_json::Value::Object(metadata),
                ..Default::default()
            });
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the extractor over `source` and returns only the elements of `element_type`,
    /// in the order the extractor produced them.
    fn extract_of_type(source: &[u8], file_path: &str, element_type: &str) -> Vec<CodeElement> {
        let (elements, _) = TerraformExtractor::new(source, file_path).extract();
        elements
            .into_iter()
            .filter(|e| e.element_type == element_type)
            .collect()
    }

    #[test]
    fn test_extract_terraform_file_element() {
        let source = b"variable \"a\" {}\n";
        let (elements, relationships) = TerraformExtractor::new(source, "infra/main.tf").extract();

        // The file itself is always the first element.
        let file = &elements[0];
        assert_eq!(file.element_type, "terraform");
        assert_eq!(file.qualified_name, "infra/main.tf");
        assert_eq!(file.name, "main.tf");
        assert_eq!(file.line_start, 1);
        assert_eq!(file.line_end, 1);
        assert!(file.parent_qualified.is_none());

        assert!(relationships.is_empty());
    }

    #[test]
    fn test_extract_terraform_resource() {
        let source = br#"
resource "aws_s3_bucket" "example" {
  bucket = "my-example-bucket"
}

resource "aws_ec2_instance" "web" {
  ami           = "ami-12345678"
  instance_type = "t2.micro"
}
"#;
        let resources = extract_of_type(source, "main.tf", "terraform_resource");
        assert_eq!(resources.len(), 2);

        // The raw string starts with a blank line, so the first header is on line 2.
        assert_eq!(resources[0].name, "example");
        assert_eq!(resources[0].qualified_name, "main.tf::example");
        assert_eq!(resources[0].line_start, 2);
        assert_eq!(resources[0].line_end, 2);
        assert_eq!(resources[0].parent_qualified.as_deref(), Some("main.tf"));
        assert_eq!(resources[0].metadata["resource_type"], "aws_s3_bucket");
        assert_eq!(resources[0].metadata["name"], "example");

        assert_eq!(resources[1].name, "web");
        assert_eq!(resources[1].line_start, 6);
        assert_eq!(resources[1].metadata["resource_type"], "aws_ec2_instance");
    }

    #[test]
    fn test_extract_terraform_variable() {
        let source = br#"
variable "region" {
  type    = string
  default = "us-west-2"
}

variable "instance_type" {
  type    = string
  default = "t2.micro"
}
"#;
        let variables = extract_of_type(source, "vars.tf", "terraform_variable");
        assert_eq!(variables.len(), 2);

        assert_eq!(variables[0].name, "region");
        assert_eq!(variables[0].qualified_name, "vars.tf::var.region");
        assert_eq!(variables[0].line_start, 2);

        assert_eq!(variables[1].name, "instance_type");
        assert_eq!(variables[1].qualified_name, "vars.tf::var.instance_type");
        assert_eq!(variables[1].line_start, 7);
    }

    #[test]
    fn test_extract_terraform_output() {
        let source = br#"
output "bucket_name" {
  value = aws_s3_bucket.example.bucket
}
"#;
        let outputs = extract_of_type(source, "outputs.tf", "terraform_output");
        assert_eq!(outputs.len(), 1);
        assert_eq!(outputs[0].name, "bucket_name");
        assert_eq!(outputs[0].qualified_name, "outputs.tf::out.bucket_name");
        assert_eq!(outputs[0].line_start, 2);
    }

    #[test]
    fn test_extract_terraform_data() {
        let source = br#"
data "aws_ami" "ubuntu" {
  most_recent = true
  owners      = ["099720109477"]
}
"#;
        let data_sources = extract_of_type(source, "data.tf", "terraform_data");
        assert_eq!(data_sources.len(), 1);
        assert_eq!(data_sources[0].name, "ubuntu");
        assert_eq!(data_sources[0].qualified_name, "data.tf::ubuntu");
        assert_eq!(data_sources[0].line_start, 2);
        assert_eq!(data_sources[0].metadata["data_type"], "aws_ami");
    }

    #[test]
    fn test_extract_terraform_module_and_provider() {
        let source = br#"
provider "aws" {
  region = "us-west-2"
}

module "vpc" {
  source = "./modules/vpc"
}
"#;
        let providers = extract_of_type(source, "main.tf", "terraform_provider");
        assert_eq!(providers.len(), 1);
        assert_eq!(providers[0].name, "aws");
        assert_eq!(providers[0].qualified_name, "main.tf::provider.aws");
        assert_eq!(providers[0].line_start, 2);

        let modules = extract_of_type(source, "main.tf", "terraform_module");
        assert_eq!(modules.len(), 1);
        assert_eq!(modules[0].name, "vpc");
        assert_eq!(modules[0].qualified_name, "main.tf::module.vpc");
        assert_eq!(modules[0].line_start, 6);
    }
}