leankg 0.15.3

Lightweight Knowledge Graph for AI-Assisted Development
Documentation
use crate::db::models::{CodeElement, Relationship};
use regex::Regex;

pub struct MavenExtractor<'a> {
    source: &'a [u8],
    file_path: &'a str,
}

impl<'a> MavenExtractor<'a> {
    pub fn new(source: &'a [u8], file_path: &'a str) -> Self {
        Self { source, file_path }
    }

    pub fn extract(&self) -> (Vec<CodeElement>, Vec<Relationship>) {
        let content = std::str::from_utf8(self.source).unwrap_or("");
        let mut elements = Vec::new();
        let mut relationships = Vec::new();

        let group_id = Self::extract_tag(content, "groupId").unwrap_or_default();
        let artifact_id = Self::extract_tag(content, "artifactId").unwrap_or_default();
        let version = Self::extract_tag(content, "version").unwrap_or_default();
        let packaging =
            Self::extract_tag(content, "packaging").unwrap_or_else(|| "jar".to_string());

        elements.push(CodeElement {
            qualified_name: format!("__maven_project__{}:{}", group_id, artifact_id),
            element_type: "maven_project".to_string(),
            name: artifact_id.clone(),
            file_path: self.file_path.to_string(),
            language: "maven".to_string(),
            metadata: serde_json::json!({
                "groupId": group_id,
                "artifactId": artifact_id,
                "version": version,
                "packaging": packaging,
            }),
            ..Default::default()
        });

        let dep_re = Regex::new(r"<dependency>([\s\S]*?)</dependency>").unwrap();
        for cap in dep_re.captures_iter(content) {
            let dep_block = &cap[1];
            let dep_group = Self::extract_tag(dep_block, "groupId").unwrap_or_default();
            let dep_artifact = Self::extract_tag(dep_block, "artifactId").unwrap_or_default();
            let dep_version = Self::extract_tag(dep_block, "version");
            let dep_scope = Self::extract_tag(dep_block, "scope");

            if !dep_artifact.is_empty() {
                let dep_id = format!("__dep__{}:{}", dep_group, dep_artifact);
                elements.push(CodeElement {
                    qualified_name: dep_id.clone(),
                    element_type: "dependency".to_string(),
                    name: dep_artifact,
                    file_path: self.file_path.to_string(),
                    language: "maven".to_string(),
                    ..Default::default()
                });

                relationships.push(Relationship {
                    id: None,
                    source_qualified: self.file_path.to_string(),
                    target_qualified: dep_id,
                    rel_type: "has_dependency".to_string(),
                    confidence: 1.0,
                    metadata: serde_json::json!({
                        "scope": dep_scope.unwrap_or_else(|| "compile".to_string()),
                        "version": dep_version,
                    }),
                });
            }
        }

        (elements, relationships)
    }

    fn extract_tag(content: &str, tag: &str) -> Option<String> {
        let re = Regex::new(&format!(r"<{}>([^<]+)</{}>", tag, tag)).ok()?;
        re.captures(content)
            .and_then(|cap| cap.get(1))
            .map(|m| m.as_str().trim().to_string())
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_extract_maven_dependencies() {
        let source = br#"<?xml version="1.0" encoding="UTF-8"?>
<project>
    <groupId>com.example</groupId>
    <artifactId>my-app</artifactId>
    <version>1.0.0</version>
    <dependencies>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>
        <dependency>
            <groupId>org.junit.jupiter</groupId>
            <artifactId>junit-jupiter</artifactId>
            <scope>test</scope>
        </dependency>
    </dependencies>
</project>"#;
        let extractor = MavenExtractor::new(source.as_slice(), "pom.xml");
        let (_, relationships) = extractor.extract();
        let deps: Vec<_> = relationships
            .iter()
            .filter(|r| r.rel_type == "has_dependency")
            .collect();
        assert!(
            deps.len() >= 2,
            "Should extract at least 2 Maven dependencies, got {}",
            deps.len()
        );
    }

    #[test]
    fn test_extract_maven_project_coords() {
        let source = br#"<?xml version="1.0"?>
<project>
    <groupId>com.example</groupId>
    <artifactId>my-app</artifactId>
    <version>1.0.0</version>
    <packaging>jar</packaging>
</project>"#;
        let extractor = MavenExtractor::new(source.as_slice(), "pom.xml");
        let (elements, _) = extractor.extract();
        let project: Vec<_> = elements
            .iter()
            .filter(|e| e.element_type == "maven_project")
            .collect();
        assert!(!project.is_empty(), "Should extract Maven project");
        assert_eq!(project[0].metadata["groupId"], "com.example");
        assert_eq!(project[0].metadata["artifactId"], "my-app");
    }
}