Skip to main content

cadi_atomizer_java/
lib.rs

1//! Java language atomizer for CADI
2
3use async_trait::async_trait;
4use cadi_core::{AtomicChunk, atomizer::ResolvedImport};
5use cadi_extensions::{AtomizerExtension, Extension, ExtensionContext, ExtensionMetadata, ExtensionType, Result};
6use regex::Regex;
7use serde::{Deserialize, Serialize};
8use std::collections::HashMap;
9use uuid::Uuid;
10use chrono;
11use semver;
12use blake3;
13
14/// Java atomizer extension
15pub struct JavaAtomizer {
16    metadata: ExtensionMetadata,
17    config: JavaConfig,
18    import_regex: Regex,
19    class_regex: Regex,
20    method_regex: Regex,
21}
22
23#[derive(Debug, Clone, Serialize, Deserialize)]
24struct JavaConfig {
25    java_version: String,
26}
27
28impl JavaAtomizer {
29    /// Create a new Java atomizer
30    pub fn new() -> Self {
31        Self {
32            metadata: ExtensionMetadata {
33                id: cadi_extensions::ExtensionId(Uuid::new_v4()),
34                name: "cadi-atomizer-java".into(),
35                version: "1.0.0".into(),
36                description: "Atomizer for Java programming language".into(),
37                author: "CADI Team".into(),
38                homepage: Some("https://cadi.dev".into()),
39                repository: Some("https://github.com/cadi-project/cadi".into()),
40                license: "MIT OR Apache-2.0".into(),
41                extension_type: ExtensionType::Atomizer,
42            },
43            config: JavaConfig {
44                java_version: "11".into(),
45            },
46            import_regex: Regex::new(r"^import\s+([a-zA-Z_][a-zA-Z0-9_.]*);").unwrap(),
47            class_regex: Regex::new(r"(?s)class\s+(\w+).*?\{(.*)\}").unwrap(),
48            method_regex: Regex::new(r"(?m)^\s*(?:public|private|protected)?\s*(?:static)?\s*(?:final)?\s*[\w\[\]<>]+(?:\s+\w+)?\s*\([^)]*\)\s*(?:throws\s+\w+(?:\s*,\s*\w+)*)?\s*\{([^}]*)\}").unwrap(),
49        }
50    }
51}
52
53#[async_trait]
54impl Extension for JavaAtomizer {
55    fn metadata(&self) -> ExtensionMetadata {
56        self.metadata.clone()
57    }
58
59    async fn initialize(&mut self, context: &ExtensionContext) -> Result<()> {
60        // Load configuration
61        if let Some(config) = context.config.get("java_version") {
62            if let Some(version) = config.as_str() {
63                self.config.java_version = version.to_string();
64            }
65        }
66
67        Ok(())
68    }
69
70    async fn shutdown(&mut self) -> Result<()> {
71        Ok(())
72    }
73}
74
75#[async_trait]
76impl AtomizerExtension for JavaAtomizer {
77    fn language(&self) -> &str {
78        "java"
79    }
80
81    async fn extract_atoms(&self, source: &str) -> Result<Vec<AtomicChunk>> {
82        let mut atoms = Vec::new();
83
84        // Extract class-level atoms
85        for capture in self.class_regex.captures_iter(source) {
86            let class_name = capture.get(1).unwrap().as_str();
87            let class_body = capture.get(2).unwrap().as_str();
88
89            // Create class atom
90            let class_atom = AtomicChunk {
91                chunk_id: format!("java:class:{}", class_name),
92                aliases: vec![],
93                name: class_name.to_string(),
94                description: Some(format!("Java class {}", class_name)),
95                language: "java".into(),
96                granularity: cadi_core::atomic::ChunkGranularity::Type,
97                categories: vec![cadi_core::atomic::ChunkCategory::Logic],
98                tags: vec!["class".into()],
99                concepts: vec![],
100                provides: vec![],
101                requires: vec![],
102                platform: Default::default(),
103                composition: Default::default(),
104                metrics: Default::default(),
105                sources: vec![],
106                content_hash: blake3::hash(class_body.as_bytes()).to_hex().to_string(),
107                size: class_body.len(),
108                license: "MIT".into(),
109                created_at: Some(chrono::Utc::now().to_rfc3339()),
110                version: Some("1.0.0".into()),
111            };
112            atoms.push(class_atom);
113
114            // Extract method-level atoms from class
115            for method_capture in self.method_regex.captures_iter(class_body) {
116                let method_body = method_capture.get(1).unwrap().as_str();
117
118                let method_atom = AtomicChunk {
119                    chunk_id: format!("java:method:{}.{}", class_name, atoms.len()),
120                    aliases: vec![],
121                    name: format!("{}.method{}", class_name, atoms.len()),
122                    description: Some(format!("Method in class {}", class_name)),
123                    language: "java".into(),
124                    granularity: cadi_core::atomic::ChunkGranularity::Function,
125                    categories: vec![cadi_core::atomic::ChunkCategory::Logic],
126                    tags: vec!["method".into()],
127                    concepts: vec![],
128                    provides: vec![],
129                    requires: vec![],
130                    platform: Default::default(),
131                    composition: cadi_core::atomic::ChunkComposition {
132                        composed_of: vec![cadi_core::atomic::ChunkReference {
133                            chunk_id: format!("java:class:{}", class_name),
134                            alias: None,
135                            required: true,
136                            imports: vec![],
137                        }],
138                        composed_by: vec![],
139                        is_atomic: true,
140                        composition_strategy: None,
141                    },
142                    metrics: Default::default(),
143                    sources: vec![],
144                    content_hash: blake3::hash(method_body.as_bytes()).to_hex().to_string(),
145                    size: method_body.len(),
146                    license: "MIT".into(),
147                    created_at: Some(chrono::Utc::now().to_rfc3339()),
148                    version: Some("1.0.0".into()),
149                };
150                atoms.push(method_atom);
151            }
152        }
153
154        Ok(atoms)
155    }
156
157    async fn resolve_imports(&self, source: &str) -> Result<Vec<ResolvedImport>> {
158        let mut imports = Vec::new();
159
160        for line in source.lines() {
161            if let Some(capture) = self.import_regex.captures(line) {
162                let import_path = capture.get(1).unwrap().as_str();
163
164                let resolved = ResolvedImport {
165                    source_path: import_path.to_string(),
166                    symbols: vec![cadi_core::atomizer::ImportedSymbol {
167                        name: import_path.split('.').last().unwrap_or(import_path).to_string(),
168                        alias: None,
169                        chunk_id: format!("java:import:{}", import_path),
170                        chunk_hash: blake3::hash(import_path.as_bytes()).to_hex().to_string(),
171                        symbol_type: Some("class".into()),
172                    }],
173                    line: 0, // We don't track line numbers in this simple example
174                };
175
176                imports.push(resolved);
177            }
178        }
179
180        Ok(imports)
181    }
182}
183
184/// Export the extension constructor for dynamic loading
185#[no_mangle]
186pub extern "C" fn cadi_extension_create() -> *mut dyn Extension {
187    Box::into_raw(Box::new(JavaAtomizer::new()))
188}