1use async_trait::async_trait;
4use cadi_core::{AtomicChunk, atomizer::ResolvedImport};
5use cadi_extensions::{AtomizerExtension, Extension, ExtensionContext, ExtensionMetadata, ExtensionType, Result};
6use regex::Regex;
7use serde::{Deserialize, Serialize};
8use std::collections::HashMap;
9use uuid::Uuid;
10use chrono;
11use semver;
12use blake3;
13
14pub struct JavaAtomizer {
16 metadata: ExtensionMetadata,
17 config: JavaConfig,
18 import_regex: Regex,
19 class_regex: Regex,
20 method_regex: Regex,
21}
22
23#[derive(Debug, Clone, Serialize, Deserialize)]
24struct JavaConfig {
25 java_version: String,
26}
27
28impl JavaAtomizer {
29 pub fn new() -> Self {
31 Self {
32 metadata: ExtensionMetadata {
33 id: cadi_extensions::ExtensionId(Uuid::new_v4()),
34 name: "cadi-atomizer-java".into(),
35 version: "1.0.0".into(),
36 description: "Atomizer for Java programming language".into(),
37 author: "CADI Team".into(),
38 homepage: Some("https://cadi.dev".into()),
39 repository: Some("https://github.com/cadi-project/cadi".into()),
40 license: "MIT OR Apache-2.0".into(),
41 extension_type: ExtensionType::Atomizer,
42 },
43 config: JavaConfig {
44 java_version: "11".into(),
45 },
46 import_regex: Regex::new(r"^import\s+([a-zA-Z_][a-zA-Z0-9_.]*);").unwrap(),
47 class_regex: Regex::new(r"(?s)class\s+(\w+).*?\{(.*)\}").unwrap(),
48 method_regex: Regex::new(r"(?m)^\s*(?:public|private|protected)?\s*(?:static)?\s*(?:final)?\s*[\w\[\]<>]+(?:\s+\w+)?\s*\([^)]*\)\s*(?:throws\s+\w+(?:\s*,\s*\w+)*)?\s*\{([^}]*)\}").unwrap(),
49 }
50 }
51}
52
53#[async_trait]
54impl Extension for JavaAtomizer {
55 fn metadata(&self) -> ExtensionMetadata {
56 self.metadata.clone()
57 }
58
59 async fn initialize(&mut self, context: &ExtensionContext) -> Result<()> {
60 if let Some(config) = context.config.get("java_version") {
62 if let Some(version) = config.as_str() {
63 self.config.java_version = version.to_string();
64 }
65 }
66
67 Ok(())
68 }
69
70 async fn shutdown(&mut self) -> Result<()> {
71 Ok(())
72 }
73}
74
75#[async_trait]
76impl AtomizerExtension for JavaAtomizer {
77 fn language(&self) -> &str {
78 "java"
79 }
80
81 async fn extract_atoms(&self, source: &str) -> Result<Vec<AtomicChunk>> {
82 let mut atoms = Vec::new();
83
84 for capture in self.class_regex.captures_iter(source) {
86 let class_name = capture.get(1).unwrap().as_str();
87 let class_body = capture.get(2).unwrap().as_str();
88
89 let class_atom = AtomicChunk {
91 chunk_id: format!("java:class:{}", class_name),
92 aliases: vec![],
93 name: class_name.to_string(),
94 description: Some(format!("Java class {}", class_name)),
95 language: "java".into(),
96 granularity: cadi_core::atomic::ChunkGranularity::Type,
97 categories: vec![cadi_core::atomic::ChunkCategory::Logic],
98 tags: vec!["class".into()],
99 concepts: vec![],
100 provides: vec![],
101 requires: vec![],
102 platform: Default::default(),
103 composition: Default::default(),
104 metrics: Default::default(),
105 sources: vec![],
106 content_hash: blake3::hash(class_body.as_bytes()).to_hex().to_string(),
107 size: class_body.len(),
108 license: "MIT".into(),
109 created_at: Some(chrono::Utc::now().to_rfc3339()),
110 version: Some("1.0.0".into()),
111 };
112 atoms.push(class_atom);
113
114 for method_capture in self.method_regex.captures_iter(class_body) {
116 let method_body = method_capture.get(1).unwrap().as_str();
117
118 let method_atom = AtomicChunk {
119 chunk_id: format!("java:method:{}.{}", class_name, atoms.len()),
120 aliases: vec![],
121 name: format!("{}.method{}", class_name, atoms.len()),
122 description: Some(format!("Method in class {}", class_name)),
123 language: "java".into(),
124 granularity: cadi_core::atomic::ChunkGranularity::Function,
125 categories: vec![cadi_core::atomic::ChunkCategory::Logic],
126 tags: vec!["method".into()],
127 concepts: vec![],
128 provides: vec![],
129 requires: vec![],
130 platform: Default::default(),
131 composition: cadi_core::atomic::ChunkComposition {
132 composed_of: vec![cadi_core::atomic::ChunkReference {
133 chunk_id: format!("java:class:{}", class_name),
134 alias: None,
135 required: true,
136 imports: vec![],
137 }],
138 composed_by: vec![],
139 is_atomic: true,
140 composition_strategy: None,
141 },
142 metrics: Default::default(),
143 sources: vec![],
144 content_hash: blake3::hash(method_body.as_bytes()).to_hex().to_string(),
145 size: method_body.len(),
146 license: "MIT".into(),
147 created_at: Some(chrono::Utc::now().to_rfc3339()),
148 version: Some("1.0.0".into()),
149 };
150 atoms.push(method_atom);
151 }
152 }
153
154 Ok(atoms)
155 }
156
157 async fn resolve_imports(&self, source: &str) -> Result<Vec<ResolvedImport>> {
158 let mut imports = Vec::new();
159
160 for line in source.lines() {
161 if let Some(capture) = self.import_regex.captures(line) {
162 let import_path = capture.get(1).unwrap().as_str();
163
164 let resolved = ResolvedImport {
165 source_path: import_path.to_string(),
166 symbols: vec![cadi_core::atomizer::ImportedSymbol {
167 name: import_path.split('.').last().unwrap_or(import_path).to_string(),
168 alias: None,
169 chunk_id: format!("java:import:{}", import_path),
170 chunk_hash: blake3::hash(import_path.as_bytes()).to_hex().to_string(),
171 symbol_type: Some("class".into()),
172 }],
173 line: 0, };
175
176 imports.push(resolved);
177 }
178 }
179
180 Ok(imports)
181 }
182}
183
184#[no_mangle]
186pub extern "C" fn cadi_extension_create() -> *mut dyn Extension {
187 Box::into_raw(Box::new(JavaAtomizer::new()))
188}