use crate::utils::error::{Error, Result};
use regex::Regex;
use std::fs;
/// Source language a [`ServiceDef`] was extracted from.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Language {
    /// Definition came from Rust source.
    Rust,
    /// Definition came from Elixir source.
    Elixir,
    /// Definition came from Go source.
    Go,
}

impl Language {
    /// Lowercase tag for this language, as written into the `code:language`
    /// literal of the RDF output.
    fn as_str(&self) -> &'static str {
        match *self {
            Self::Rust => "rust",
            Self::Elixir => "elixir",
            Self::Go => "go",
        }
    }
}
/// Kind of item a [`ServiceDef`] describes.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ServiceKind {
    /// A struct-like definition (also used for Elixir modules and Go structs).
    Struct,
    /// An enum definition.
    Enum,
    /// A trait definition.
    Trait,
}

impl ServiceKind {
    /// Local name of the RDF class emitted for this kind.
    ///
    /// Note that `Struct` maps to `"Service"`, not `"Struct"`.
    fn rdf_class(&self) -> &'static str {
        match *self {
            Self::Struct => "Service",
            Self::Enum => "Enum",
            Self::Trait => "Trait",
        }
    }
}
/// A single field of an extracted struct.
#[derive(Debug, Clone)]
pub struct Field {
/// Field identifier as captured from the source text.
pub name: String,
/// Type of the field, kept as the raw (trimmed) source text; it is not parsed further.
pub field_type: String,
}
/// A function, method or callback signature captured by one of the extractors.
#[derive(Debug, Clone)]
pub struct Method {
/// Function or callback name.
pub name: String,
/// Parameter texts, one trimmed entry per parameter, in declaration order.
pub params: Vec<String>,
/// Return type as source text, or `None` when the extractor captured none.
pub return_type: Option<String>,
}
/// A single variant of an extracted enum.
#[derive(Debug, Clone)]
pub struct Variant {
/// Variant identifier.
pub name: String,
/// Raw text of the variant's `(...)` or `{...}` payload; `None` for variants without one.
pub payload: Option<String>,
}
/// Language-neutral description of one extracted definition; the input to `convert_to_rdf`.
#[derive(Debug, Clone)]
pub struct ServiceDef {
/// Name of the struct, enum, trait or module.
pub name: String,
/// Language the definition was extracted from.
pub language: Language,
/// Kind of item; selects the RDF class in the output.
pub kind: ServiceKind,
/// Struct fields (left empty by the enum, trait and Elixir extractors).
pub fields: Vec<Field>,
/// Methods or callbacks associated with the definition.
pub methods: Vec<Method>,
/// Enum variants (only filled by the Rust enum extractor).
pub variants: Vec<Variant>,
/// Generic parameter names (only filled by the Rust extractors).
pub type_params: Vec<String>,
/// Bounds per generic parameter, keyed by parameter name.
pub trait_bounds: std::collections::HashMap<String, Vec<String>>,
}
pub fn extract_rust_service(file_path: &str) -> Result<Vec<ServiceDef>> {
let content = fs::read_to_string(file_path)
.map_err(|e| Error::new(&format!("Failed to read {}: {}", file_path, e)))?;
extract_rust_service_from_str(&content)
}
pub fn extract_rust_service_from_str(content: &str) -> Result<Vec<ServiceDef>> {
let mut services: Vec<ServiceDef> = Vec::new();
let struct_header_re =
Regex::new(r"pub\s+struct\s+(\w+)\s*(?:<([^>]*)>)?\s*(?:where\s+([^{]*))?\{")
.map_err(|e| Error::new(&format!("Failed to compile struct header regex: {}", e)))?;
for cap in struct_header_re.captures_iter(content) {
let name = cap_str(&cap, 1);
let type_params = cap
.get(2)
.map(|m| parse_type_params(m.as_str()))
.unwrap_or_default();
let open_brace_idx = match cap.get(0) {
Some(m) => m.end() - 1,
None => continue,
};
let body = balanced_block_body(content, open_brace_idx).unwrap_or("");
let fields = parse_struct_fields(body)?;
let mut trait_bounds =
extract_bounds_from_type_params(cap.get(2).map(|m| m.as_str()).unwrap_or(""));
if let Some(where_clause) = cap.get(3) {
trait_bounds.extend(extract_bounds_from_type_params(where_clause.as_str()));
}
services.push(ServiceDef {
name,
language: Language::Rust,
kind: ServiceKind::Struct,
fields,
methods: Vec::new(),
variants: Vec::new(),
type_params,
trait_bounds,
});
}
let enum_header_re =
Regex::new(r"pub\s+enum\s+(\w+)\s*(?:<([^>]*)>)?\s*(?:where\s+([^{]*))?\{")
.map_err(|e| Error::new(&format!("Failed to compile enum header regex: {}", e)))?;
for cap in enum_header_re.captures_iter(content) {
let name = cap_str(&cap, 1);
let type_params = cap
.get(2)
.map(|m| parse_type_params(m.as_str()))
.unwrap_or_default();
let open_brace_idx = match cap.get(0) {
Some(m) => m.end() - 1,
None => continue,
};
let body = balanced_block_body(content, open_brace_idx).unwrap_or("");
let variants = parse_enum_variants(body)?;
let mut trait_bounds =
extract_bounds_from_type_params(cap.get(2).map(|m| m.as_str()).unwrap_or(""));
if let Some(where_clause) = cap.get(3) {
trait_bounds.extend(extract_bounds_from_type_params(where_clause.as_str()));
}
services.push(ServiceDef {
name,
language: Language::Rust,
kind: ServiceKind::Enum,
fields: Vec::new(),
methods: Vec::new(),
variants,
type_params,
trait_bounds,
});
}
let trait_header_re =
Regex::new(r"pub\s+trait\s+(\w+)\s*(?:<([^>]*)>)?\s*(?::[^{]*)?\s*(?:where\s+([^{]*))?\{")
.map_err(|e| Error::new(&format!("Failed to compile trait header regex: {}", e)))?;
for cap in trait_header_re.captures_iter(content) {
let name = cap_str(&cap, 1);
let type_params = cap
.get(2)
.map(|m| parse_type_params(m.as_str()))
.unwrap_or_default();
let open_brace_idx = match cap.get(0) {
Some(m) => m.end() - 1,
None => continue,
};
let body = balanced_block_body(content, open_brace_idx).unwrap_or("");
let methods = parse_rust_fn_signatures(body)?;
let mut trait_bounds =
extract_bounds_from_type_params(cap.get(2).map(|m| m.as_str()).unwrap_or(""));
if let Some(where_clause) = cap.get(3) {
trait_bounds.extend(extract_bounds_from_type_params(where_clause.as_str()));
}
services.push(ServiceDef {
name,
language: Language::Rust,
kind: ServiceKind::Trait,
fields: Vec::new(),
methods,
variants: Vec::new(),
type_params,
trait_bounds,
});
}
let impl_header_re = Regex::new(
r"impl(?:<[^>]*>)?\s+(?:[\w:]+(?:<[^>]*>)?\s+for\s+)?(\w+)(?:<[^>]*>)?\s*(?:where\s+[^{]*)?\{",
)
.map_err(|e| Error::new(&format!("Failed to compile impl header regex: {}", e)))?;
for cap in impl_header_re.captures_iter(content) {
let type_name = cap_str(&cap, 1);
let open_brace_idx = match cap.get(0) {
Some(m) => m.end() - 1,
None => continue,
};
let body = balanced_block_body(content, open_brace_idx).unwrap_or("");
let methods = parse_rust_fn_signatures(body)?;
if methods.is_empty() {
continue;
}
if let Some(existing) = services.iter_mut().find(|s| s.name == type_name) {
existing.methods.extend(methods);
} else {
services.push(ServiceDef {
name: type_name,
language: Language::Rust,
kind: ServiceKind::Struct,
fields: Vec::new(),
methods,
variants: Vec::new(),
type_params: Vec::new(),
trait_bounds: std::collections::HashMap::new(),
});
}
}
Ok(services)
}
fn cap_str(cap: ®ex::Captures<'_>, idx: usize) -> String {
cap.get(idx)
.map(|m| m.as_str().to_string())
.unwrap_or_default()
}
/// Returns the text between the `{` at byte offset `open_idx` and its matching
/// `}`, exclusive of both braces.
///
/// Yields `None` when `open_idx` is out of range, does not point at a `{`, or
/// the block is never closed. Braces are counted purely textually; the scan
/// does not recognise string literals or comments.
fn balanced_block_body(content: &str, open_idx: usize) -> Option<&str> {
    if content.as_bytes().get(open_idx) != Some(&b'{') {
        return None;
    }

    // The scan starts on the opening brace itself, so `nesting` is at least 1
    // before any `}` is seen and the decrement below cannot underflow.
    let mut nesting = 0usize;
    for (pos, byte) in content.bytes().enumerate().skip(open_idx) {
        if byte == b'{' {
            nesting += 1;
        } else if byte == b'}' {
            nesting -= 1;
            if nesting == 0 {
                return content.get(open_idx + 1..pos);
            }
        }
    }
    None
}
/// Extracts the bare parameter names from the inside of a generic parameter
/// list (the text between `<` and `>`).
///
/// For each comma-separated entry, everything from the first `:` (bounds) or
/// `=` (default) onward is dropped, and a leading `const` keyword is removed,
/// so `"T: Clone, U = String, const N: usize"` yields `["T", "U", "N"]`.
/// Empty entries are skipped. Commas are split naively, so bounds that
/// themselves contain commas are not supported.
fn parse_type_params(raw: &str) -> Vec<String> {
    raw.split(',')
        .filter_map(|param| {
            let head = param
                .split(|c: char| c == ':' || c == '=')
                .next()
                .unwrap_or(param)
                .trim();
            // Only strip `const` when it is a whole keyword, so a parameter
            // that merely starts with those letters is left alone.
            let name = match head.strip_prefix("const") {
                Some(rest) if rest.starts_with(char::is_whitespace) => rest.trim_start(),
                _ => head,
            };
            if name.is_empty() {
                None
            } else {
                Some(name.to_string())
            }
        })
        .collect()
}
/// Collects the bounds of each parameter in a comma-separated list of
/// `Name: Bound + Bound` specs (a generic parameter list or a `where` clause).
///
/// Returns a map from parameter name to its bounds in order of appearance.
/// Specs without a `:` or without any non-empty bound are skipped. When the
/// same parameter appears more than once (`T: A, T: B`) its bounds are merged
/// rather than the later spec replacing the earlier one; duplicate bounds are
/// recorded once.
pub fn extract_bounds_from_type_params(
    raw: &str,
) -> std::collections::HashMap<String, Vec<String>> {
    use std::collections::HashMap;
    let mut bounds_map: HashMap<String, Vec<String>> = HashMap::new();
    for param_spec in raw.split(',') {
        // Split on the first `:` only, so path separators inside the bounds
        // (`std::fmt::Debug`) stay intact.
        let mut halves = param_spec.trim().splitn(2, ':');
        let param_name = halves.next().unwrap_or("").trim();
        let bounds_str = match halves.next() {
            Some(rest) => rest,
            None => continue,
        };
        let bounds: Vec<String> = bounds_str
            .split('+')
            .map(|b| b.trim().to_string())
            .filter(|b| !b.is_empty())
            .collect();
        if bounds.is_empty() {
            continue;
        }
        let merged = bounds_map
            .entry(param_name.to_string())
            .or_insert_with(Vec::new);
        for bound in bounds {
            if !merged.contains(&bound) {
                merged.push(bound);
            }
        }
    }
    bounds_map
}
/// Parses the body of an enum (the text between its braces) into variants.
///
/// The body is split on top-level commas; each segment may start with
/// `#[...]` attributes, followed by the variant name and an optional
/// `(...)` or `{...}` payload. A trailing `= discriminant` is not part of the
/// payload. `//` line comments (including `///` doc comments) are removed
/// first, so a documented variant is not skipped. Payload whitespace,
/// including line breaks, is collapsed to single spaces.
///
/// # Errors
/// Returns an error if the variant regex fails to compile.
fn parse_enum_variants(body: &str) -> Result<Vec<Variant>> {
    // `(?s)` lets `.` cross line breaks so a payload spanning several lines is
    // captured in full rather than cut at the end of its first line.
    let ident_re = Regex::new(r"(?s)^\s*(?:#\[[^\]]*\]\s*)*(\w+)(.*)")
        .map_err(|e| Error::new(&format!("Failed to compile variant ident regex: {}", e)))?;

    // Drop line comments up front: the regex is anchored at the start of a
    // segment, and comment text could also contain commas or brackets that
    // would confuse the splitter.
    // NOTE(review): a `//` inside an attribute string literal is cut as well.
    let uncommented = body
        .lines()
        .map(|line| match line.find("//") {
            Some(pos) => &line[..pos],
            None => line,
        })
        .collect::<Vec<&str>>()
        .join("\n");

    let mut variants = Vec::new();
    for segment in split_top_level_commas(&uncommented) {
        let segment = segment.trim();
        if segment.is_empty() {
            continue;
        }
        let cap = match ident_re.captures(segment) {
            Some(cap) => cap,
            None => continue,
        };
        let name = cap_str(&cap, 1);
        if name.is_empty() {
            continue;
        }
        let rest = cap.get(2).map(|m| m.as_str()).unwrap_or("").trim();
        let payload = if rest.starts_with('(') || rest.starts_with('{') {
            // Anything from `=` onward is an explicit discriminant.
            let payload_end = rest.find('=').unwrap_or(rest.len());
            let raw = rest[..payload_end]
                .split_whitespace()
                .collect::<Vec<&str>>()
                .join(" ");
            if raw.is_empty() {
                None
            } else {
                Some(raw)
            }
        } else {
            None
        };
        variants.push(Variant { name, payload });
    }
    Ok(variants)
}
/// Splits `input` on commas that are not nested inside `()`, `[]`, `{}` or
/// `<>`.
///
/// The returned slices are untrimmed and borrow from `input`; there is always
/// at least one segment (the whole input when it contains no top-level
/// comma). Unmatched closing brackets are ignored. The `>` of an arrow
/// (`->` or `=>`) is not treated as a closing angle bracket, so
/// `Map<fn() -> u8, V>` stays in one piece.
fn split_top_level_commas(input: &str) -> Vec<&str> {
    let mut segments = Vec::new();
    let mut depth = 0usize;
    let mut start = 0usize;
    let mut prev: Option<u8> = None;
    for (i, b) in input.bytes().enumerate() {
        match b {
            b'(' | b'[' | b'{' | b'<' => depth += 1,
            // Part of `->` / `=>`: does not close anything.
            b'>' if prev == Some(b'-') || prev == Some(b'=') => {}
            b')' | b']' | b'}' | b'>' => depth = depth.saturating_sub(1),
            b',' if depth == 0 => {
                // `,` is ASCII, so `i` and `i + 1` are always char boundaries.
                if let Some(seg) = input.get(start..i) {
                    segments.push(seg);
                }
                start = i + 1;
            }
            _ => {}
        }
        prev = Some(b);
    }
    if let Some(seg) = input.get(start..) {
        segments.push(seg);
    }
    segments
}
fn parse_rust_fn_signatures(body: &str) -> Result<Vec<Method>> {
let mut methods = Vec::new();
let fn_re = Regex::new(
r"fn\s+(\w+)\s*(?:<[^>]*>)?\s*\(([^)]*)\)\s*(?:->\s*([^{;]+?))?\s*(?:where|\{|;)",
)
.map_err(|e| Error::new(&format!("Failed to compile fn regex: {}", e)))?;
for cap in fn_re.captures_iter(body) {
let name = cap_str(&cap, 1);
let params_str = cap.get(2).map(|m| m.as_str()).unwrap_or("");
let params: Vec<String> = params_str
.split(',')
.map(|p| p.trim().to_string())
.filter(|p| !p.is_empty())
.collect();
let return_type = cap
.get(3)
.map(|m| m.as_str().trim().to_string())
.filter(|s| !s.is_empty());
methods.push(Method {
name,
params,
return_type,
});
}
Ok(methods)
}
/// Reads the Elixir source file at `file_path` and returns one definition per
/// `defmodule ... do` found in it.
///
/// Module names may be dotted (`MyApp.Worker`). If the file contains the text
/// `use GenServer`, every module receives the GenServer callbacks found
/// anywhere in the file; the check and the callback scan are file-wide, not
/// per module. Otherwise modules are returned without methods.
///
/// # Errors
/// Returns an error if the file cannot be read or a regex fails to compile.
pub fn extract_elixir_genserver(file_path: &str) -> Result<Vec<ServiceDef>> {
    let content = fs::read_to_string(file_path)
        .map_err(|e| Error::new(&format!("Failed to read {}: {}", file_path, e)))?;

    // `[\w.]+` rather than `\w+`: namespaced module names contain dots and
    // would otherwise never match.
    let module_re = Regex::new(r"defmodule\s+([\w.]+)\s+do")
        .map_err(|e| Error::new(&format!("Failed to compile module regex: {}", e)))?;

    // The callbacks depend only on the file content, so scan once and reuse.
    let callbacks = if content.contains("use GenServer") {
        extract_elixir_callbacks(&content)?
    } else {
        Vec::new()
    };

    let services = module_re
        .captures_iter(&content)
        .map(|cap| ServiceDef {
            // Group 1 is mandatory in the pattern, so indexing cannot fail.
            name: cap[1].to_string(),
            language: Language::Elixir,
            kind: ServiceKind::Struct,
            fields: Vec::new(),
            methods: callbacks.clone(),
            variants: Vec::new(),
            type_params: Vec::new(),
            trait_bounds: std::collections::HashMap::new(),
        })
        .collect();
    Ok(services)
}
/// Collects the GenServer callback definitions (`init`, `handle_call`,
/// `handle_cast`, `handle_info`, `terminate`) found in `content`.
///
/// Each match becomes a `Method` whose parameters are the comma-separated
/// pieces of the argument list and whose return type is always `"tuple"`.
///
/// # Errors
/// Returns an error if the callback regex fails to compile.
fn extract_elixir_callbacks(content: &str) -> Result<Vec<Method>> {
    let callback_re =
        Regex::new(r"def\s+(init|handle_call|handle_cast|handle_info|terminate)\s*\(([^)]*)\)")
            .map_err(|e| Error::new(&format!("Failed to compile callback regex: {}", e)))?;

    let found = callback_re
        .captures_iter(content)
        .map(|cap| {
            // Groups 1 and 2 are mandatory in the pattern, so indexing them
            // cannot fail on a successful match.
            let params: Vec<String> = cap[2]
                .split(',')
                .map(|piece| piece.trim().to_string())
                .filter(|piece| !piece.is_empty())
                .collect();
            Method {
                name: cap[1].to_string(),
                params,
                return_type: Some("tuple".to_string()),
            }
        })
        .collect();
    Ok(found)
}
pub fn extract_go_service(file_path: &str) -> Result<Vec<ServiceDef>> {
let content = fs::read_to_string(file_path)
.map_err(|e| Error::new(&format!("Failed to read {}: {}", file_path, e)))?;
let mut services = Vec::new();
let struct_re = Regex::new(r"type\s+(\w+)\s+struct\s*\{([^}]*)\}")
.map_err(|e| Error::new(&format!("Failed to compile Go struct regex: {}", e)))?;
for cap in struct_re.captures_iter(&content) {
let name = cap.get(1).unwrap().as_str().to_string();
let body = cap.get(2).unwrap().as_str();
let fields = parse_go_struct_fields(body)?;
let methods = extract_go_methods(&content, &name)?;
services.push(ServiceDef {
name,
language: Language::Go,
kind: ServiceKind::Struct,
fields,
methods,
variants: Vec::new(),
type_params: Vec::new(),
trait_bounds: std::collections::HashMap::new(),
});
}
Ok(services)
}
/// Parses the body of a Go struct into its named fields.
///
/// The body is read one declaration at a time (separated by line breaks or
/// `;`). Struct tags in backticks and `//` comments are discarded. A
/// declaration `a, b T` yields one field per name, each with type `T`. The
/// type is kept as written, so `[]string`, `*Node` and
/// `map[string]interface{}` are preserved. Embedded fields, which have no
/// name, are skipped.
///
/// # Errors
/// Never fails; the `Result` is kept for signature compatibility with the
/// other parsers.
fn parse_go_struct_fields(body: &str) -> Result<Vec<Field>> {
    let mut fields = Vec::new();
    for line in body.lines() {
        // A struct tag always follows the type, so everything from the first
        // backtick onward can be dropped.
        let line = line.split('`').next().unwrap_or(line);
        let line = match line.find("//") {
            Some(pos) => &line[..pos],
            None => line,
        };
        for decl in line.split(';') {
            if let Some((names, field_type)) = split_go_field_decl(decl) {
                for name in names {
                    fields.push(Field {
                        name: name.to_string(),
                        field_type: field_type.to_string(),
                    });
                }
            }
        }
    }
    Ok(fields)
}

/// Splits one Go field declaration (`a, b Type`) into its names and type text.
///
/// Returns `None` for anything that is not `identifier-list whitespace type`:
/// blank text, a lone closing brace, or an embedded field such as `Base`,
/// `*Base` or `pkg.Type`.
fn split_go_field_decl(decl: &str) -> Option<(Vec<&str>, &str)> {
    let mut names = Vec::new();
    let mut rest = decl.trim();
    loop {
        let ident_len = rest
            .find(|c: char| !(c.is_alphanumeric() || c == '_'))
            .unwrap_or(rest.len());
        if ident_len == 0 {
            return None;
        }
        let (ident, tail) = rest.split_at(ident_len);
        names.push(ident);

        let after = tail.trim_start();
        if after.starts_with(',') {
            // Another name follows in the identifier list.
            rest = after[1..].trim_start();
            continue;
        }
        // A type must be present and separated from the name by whitespace.
        if after.is_empty() || after.len() == tail.len() {
            return None;
        }
        return Some((names, after));
    }
}
fn extract_go_methods(content: &str, struct_name: &str) -> Result<Vec<Method>> {
let mut methods = Vec::new();
let method_pattern = format!(
r"func\s*\(\s*\w+\s*\*{}\s*\)\s*(\w+)\s*\(([^)]*)\)\s*(\w+)?",
struct_name
);
let method_re = Regex::new(&method_pattern)
.map_err(|e| Error::new(&format!("Failed to compile Go method regex: {}", e)))?;
for cap in method_re.captures_iter(content) {
let method_name = cap.get(1).unwrap().as_str().to_string();
let params_str = cap.get(2).unwrap().as_str();
let params: Vec<String> = params_str
.split(',')
.map(|p| p.trim().to_string())
.filter(|p| !p.is_empty())
.collect();
let return_type = cap.get(3).map(|m| m.as_str().to_string());
methods.push(Method {
name: method_name,
params,
return_type,
});
}
Ok(methods)
}
fn parse_struct_fields(body: &str) -> Result<Vec<Field>> {
let mut fields = Vec::new();
let field_re = Regex::new(r"(\w+)\s*:\s*([^,}]+)")
.map_err(|e| Error::new(&format!("Failed to compile field regex: {}", e)))?;
for cap in field_re.captures_iter(body) {
let field_name = cap.get(1).unwrap().as_str().to_string();
let field_type = cap.get(2).unwrap().as_str().trim().to_string();
fields.push(Field {
name: field_name,
field_type,
});
}
Ok(fields)
}
/// Serialises `services` as an RDF document in Turtle syntax.
///
/// The output starts with the `code:`, `rdf:` and `rdfs:` prefix
/// declarations. Each service then becomes one resource `code:<name>` (name
/// passed through `sanitize_iri`) typed with the class for its kind and
/// carrying its language, type parameters with their bounds, fields, variants
/// and methods. All extracted text is escaped before being written into a
/// string literal, so quotes, backslashes and line breaks in the input cannot
/// produce invalid Turtle.
///
/// # Errors
/// Never fails; the `Result` is kept for signature compatibility.
pub fn convert_to_rdf(services: &[ServiceDef]) -> Result<String> {
    let mut turtle = String::from(
        "@prefix code: <https://ggen.io/code#> .\n\
         @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .\n\
         @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .\n\n",
    );
    for service in services {
        let resource_id = format!("code:{}", sanitize_iri(&service.name));
        turtle.push_str(&format!(
            "{} a code:{} ;\n code:language \"{}\" ;\n",
            resource_id,
            service.kind.rdf_class(),
            service.language.as_str()
        ));

        for tp in &service.type_params {
            turtle.push_str(&format!(
                " code:typeParam \"{}\" ;\n",
                escape_turtle_literal(tp)
            ));
            if let Some(bounds) = service.trait_bounds.get(tp) {
                for bound in bounds {
                    turtle.push_str(&format!(
                        " code:traitBound \"{}\" ;\n",
                        escape_turtle_literal(bound)
                    ));
                }
            }
        }

        for field in &service.fields {
            turtle.push_str(&format!(
                " code:hasField [ a code:Field ; code:fieldName \"{}\" ; code:fieldType \"{}\" ] ;\n",
                escape_turtle_literal(&field.name),
                escape_turtle_literal(&field.field_type)
            ));
        }

        for variant in &service.variants {
            turtle.push_str(&format!(
                " code:hasVariant [ a code:Variant ; code:variantName \"{}\"",
                escape_turtle_literal(&variant.name)
            ));
            if let Some(payload) = &variant.payload {
                turtle.push_str(&format!(
                    "; code:variantPayload \"{}\"",
                    escape_turtle_literal(payload)
                ));
            }
            turtle.push_str(" ] ;\n");
        }

        for method in &service.methods {
            turtle.push_str(&format!(
                " code:hasMethod [ a code:Method ; code:methodName \"{}\"",
                escape_turtle_literal(&method.name)
            ));
            if !method.params.is_empty() {
                let params_str = method.params.join(", ");
                turtle.push_str(&format!(
                    "; code:methodParams \"{}\"",
                    escape_turtle_literal(&params_str)
                ));
            }
            if let Some(return_type) = &method.return_type {
                turtle.push_str(&format!(
                    "; code:returnType \"{}\"",
                    escape_turtle_literal(return_type)
                ));
            }
            turtle.push_str(" ] ;\n");
        }

        // Every statement above ends in ";\n"; turn the last separator into
        // the "." that terminates the resource.
        if turtle.ends_with(";\n") {
            turtle.pop();
            turtle.pop();
            turtle.push_str(" .\n");
        }
        turtle.push('\n');
    }
    Ok(turtle)
}

/// Escapes `value` for use inside a double-quoted Turtle string literal.
fn escape_turtle_literal(value: &str) -> String {
    let mut escaped = String::with_capacity(value.len());
    for ch in value.chars() {
        match ch {
            '\\' => escaped.push_str("\\\\"),
            '"' => escaped.push_str("\\\""),
            '\n' => escaped.push_str("\\n"),
            '\r' => escaped.push_str("\\r"),
            '\t' => escaped.push_str("\\t"),
            _ => escaped.push(ch),
        }
    }
    escaped
}
/// Makes `input` safe to use as the local part of a prefixed name by
/// replacing every character other than an ASCII letter, ASCII digit or `_`
/// with `_`.
///
/// The result has the same number of characters as the input.
fn sanitize_iri(input: &str) -> String {
    let mut cleaned = String::with_capacity(input.len());
    for ch in input.chars() {
        if ch.is_ascii_alphanumeric() || ch == '_' {
            cleaned.push(ch);
        } else {
            cleaned.push('_');
        }
    }
    cleaned
}
// Unit tests for the extractors and the RDF serialiser. They exercise the
// string-based entry points and private helpers only; no test reads a file.
#[cfg(test)]
mod tests {
use super::*;
// Characters outside [A-Za-z0-9_] are replaced with '_'.
#[test]
fn test_sanitize_iri() {
assert_eq!(sanitize_iri("MyService"), "MyService");
assert_eq!(sanitize_iri("My-Service"), "My_Service");
assert_eq!(sanitize_iri("service.type"), "service_type");
}
// With no services the output still carries the prefix header.
#[test]
fn test_convert_to_rdf_empty() {
let services: Vec<ServiceDef> = vec![];
let rdf = convert_to_rdf(&services).unwrap();
assert!(rdf.contains("@prefix code:"));
}
// A struct with one field: resource name, class, language and field appear.
#[test]
fn test_convert_to_rdf_with_service() {
let service = ServiceDef {
name: "MyService".to_string(),
language: Language::Rust,
kind: ServiceKind::Struct,
fields: vec![Field {
name: "id".to_string(),
field_type: "u64".to_string(),
}],
methods: vec![],
variants: vec![],
type_params: vec![],
trait_bounds: std::collections::HashMap::new(),
};
let rdf = convert_to_rdf(&[service]).unwrap();
assert!(rdf.contains("code:MyService"));
assert!(rdf.contains("code:Service"));
assert!(rdf.contains("\"rust\""));
assert!(rdf.contains("\"id\""));
assert!(rdf.contains("\"u64\""));
}
// A method with params and a return type emits the method predicates.
#[test]
fn test_convert_to_rdf_with_method() {
let service = ServiceDef {
name: "Handler".to_string(),
language: Language::Elixir,
kind: ServiceKind::Struct,
fields: vec![],
methods: vec![Method {
name: "handle_call".to_string(),
params: vec!["msg".to_string(), "from".to_string()],
return_type: Some("tuple".to_string()),
}],
variants: vec![],
type_params: vec![],
trait_bounds: std::collections::HashMap::new(),
};
let rdf = convert_to_rdf(&[service]).unwrap();
assert!(rdf.contains("code:Handler"));
assert!(rdf.contains("code:Method"));
assert!(rdf.contains("\"handle_call\""));
assert!(rdf.contains("code:methodParams"));
assert!(rdf.contains("code:returnType"));
}
// Simple comma-separated `name: Type` pairs are parsed in order.
#[test]
fn test_parse_struct_fields() {
let body = "id: u64, name: String, active: bool";
let fields = parse_struct_fields(body).unwrap();
assert_eq!(fields.len(), 3);
assert_eq!(fields[0].name, "id");
assert_eq!(fields[0].field_type, "u64");
assert_eq!(fields[1].name, "name");
assert_eq!(fields[1].field_type, "String");
}
// GenServer callbacks are found in a module body.
#[test]
fn test_extract_elixir_callbacks() {
let content = r#"
defmodule MyHandler do
use GenServer
def init(args) do
{:ok, args}
end
def handle_call(msg, from, state) do
{:reply, msg, state}
end
end
"#;
let methods = extract_elixir_callbacks(content).unwrap();
assert!(methods.iter().any(|m| m.name == "init"));
assert!(methods.iter().any(|m| m.name == "handle_call"));
}
// Bounds after ':' are stripped, leaving only the parameter names.
#[test]
fn test_parse_type_params_strips_bounds() {
let params = parse_type_params("T: Clone + Send, U, V: Default");
assert_eq!(
params,
vec!["T".to_string(), "U".to_string(), "V".to_string()]
);
}
// Unit, tuple and struct variants are all found; payloads are kept as text.
#[test]
fn test_parse_enum_variants() {
let body = "Active,\n Inactive,\n Pending(u32),\n Custom { code: u16 },\n";
let variants = parse_enum_variants(body).unwrap();
let names: Vec<&str> = variants.iter().map(|v| v.name.as_str()).collect();
assert!(names.contains(&"Active"));
assert!(names.contains(&"Inactive"));
assert!(names.contains(&"Pending"));
assert!(names.contains(&"Custom"));
let pending = variants
.iter()
.find(|v| v.name == "Pending")
.expect("Pending variant");
assert!(pending.payload.is_some());
assert!(pending.payload.as_ref().unwrap().contains("u32"));
let custom = variants
.iter()
.find(|v| v.name == "Custom")
.expect("Custom variant");
assert!(custom.payload.is_some());
assert!(custom.payload.as_ref().unwrap().contains("code"));
}
// Return types and parameters are captured from fn definitions with bodies.
#[test]
fn test_parse_rust_fn_signatures() {
let body = "fn start(&self) -> bool { true }\n fn stop(&mut self, force: bool) { }\n";
let methods = parse_rust_fn_signatures(body).unwrap();
let start = methods
.iter()
.find(|m| m.name == "start")
.expect("start fn");
assert_eq!(start.return_type.as_deref(), Some("bool"));
let stop = methods.iter().find(|m| m.name == "stop").expect("stop fn");
assert!(stop.params.iter().any(|p| p.contains("force")));
}
// A generic struct yields its type parameter and its fields.
#[test]
fn test_extract_rust_struct_with_generics() {
let src = "pub struct Container<T> { item: T, count: usize }";
let services = extract_rust_service_from_str(src).unwrap();
let c = services
.iter()
.find(|s| s.name == "Container")
.expect("Container");
assert_eq!(c.kind, ServiceKind::Struct);
assert!(c.type_params.contains(&"T".to_string()));
assert!(c.fields.iter().any(|f| f.name == "item"));
}
// A public enum is reported with kind Enum and all of its variants.
#[test]
fn test_extract_rust_enum() {
let src = "pub enum Status { Ok, Err, Pending }";
let services = extract_rust_service_from_str(src).unwrap();
let e = services
.iter()
.find(|s| s.name == "Status")
.expect("Status enum");
assert_eq!(e.kind, ServiceKind::Enum);
let names: Vec<&str> = e.variants.iter().map(|v| v.name.as_str()).collect();
assert!(names.contains(&"Ok"));
assert!(names.contains(&"Err"));
assert!(names.contains(&"Pending"));
}
// A public trait is reported with kind Trait and its declared methods.
#[test]
fn test_extract_rust_trait() {
let src = "pub trait Runnable { fn run(&self) -> i32; fn name(&self) -> String; }";
let services = extract_rust_service_from_str(src).unwrap();
let t = services
.iter()
.find(|s| s.name == "Runnable")
.expect("Runnable trait");
assert_eq!(t.kind, ServiceKind::Trait);
assert!(t.methods.iter().any(|m| m.name == "run"));
assert!(t.methods.iter().any(|m| m.name == "name"));
}
// Methods from an impl block are attached to the struct of the same name.
#[test]
fn test_impl_methods_attached_to_struct() {
let src = "pub struct Worker { id: u32 }\n\
impl Worker { fn process(&self) -> bool { true } fn reset(&mut self) { } }";
let services = extract_rust_service_from_str(src).unwrap();
let w = services
.iter()
.find(|s| s.name == "Worker")
.expect("Worker");
assert_eq!(w.kind, ServiceKind::Struct);
assert!(w.methods.iter().any(|m| m.name == "process"));
assert!(w.methods.iter().any(|m| m.name == "reset"));
}
}