use ahash::{AHashMap, AHashSet};
use alef_core::config::AlefConfig;
use alef_core::ir::{ApiSurface, TypeDef, TypeRef};
use anyhow::Context as _;
use std::collections::HashMap;
use std::path::Path;
use tracing::{debug, info};
use crate::cache;
use super::version::read_version;
/// Appends any missing ignore patterns to the `.gitignore` inside `base_dir`.
///
/// `.alef/` is always requested; every entry of `config.languages` contributes
/// the patterns listed in the `match` below. A pattern counts as present when
/// some line of the existing file equals it after trimming whitespace.
///
/// This is best-effort: a file that cannot be read is treated as empty, and a
/// failed write is only reported through a debug log.
pub fn ensure_gitignore(base_dir: &Path, config: &AlefConfig) {
    use alef_core::config::Language;

    let gitignore_path = base_dir.join(".gitignore");
    let existing = std::fs::read_to_string(&gitignore_path).unwrap_or_default();
    let present: AHashSet<&str> = existing.lines().map(str::trim).collect();

    // Collect every pattern we would like to see, in a stable order.
    let mut wanted: Vec<&str> = vec![".alef/"];
    for lang in &config.languages {
        let patterns: &[&str] = match lang {
            Language::Python => &["__pycache__/", "*.so", "*.pyd", ".venv/", "*.egg-info/", "dist/"],
            Language::Node => &["node_modules/", "*.node"],
            Language::Ruby => &[".gems/", "vendor/bundle/"],
            Language::Php => &["vendor/"],
            Language::Ffi => &["*.h.bak"],
            Language::Go => &["*.test"],
            Language::Java => &["target/", "*.class"],
            Language::Csharp => &["bin/", "obj/", "*.nupkg"],
            Language::Wasm => &[],
            _ => &[],
        };
        wanted.extend_from_slice(patterns);
    }

    let missing: Vec<&str> = wanted
        .into_iter()
        .filter(|pattern| !present.contains(pattern))
        .collect();
    if missing.is_empty() {
        return;
    }

    // Only insert a newline when the existing content does not already end with one.
    let separator = if existing.is_empty() || existing.ends_with('\n') {
        ""
    } else {
        "\n"
    };
    let new_content = format!("{existing}{separator}{}\n", missing.join("\n"));

    match std::fs::write(&gitignore_path, new_content) {
        Ok(()) => debug!("Updated .gitignore with {} entries", missing.len()),
        Err(e) => debug!("Could not update .gitignore: {e}"),
    }
}
/// Produces the API surface for `config`, reusing the IR cache when allowed.
///
/// Steps:
/// 1. If `config_path` has a parent directory, `ensure_gitignore` is run on it.
/// 2. A source hash is computed from `config.crate_config.sources` and `config_path`.
/// 3. Unless `clean` is set, a cache hit for that hash returns the cached IR.
/// 4. Otherwise the raw surface is extracted and passed through filtering,
///    opaque-type injection, cfg-field stripping, unknown-type sanitizing,
///    path mapping and de-duplication (in that order), then written to the cache.
///
/// # Errors
///
/// Fails when hashing, reading the cache, raw extraction, or writing the cache fails.
pub fn extract(config: &AlefConfig, config_path: &Path, clean: bool) -> anyhow::Result<ApiSurface> {
    if let Some(project_dir) = config_path.parent() {
        ensure_gitignore(project_dir, config);
    }

    let source_hash = cache::compute_source_hash(&config.crate_config.sources, config_path)
        .context("failed to compute source hash")?;

    // `clean` bypasses the cache lookup entirely.
    let cache_hit = !clean && cache::is_ir_cached(&source_hash);
    if cache_hit {
        info!("Using cached IR");
        return cache::read_cached_ir().context("failed to read cached IR");
    }

    let raw = extract_raw(config, config_path)?;
    let mut api = apply_filters(raw, config);

    // The post-processing passes run in this fixed order.
    inject_declared_opaque_types(&mut api, config);
    strip_cfg_fields(&mut api, &config.crate_config.features);
    sanitize_unknown_types(&mut api);
    apply_path_mappings(&mut api, config);
    dedup_api_surface(&mut api);

    cache::write_ir_cache(&api, &source_hash).context("failed to write IR cache")?;

    let (type_count, function_count, enum_count) = (api.types.len(), api.functions.len(), api.enums.len());
    info!(
        "Extracted {} types, {} functions, {} enums",
        type_count, function_count, enum_count
    );
    Ok(api)
}
/// Extracts the unprocessed API surface from the configured Rust sources.
///
/// Source files are grouped by crate name: when `source_crates` is configured,
/// each crate's name (with `-` replaced by `_`) is used; otherwise the name is
/// derived from each source path via `derive_crate_name_from_path`. Every group
/// is extracted separately and the results are concatenated into one
/// `ApiSurface` carrying the configured crate name and the version read from
/// `version_from`.
///
/// # Errors
///
/// Fails when the version cannot be read or when extraction of any group fails.
fn extract_raw(config: &AlefConfig, _config_path: &Path) -> anyhow::Result<ApiSurface> {
    use std::collections::BTreeMap;

    info!("Extracting API surface from Rust source...");

    let crate_cfg = &config.crate_config;
    let version = read_version(&crate_cfg.version_from)?;
    let workspace_root = crate_cfg.workspace_root.as_deref();
    let default_name = &crate_cfg.name;

    // A BTreeMap keeps the groups in crate-name order.
    let mut groups: BTreeMap<String, Vec<&Path>> = BTreeMap::new();
    if crate_cfg.source_crates.is_empty() {
        for source in &crate_cfg.sources {
            let crate_name = derive_crate_name_from_path(source, default_name);
            groups.entry(crate_name).or_default().push(source.as_path());
        }
    } else {
        for sc in &crate_cfg.source_crates {
            // A crate without sources must not create an (empty) group.
            if sc.sources.is_empty() {
                continue;
            }
            groups
                .entry(sc.name.replace('-', "_"))
                .or_default()
                .extend(sc.sources.iter().map(|source| source.as_path()));
        }
    }

    let mut merged = ApiSurface {
        crate_name: default_name.to_string(),
        version: version.clone(),
        types: Vec::new(),
        functions: Vec::new(),
        enums: Vec::new(),
        errors: Vec::new(),
    };

    for (crate_name, sources) in &groups {
        let part = alef_extract::extractor::extract(sources, crate_name, &version, workspace_root)
            .with_context(|| format!("failed to extract API surface from crate {crate_name}"))?;
        merged.types.extend(part.types);
        merged.functions.extend(part.functions);
        merged.enums.extend(part.enums);
        merged.errors.extend(part.errors);
    }

    Ok(merged)
}
/// Guesses the crate a source file belongs to from a `crates/<name>/src/` layout.
///
/// The directory name following the first `crates/` in the (lossily rendered)
/// path is used, with `-` replaced by `_`, provided the path also contains
/// `crates/<name>/src/`. In every other case `default` is returned unchanged.
///
/// NOTE(review): matching is textual and only understands `/` separators.
fn derive_crate_name_from_path(path: &Path, default: &str) -> String {
    let rendered = path.to_string_lossy();
    rendered
        .split("crates/")
        .nth(1)
        .and_then(|tail| tail.split('/').next())
        .filter(|dir| rendered.contains(&format!("crates/{dir}/src/")))
        .map_or_else(|| default.to_string(), |dir| dir.replace('-', "_"))
}
fn inject_declared_opaque_types(api: &mut ApiSurface, config: &AlefConfig) {
let mut sorted_opaques: Vec<_> = config.opaque_types.iter().collect();
sorted_opaques.sort_by_key(|(name, _)| (*name).clone());
for (name, rust_path) in sorted_opaques {
if !api.types.iter().any(|t| t.name == *name) && !api.enums.iter().any(|e| e.name == *name) {
api.types.push(alef_core::ir::TypeDef {
name: name.clone(),
rust_path: rust_path.clone(),
original_rust_path: rust_path.clone(),
fields: vec![],
methods: vec![],
is_opaque: true,
is_clone: false,
is_trait: false,
has_default: false,
has_stripped_cfg_fields: false,
is_return_type: false,
doc: String::new(),
cfg: None,
serde_rename_all: None,
has_serde: false,
super_traits: vec![],
});
debug!("Injected declared opaque type: {name} -> {rust_path}");
}
}
}
fn sanitize_unknown_types(api: &mut ApiSurface) {
let known_types: AHashSet<String> = api.types.iter().map(|t| t.name.clone()).collect();
let known_enums: AHashSet<String> = api.enums.iter().map(|e| e.name.clone()).collect();
let known_type_paths: AHashSet<String> = api.types.iter().map(|t| t.rust_path.replace('-', "_")).collect();
let known_enum_paths: AHashSet<String> = api.enums.iter().map(|e| e.rust_path.replace('-', "_")).collect();
for typ in &mut api.types {
for field in &mut typ.fields {
if sanitize_type_ref(&mut field.ty, &known_types, &known_enums) {
field.sanitized = true;
}
if !field.sanitized {
if let Some(ref path) = field.type_rust_path {
let normalized_path = path.replace('-', "_");
if let TypeRef::Named(ref name) = field.ty {
if known_types.contains(name.as_str()) || known_enums.contains(name.as_str()) {
let path_type_name = normalized_path.rsplit("::").next().unwrap_or("");
let path_matches = known_type_paths
.iter()
.chain(known_enum_paths.iter())
.any(|kp| kp.rsplit("::").next().unwrap_or("") == path_type_name);
if !path_matches {
field.ty = TypeRef::String;
field.sanitized = true;
}
}
}
if let TypeRef::Vec(ref inner) = field.ty {
if let TypeRef::Named(ref name) = **inner {
let vec_path_type = normalized_path.rsplit("::").next().unwrap_or("");
let vec_matches = known_type_paths
.iter()
.chain(known_enum_paths.iter())
.any(|kp| kp.rsplit("::").next().unwrap_or("") == vec_path_type);
if (known_types.contains(name.as_str()) || known_enums.contains(name.as_str()))
&& !vec_matches
{
field.ty = TypeRef::String;
field.sanitized = true;
}
}
}
}
}
}
let type_name = typ.name.clone();
for method in &mut typ.methods {
let mut method_sanitized = false;
for param in &mut method.params {
if sanitize_type_ref(&mut param.ty, &known_types, &known_enums) {
param.sanitized = true;
method_sanitized = true;
}
}
let is_self_return = matches!(&method.return_type, TypeRef::Named(n) if n == &type_name);
if !is_self_return && sanitize_type_ref(&mut method.return_type, &known_types, &known_enums) {
method_sanitized = true;
}
if method_sanitized {
method.sanitized = true;
}
}
}
for func in &mut api.functions {
let mut func_sanitized = false;
for param in &mut func.params {
if sanitize_type_ref(&mut param.ty, &known_types, &known_enums) {
param.sanitized = true;
func_sanitized = true;
}
}
if sanitize_type_ref(&mut func.return_type, &known_types, &known_enums) {
func_sanitized = true;
}
if func_sanitized {
func.sanitized = true;
}
}
for enum_def in &mut api.enums {
for variant in &mut enum_def.variants {
for field in &mut variant.fields {
if sanitize_type_ref(&mut field.ty, &known_types, &known_enums) {
field.sanitized = true;
}
}
}
}
for error_def in &mut api.errors {
for variant in &mut error_def.variants {
for field in &mut variant.fields {
if sanitize_type_ref(&mut field.ty, &known_types, &known_enums) {
field.sanitized = true;
}
}
}
}
}
/// Rewrites `ty` in place so it no longer names an unknown type.
///
/// A `Named` reference found in neither `known_types` nor `known_enums` becomes
/// a `Vec` of a primitive when `parse_homogeneous_tuple` recognises the name,
/// and `String` otherwise. `Optional`, `Vec` and `Map` are handled recursively
/// (both the key and the value of a map are always visited).
///
/// Returns `true` when anything was rewritten.
fn sanitize_type_ref(ty: &mut TypeRef, known_types: &AHashSet<String>, known_enums: &AHashSet<String>) -> bool {
    match ty {
        TypeRef::Named(name) => {
            if known_types.contains(name.as_str()) || known_enums.contains(name.as_str()) {
                return false;
            }
            let replacement = match parse_homogeneous_tuple(name) {
                Some(element) => TypeRef::Vec(Box::new(element)),
                None => TypeRef::String,
            };
            *ty = replacement;
            true
        }
        TypeRef::Optional(inner) | TypeRef::Vec(inner) => sanitize_type_ref(inner, known_types, known_enums),
        TypeRef::Map(key, value) => {
            // Evaluate both sides before combining so neither is skipped.
            let key_changed = sanitize_type_ref(key, known_types, known_enums);
            let value_changed = sanitize_type_ref(value, known_types, known_enums);
            key_changed || value_changed
        }
        _ => false,
    }
}
/// Recognises a parenthesised, comma-separated list whose elements are all the
/// same numeric primitive (e.g. `(u8, u8, u8)`) and returns that primitive.
///
/// Returns `None` when `name` (after trimming) is not wrapped in `(` … `)`,
/// when the elements differ, or when the element is not one of the integer or
/// float primitives listed below.
fn parse_homogeneous_tuple(name: &str) -> Option<TypeRef> {
    use alef_core::ir::PrimitiveType;

    let body = name.trim().strip_prefix('(')?.strip_suffix(')')?;
    let mut elements = body.split(',').map(str::trim);
    let first = elements.next()?;
    if elements.any(|other| other != first) {
        return None;
    }

    let primitive = match first {
        "u8" => PrimitiveType::U8,
        "u16" => PrimitiveType::U16,
        "u32" => PrimitiveType::U32,
        "u64" => PrimitiveType::U64,
        "usize" => PrimitiveType::Usize,
        "i8" => PrimitiveType::I8,
        "i16" => PrimitiveType::I16,
        "i32" => PrimitiveType::I32,
        "i64" => PrimitiveType::I64,
        "isize" => PrimitiveType::Isize,
        "f32" => PrimitiveType::F32,
        "f64" => PrimitiveType::F64,
        _ => return None,
    };
    Some(TypeRef::Primitive(primitive))
}
/// Drops struct fields gated behind a `cfg` that is not an enabled feature.
///
/// A field with a `cfg` is kept only when that string has exactly the form
/// `feature = "<name>"` and `<name>` appears in `enabled_features`; any other
/// `cfg` value causes the field to be removed. Surviving fields have their
/// `cfg` cleared. A type that lost at least one field is flagged with
/// `has_stripped_cfg_fields`.
fn strip_cfg_fields(api: &mut ApiSurface, enabled_features: &[String]) {
    let is_enabled = |cfg: &str| {
        cfg.strip_prefix("feature = \"")
            .and_then(|rest| rest.strip_suffix('"'))
            .is_some_and(|feature| enabled_features.iter().any(|ef| ef == feature))
    };

    for typ in &mut api.types {
        let count_before = typ.fields.len();
        let had_gated_field = typ.fields.iter().any(|f| f.cfg.is_some());

        typ.fields
            .retain(|f| f.cfg.as_ref().map_or(true, |cfg| is_enabled(cfg)));
        typ.fields.iter_mut().for_each(|f| f.cfg = None);

        if had_gated_field && typ.fields.len() < count_before {
            typ.has_stripped_cfg_fields = true;
        }
    }
}
fn dedup_api_surface(api: &mut ApiSurface) {
let enum_names: AHashSet<String> = api.enums.iter().map(|e| e.name.clone()).collect();
api.types.retain(|t| !enum_names.contains(&t.name));
{
let mut best: AHashMap<String, usize> = AHashMap::new();
for (i, t) in api.types.iter().enumerate() {
best.entry(t.name.clone())
.and_modify(|prev_i| {
if api.types[i].rust_path.len() < api.types[*prev_i].rust_path.len() {
*prev_i = i;
}
})
.or_insert(i);
}
let keep: AHashSet<usize> = best.values().copied().collect();
let mut idx = 0;
api.types.retain(|_| {
let k = keep.contains(&idx);
idx += 1;
k
});
}
{
let mut best: AHashMap<String, usize> = AHashMap::new();
for (i, e) in api.enums.iter().enumerate() {
best.entry(e.name.clone())
.and_modify(|prev_i| {
if api.enums[i].rust_path.len() < api.enums[*prev_i].rust_path.len() {
*prev_i = i;
}
})
.or_insert(i);
}
let keep: AHashSet<usize> = best.values().copied().collect();
let mut idx = 0;
api.enums.retain(|_| {
let k = keep.contains(&idx);
idx += 1;
k
});
}
let mut seen_fns: AHashSet<String> = AHashSet::new();
api.functions.retain(|f| seen_fns.insert(f.name.clone()));
let mut seen_errors: AHashSet<String> = AHashSet::new();
api.errors.retain(|e| seen_errors.insert(e.name.clone()));
}
/// Applies the include/exclude lists from `config` to the surface.
///
/// * A non-empty `include.types` restricts types and enums to the listed names
///   plus everything `expand_include_list` finds reachable from them.
/// * A non-empty `include.functions` restricts functions to the listed names.
/// * `exclude.types` removes matching types, enums and errors;
///   `exclude.functions` removes matching functions.
/// * `exclude.methods` removes methods whose `"<Type>.<method>"` key is listed.
fn apply_filters(mut api: ApiSurface, config: &AlefConfig) -> ApiSurface {
    let (include, exclude) = (&config.include, &config.exclude);

    if !include.types.is_empty() {
        // Compute the closure before any item is removed.
        let allowed = expand_include_list(&api, &include.types);
        api.types.retain(|t| allowed.contains(&t.name));
        api.enums.retain(|e| allowed.contains(&e.name));
    }
    if !include.functions.is_empty() {
        api.functions.retain(|f| include.functions.contains(&f.name));
    }

    api.types.retain(|t| !exclude.types.contains(&t.name));
    api.functions.retain(|f| !exclude.functions.contains(&f.name));
    api.enums.retain(|e| !exclude.types.contains(&e.name));
    api.errors.retain(|e| !exclude.types.contains(&e.name));

    if !exclude.methods.is_empty() {
        for typ in &mut api.types {
            let owner = &typ.name;
            typ.methods
                .retain(|m| !exclude.methods.contains(&format!("{owner}.{}", m.name)));
        }
    }

    api
}
/// Expands `include_types` to the set of names transitively reachable from it.
///
/// Starting from the requested names, every known type in the set contributes
/// the types and enums referenced by its fields, its method return types, and
/// its method parameters. This repeats until a full pass adds nothing new.
/// Requested names that match no known type are kept in the result as-is.
fn expand_include_list(api: &ApiSurface, include_types: &[String]) -> AHashSet<String> {
    let all_types: AHashMap<String, &TypeDef> = api.types.iter().map(|t| (t.name.clone(), t)).collect();
    let all_enums: AHashSet<String> = api.enums.iter().map(|e| e.name.clone()).collect();

    let mut needed: AHashSet<String> = include_types.iter().cloned().collect();
    let mut changed = true;
    while changed {
        changed = false;
        // Snapshot the names so `needed` can grow while we walk this pass.
        let current: Vec<String> = needed.iter().cloned().collect();
        for type_name in &current {
            if let Some(typ) = all_types.get(type_name) {
                for field in &typ.fields {
                    collect_named_types(&field.ty, &mut needed, &all_types, &all_enums, &mut changed);
                }
                for method in &typ.methods {
                    collect_named_types(&method.return_type, &mut needed, &all_types, &all_enums, &mut changed);
                    for param in &method.params {
                        collect_named_types(&param.ty, &mut needed, &all_types, &all_enums, &mut changed);
                    }
                }
            }
        }
    }
    needed
}
/// Adds every known type or enum name referenced by `ty` to `needed`.
///
/// `Optional`, `Vec` and `Map` are walked recursively. `changed` is set to
/// `true` whenever a name is inserted that was not already in `needed`; it is
/// never reset to `false` here.
fn collect_named_types(
    ty: &TypeRef,
    needed: &mut AHashSet<String>,
    all_types: &AHashMap<String, &TypeDef>,
    all_enums: &AHashSet<String>,
    changed: &mut bool,
) {
    match ty {
        TypeRef::Named(name) => {
            let is_defined = all_types.contains_key(name) || all_enums.contains(name);
            // `insert` returns true only for names not seen before.
            if is_defined && needed.insert(name.clone()) {
                *changed = true;
            }
        }
        TypeRef::Optional(inner) | TypeRef::Vec(inner) => {
            collect_named_types(inner, needed, all_types, all_enums, changed);
        }
        TypeRef::Map(key, value) => {
            for side in [key, value] {
                collect_named_types(side, needed, all_types, all_enums, changed);
            }
        }
        _ => {}
    }
}
/// Rewrites the start of `path` using the longest key of `mappings` that is a
/// prefix of it, replacing that prefix with the mapped value.
///
/// Returns `path` unchanged when no key is a prefix.
///
/// NOTE(review): prefixes are compared textually, not at `::` boundaries, so a
/// key `foo` also matches `foobar::X` — confirm this is intended.
fn rewrite_path(path: &str, mappings: &HashMap<String, String>) -> String {
    mappings
        .iter()
        .filter_map(|(from, to)| {
            path.strip_prefix(from.as_str())
                .map(|rest| (from.len(), to, rest))
        })
        // Two distinct keys of equal length cannot both prefix the same path,
        // so the longest match is unique.
        .max_by_key(|(prefix_len, _, _)| *prefix_len)
        .map_or_else(|| path.to_string(), |(_, to, rest)| format!("{to}{rest}"))
}
/// Rewrites the Rust paths of all types, functions, enums and errors using
/// `config.effective_path_mappings()`.
///
/// Does nothing when there are no mappings. Before an item's `rust_path` is
/// rewritten, its current value is stored in `original_rust_path` if that field
/// is still empty. The `type_rust_path` of each struct field is rewritten too.
fn apply_path_mappings(api: &mut ApiSurface, config: &AlefConfig) {
    let mappings = config.effective_path_mappings();
    if mappings.is_empty() {
        return;
    }

    // Remember the pre-mapping path once, then rewrite the live one.
    let relocate = |current: &mut String, original: &mut String| {
        if original.is_empty() {
            *original = current.clone();
        }
        *current = rewrite_path(current.as_str(), &mappings);
    };

    for typ in &mut api.types {
        relocate(&mut typ.rust_path, &mut typ.original_rust_path);
        for field_path in typ.fields.iter_mut().filter_map(|f| f.type_rust_path.as_mut()) {
            *field_path = rewrite_path(field_path.as_str(), &mappings);
        }
    }
    for func in &mut api.functions {
        relocate(&mut func.rust_path, &mut func.original_rust_path);
    }
    for enum_def in &mut api.enums {
        relocate(&mut enum_def.rust_path, &mut enum_def.original_rust_path);
    }
    for error_def in &mut api.errors {
        relocate(&mut error_def.rust_path, &mut error_def.original_rust_path);
    }
}