use crate::core::config::ResolvedCrateConfig;
use crate::core::ir::{ApiSurface, TypeDef, TypeRef};
use ahash::{AHashMap, AHashSet};
use anyhow::Context as _;
use std::collections::HashMap;
use std::path::Path;
use tracing::{debug, info};
use crate::cli::cache;
use super::version::read_version;
/// Schema tag placed at the front of every IR cache key built in this file.
/// Changing the value changes every key, so entries written under the previous
/// tag no longer match.
const IR_CACHE_SCHEMA_VERSION: &str = "ir-cache-v2";
/// Makes sure `base_dir/.gitignore` lists the ignore patterns relevant to the
/// configured target languages.
///
/// The file is read (a missing or unreadable file counts as empty), the wanted
/// patterns are compared against the trimmed lines already present, and only
/// the missing ones are appended. Existing content is never rewritten or
/// reordered. Each pattern is appended at most once, even when the same
/// language is listed several times in `config.languages`.
///
/// This is best-effort: a failed write is logged at debug level and otherwise
/// ignored.
pub fn ensure_gitignore(base_dir: &Path, config: &ResolvedCrateConfig) {
    use crate::core::config::Language;

    let gitignore_path = base_dir.join(".gitignore");
    let existing = std::fs::read_to_string(&gitignore_path).unwrap_or_default();

    // `.alef/` is always wanted; the rest depends on the enabled languages.
    let mut wanted: Vec<&str> = vec![".alef/"];
    for lang in &config.languages {
        match lang {
            Language::Python => {
                wanted.extend_from_slice(&["__pycache__/", "*.so", "*.pyd", ".venv/", "*.egg-info/", "dist/"])
            }
            Language::Node => wanted.extend_from_slice(&["node_modules/", "*.node"]),
            Language::Ruby => wanted.extend_from_slice(&[".gems/", "vendor/bundle/"]),
            Language::Php => wanted.extend_from_slice(&["vendor/"]),
            Language::Ffi => wanted.push("*.h.bak"),
            Language::Go => wanted.push("*.test"),
            Language::Java => wanted.extend_from_slice(&["target/", "*.class"]),
            Language::Csharp => wanted.extend_from_slice(&["bin/", "obj/", "*.nupkg"]),
            // Wasm and any remaining language contribute no patterns.
            _ => {}
        }
    }

    // `seen` starts as the lines already in the file and grows as patterns are
    // accepted, so a pattern is skipped both when it already exists on disk and
    // when it was queued earlier in this call.
    let mut seen: AHashSet<&str> = existing.lines().map(str::trim).collect();
    let to_add: Vec<&str> = wanted.into_iter().filter(|entry| seen.insert(*entry)).collect();
    if to_add.is_empty() {
        return;
    }

    // Only insert a newline when the existing content does not already end with one.
    let separator = if existing.is_empty() || existing.ends_with('\n') {
        ""
    } else {
        "\n"
    };
    let additions = to_add.join("\n");
    let new_content = format!("{existing}{separator}{additions}\n");

    match std::fs::write(&gitignore_path, new_content) {
        Ok(()) => debug!("Updated .gitignore with {} entries", to_add.len()),
        Err(e) => debug!("Could not update .gitignore: {e}"),
    }
}
/// Produces the filtered, sanitized and validated [`ApiSurface`] for `config`.
///
/// A cache key is built from the schema tag, the sources hash, the resolved
/// version and a hash of the serialized config. Unless `clean` is set, a cached
/// IR matching that key is read, re-validated and returned. Otherwise the
/// surface is extracted from source, run through the post-processing passes in
/// the order below, validated, and written back to the cache.
///
/// # Errors
///
/// Fails when the crate name is not valid for the cache, when hashing, cache
/// I/O or source extraction fails, when service extraction reports problems,
/// or when validation finds errors that are not suppressed.
pub fn extract(config: &ResolvedCrateConfig, config_path: &Path, clean: bool) -> anyhow::Result<ApiSurface> {
    if let Some(config_dir) = config_path.parent() {
        ensure_gitignore(config_dir, config);
    }

    cache::validate_cache_crate_name(&config.name).context("invalid crate name for cache")?;

    let cache_key = {
        let source_hash = cache::sources_hash(&config.sources).context("failed to compute sources hash")?;
        let version_for_hash = config.resolved_version().unwrap_or_default();
        let config_hash = extraction_config_hash(config)?;
        format!("{IR_CACHE_SCHEMA_VERSION}:{source_hash}:{version_for_hash}:{config_hash}")
    };

    // The cache is only consulted when the caller did not ask for a clean run.
    let cache_hit = !clean && cache::is_ir_cached(&config.name, &cache_key);
    if cache_hit {
        info!("Using cached IR");
        let cached = cache::read_cached_ir(&config.name).context("failed to read cached IR")?;
        validate_extracted_api(&cached, config)?;
        return Ok(cached);
    }

    // Post-processing passes; their relative order is significant.
    let mut api = apply_filters(extract_raw(config, config_path)?, config);
    inject_declared_opaque_types(&mut api, config);
    strip_cfg_fields(&mut api, &config.features);
    strip_binding_excluded(&mut api)?;
    sanitize_unknown_types(&mut api);
    apply_path_mappings(&mut api, config);
    dedup_api_surface(&mut api);
    normalize_field_type_paths(&mut api);

    let service_errors = crate::extract::extractor::service::extract_services(&mut api, config);
    if !service_errors.is_empty() {
        let bullet_lines: Vec<String> = service_errors.iter().map(|message| format!("- {message}")).collect();
        anyhow::bail!("service extraction failed:\n{}", bullet_lines.join("\n"));
    }

    mark_adapter_handled_methods(&mut api, config);

    // Method exclusions are applied here as well because services only exist
    // after `extract_services` has run.
    let excluded_methods = &config.exclude.methods;
    if !excluded_methods.is_empty() {
        for service in &mut api.services {
            let owner = &service.name;
            service
                .configurators
                .retain(|m| !excluded_methods.contains(&format!("{owner}.{}", m.name)));
        }
        for typ in &mut api.types {
            let owner = &typ.name;
            typ.methods
                .retain(|m| !excluded_methods.contains(&format!("{owner}.{}", m.name)));
        }
    }

    validate_extracted_api(&api, config)?;
    cache::write_ir_cache(&config.name, &api, &cache_key).context("failed to write IR cache")?;

    let (type_count, function_count, enum_count) = (api.types.len(), api.functions.len(), api.enums.len());
    info!(
        "Extracted {} types, {} functions, {} enums",
        type_count, function_count, enum_count
    );
    Ok(api)
}
/// Returns the hex-encoded BLAKE3 digest of `config` serialized as TOML.
///
/// # Errors
///
/// Fails when the config cannot be serialized to TOML.
fn extraction_config_hash(config: &ResolvedCrateConfig) -> anyhow::Result<String> {
    let serialized = toml::to_string(config).context("failed to serialize resolved config for IR cache key")?;
    let digest = blake3::hash(serialized.as_bytes());
    Ok(digest.to_hex().to_string())
}
/// Runs API-surface validation and converts unsuppressed errors into a failure.
///
/// Warnings are logged. An error is downgraded to a `[suppressed]` warning
/// only when its code is not critical-unsuppressible and its textual form
/// appears in `config.suppress_validation_codes`. All remaining errors are
/// reported together, one bullet line each.
///
/// # Errors
///
/// Fails when at least one validation error is left unsuppressed.
fn validate_extracted_api(api: &ApiSurface, config: &ResolvedCrateConfig) -> anyhow::Result<()> {
    let bridged_trait_names: AHashSet<&str> = config
        .trait_bridges
        .iter()
        .map(|bridge| bridge.trait_name.as_str())
        .collect();

    let report = crate::core::validation::validate_api_surface_with_bridged_traits(api, &bridged_trait_names);
    for warning in report.warnings() {
        tracing::warn!("{warning}");
    }

    let (suppressed, fatal): (Vec<_>, Vec<_>) = report.errors().partition(|diag| {
        // Critical codes can never be suppressed, whatever the config says.
        if crate::core::validation::is_critical_unsuppressible(diag.code) {
            return false;
        }
        let code_text = diag.code.to_string();
        config
            .suppress_validation_codes
            .iter()
            .any(|allowed| allowed == &code_text)
    });

    for diag in suppressed {
        tracing::warn!("[suppressed] {diag}");
    }

    if fatal.is_empty() {
        return Ok(());
    }

    let formatted = fatal
        .iter()
        .map(|d| {
            let path = match d.item_path.as_deref() {
                Some(p) => format!(" item `{p}`"),
                None => String::new(),
            };
            format!("- [{}]{path} {}", d.code, d.reason)
        })
        .collect::<Vec<_>>()
        .join("\n");
    Err(anyhow::anyhow!("{}", formatted))
}
fn extract_raw(config: &ResolvedCrateConfig, _config_path: &Path) -> anyhow::Result<ApiSurface> {
info!("Extracting API surface from Rust source...");
let version = read_version(&config.version_from)?;
let workspace_root = config.workspace_root.as_deref();
let default_name = &config.name;
let mut groups: std::collections::BTreeMap<String, Vec<&Path>> = std::collections::BTreeMap::new();
if !config.source_crates.is_empty() {
for sc in &config.source_crates {
let crate_name = sc.name.replace('-', "_");
for source in &sc.sources {
groups.entry(crate_name.clone()).or_default().push(source.as_path());
}
}
} else {
for source in &config.sources {
let crate_name = derive_crate_name_from_path(source, default_name);
groups.entry(crate_name).or_default().push(source.as_path());
}
}
let mut merged = ApiSurface {
crate_name: default_name.to_string(),
version: version.clone(),
..ApiSurface::default()
};
for (crate_name, sources) in &groups {
let api = crate::extract::extractor::extract(sources, crate_name, &version, workspace_root)
.with_context(|| format!("failed to extract API surface from crate {crate_name}"))?;
merged.types.extend(api.types);
merged.functions.extend(api.functions);
merged.enums.extend(api.enums);
merged.errors.extend(api.errors);
merged.excluded_type_paths.extend(api.excluded_type_paths);
merged.excluded_trait_names.extend(api.excluded_trait_names);
merged.unsupported_public_items.extend(api.unsupported_public_items);
}
let return_type_names: ahash::AHashSet<String> = merged
.functions
.iter()
.filter_map(|f| match &f.return_type {
crate::core::ir::TypeRef::Named(name) => Some(name.clone()),
_ => None,
})
.collect();
for typ in &mut merged.types {
if return_type_names.contains(&typ.name) {
typ.is_return_type = true;
}
}
Ok(merged)
}
/// Derives a crate name from a source path of the form `…/crates/<name>/src/…`.
///
/// The path is inspected component by component. The first run of three
/// consecutive components `crates`, `<name>`, `src` determines the result,
/// which is `<name>` with `-` replaced by `_`. When no such run exists,
/// `default` is returned unchanged.
///
/// Matching whole components means a directory that merely ends in `crates`
/// (for example `rust-crates/`) is not mistaken for the `crates/` directory,
/// and a workspace that lives below such a directory is still recognised.
fn derive_crate_name_from_path(path: &Path, default: &str) -> String {
    use std::ffi::OsStr;
    use std::path::Component;

    let components: Vec<Component<'_>> = path.components().collect();
    components
        .windows(3)
        .find_map(|window| match window {
            [Component::Normal(dir), Component::Normal(name), Component::Normal(src)]
                if *dir == OsStr::new("crates") && *src == OsStr::new("src") =>
            {
                Some(name.to_string_lossy().replace('-', "_"))
            }
            _ => None,
        })
        .unwrap_or_else(|| default.to_string())
}
fn inject_declared_opaque_types(api: &mut ApiSurface, config: &ResolvedCrateConfig) {
let mut sorted_opaques: Vec<_> = config.opaque_types.iter().collect();
sorted_opaques.sort_by_key(|(name, _)| (*name).clone());
for (name, rust_path) in sorted_opaques {
if !api.types.iter().any(|t| t.name == *name) && !api.enums.iter().any(|e| e.name == *name) {
api.types.push(crate::core::ir::TypeDef {
name: name.clone(),
rust_path: rust_path.clone(),
original_rust_path: rust_path.clone(),
fields: vec![],
methods: vec![],
is_opaque: true,
is_clone: false,
is_copy: false,
is_trait: false,
has_default: false,
has_stripped_cfg_fields: false,
is_return_type: false,
doc: String::new(),
cfg: None,
serde_rename_all: None,
has_serde: false,
super_traits: vec![],
binding_excluded: false,
binding_exclusion_reason: None,
is_variant_wrapper: false,
has_lifetime_params: false,
});
debug!("Injected declared opaque type: {name} -> {rust_path}");
}
}
}
/// Rewrites type references that do not resolve to a type or enum in `api`
/// and records where that happened.
///
/// Struct fields, method parameters and returns, free-function parameters and
/// returns, enum variant fields and error variant fields are all passed through
/// `sanitize_type_ref`. Whenever that call reports a lossy rewrite, the matching
/// `sanitized` flag is set (plus `return_sanitized` for free-function returns).
/// For fields, the pre-sanitization text reported by
/// `extract_tuple_vec_original_type` is stored in `original_type` when present.
fn sanitize_unknown_types(api: &mut ApiSurface) {
// Snapshot names and `-`-normalized paths first: the loops below borrow
// `api.types` / `api.enums` mutably.
let known_types: AHashSet<String> = api.types.iter().map(|t| t.name.clone()).collect();
let known_enums: AHashSet<String> = api.enums.iter().map(|e| e.name.clone()).collect();
let known_type_paths: AHashSet<String> = api.types.iter().map(|t| t.rust_path.replace('-', "_")).collect();
let known_enum_paths: AHashSet<String> = api.enums.iter().map(|e| e.rust_path.replace('-', "_")).collect();
for typ in &mut api.types {
for field in &mut typ.fields {
// Captured before sanitizing, because `sanitize_type_ref` may overwrite `field.ty`.
let original = extract_tuple_vec_original_type(&field.ty);
if sanitize_type_ref(&mut field.ty, &known_types, &known_enums).is_lossy() {
field.sanitized = true;
if let Some(orig) = original {
field.original_type = Some(orig);
}
}
// Second check, only for fields not already sanitized that carry a recorded
// `type_rust_path`: the short name is known, but the last segment of the
// recorded path matches the last segment of no known type or enum path.
// Such a field is downgraded to `String` and marked sanitized.
if !field.sanitized {
if let Some(ref path) = field.type_rust_path {
let normalized_path = path.replace('-', "_");
if let TypeRef::Named(ref name) = field.ty {
if known_types.contains(name.as_str()) || known_enums.contains(name.as_str()) {
let path_type_name = normalized_path.rsplit("::").next().unwrap_or("");
let path_matches = known_type_paths
.iter()
.chain(known_enum_paths.iter())
.any(|kp| kp.rsplit("::").next().unwrap_or("") == path_type_name);
if !path_matches {
field.ty = TypeRef::String;
field.sanitized = true;
}
}
}
// Same path check for `Vec<Named>` fields; on mismatch the whole field type
// (not just the element) becomes `String`.
if let TypeRef::Vec(ref inner) = field.ty {
if let TypeRef::Named(ref name) = **inner {
let vec_path_type = normalized_path.rsplit("::").next().unwrap_or("");
let vec_matches = known_type_paths
.iter()
.chain(known_enum_paths.iter())
.any(|kp| kp.rsplit("::").next().unwrap_or("") == vec_path_type);
if (known_types.contains(name.as_str()) || known_enums.contains(name.as_str()))
&& !vec_matches
{
field.ty = TypeRef::String;
field.sanitized = true;
}
}
}
}
}
}
let type_name = typ.name.clone();
let is_trait = typ.is_trait;
for method in &mut typ.methods {
// Methods of trait types are left untouched.
if is_trait {
continue;
}
let mut method_sanitized = false;
for param in &mut method.params {
if sanitize_type_ref(&mut param.ty, &known_types, &known_enums).is_lossy() {
param.sanitized = true;
method_sanitized = true;
}
}
// A return type that names the owning type itself is never sanitized.
let is_self_return = matches!(&method.return_type, TypeRef::Named(n) if n == &type_name);
if !is_self_return && sanitize_type_ref(&mut method.return_type, &known_types, &known_enums).is_lossy() {
method_sanitized = true;
}
if method_sanitized {
method.sanitized = true;
}
}
}
// Free functions: a lossy return additionally sets `return_sanitized`.
for func in &mut api.functions {
let mut func_sanitized = false;
for param in &mut func.params {
if sanitize_type_ref(&mut param.ty, &known_types, &known_enums).is_lossy() {
param.sanitized = true;
func_sanitized = true;
}
}
if sanitize_type_ref(&mut func.return_type, &known_types, &known_enums).is_lossy() {
func_sanitized = true;
func.return_sanitized = true;
}
if func_sanitized {
func.sanitized = true;
}
}
// Enum variant fields: sanitized like struct fields, without the path check.
for enum_def in &mut api.enums {
for variant in &mut enum_def.variants {
for field in &mut variant.fields {
let original = extract_tuple_vec_original_type(&field.ty);
if sanitize_type_ref(&mut field.ty, &known_types, &known_enums).is_lossy() {
field.sanitized = true;
if let Some(orig) = original {
field.original_type = Some(orig);
}
}
}
}
}
// Error variant fields: same treatment as enum variant fields.
for error_def in &mut api.errors {
for variant in &mut error_def.variants {
for field in &mut variant.fields {
let original = extract_tuple_vec_original_type(&field.ty);
if sanitize_type_ref(&mut field.ty, &known_types, &known_enums).is_lossy() {
field.sanitized = true;
if let Some(orig) = original {
field.original_type = Some(orig);
}
}
}
}
}
}
/// Removes items flagged `binding_excluded` from `api` and logs what was dropped.
///
/// * Excluded types, enums and errors are recorded in `api.excluded_type_paths`
///   (name → `-`-normalized rust path) and then removed. Excluded trait types
///   are additionally recorded in `api.excluded_trait_names`.
/// * Excluded functions and methods are logged and removed.
/// * Excluded struct fields and enum variant fields are only logged; they stay
///   in the IR. An enum variant whose fields are all excluded (and that has at
///   least one field) gets `originally_had_data_fields = true`.
///
/// The function currently always returns `Ok(())`; the `Result` is kept for
/// callers that propagate it with `?`.
fn strip_binding_excluded(api: &mut ApiSurface) -> anyhow::Result<()> {
    /// Reason shown in logs when an item carries no explicit exclusion reason.
    const DEFAULT_REASON: &str = "source binding exclusion";

    // Record the paths of excluded items before they are removed below.
    for typ in api.types.iter().filter(|t| t.binding_excluded) {
        let reason = typ.binding_exclusion_reason.as_deref().unwrap_or(DEFAULT_REASON);
        info!("Stripping excluded type: {} ({})", typ.name, reason);
        api.excluded_type_paths
            .insert(typ.name.clone(), typ.rust_path.replace('-', "_"));
        if typ.is_trait {
            api.excluded_trait_names.insert(typ.name.clone());
        }
    }
    for enm in api.enums.iter().filter(|e| e.binding_excluded) {
        let reason = enm.binding_exclusion_reason.as_deref().unwrap_or(DEFAULT_REASON);
        info!("Stripping excluded enum: {} ({})", enm.name, reason);
        api.excluded_type_paths
            .insert(enm.name.clone(), enm.rust_path.replace('-', "_"));
    }
    for err in api.errors.iter().filter(|e| e.binding_excluded) {
        let reason = err.binding_exclusion_reason.as_deref().unwrap_or(DEFAULT_REASON);
        info!("Stripping excluded error: {} ({})", err.name, reason);
        api.excluded_type_paths
            .insert(err.name.clone(), err.rust_path.replace('-', "_"));
    }
    api.types.retain(|t| !t.binding_excluded);
    api.enums.retain(|e| !e.binding_excluded);
    api.errors.retain(|e| !e.binding_excluded);

    for func in api.functions.iter().filter(|f| f.binding_excluded) {
        let reason = func.binding_exclusion_reason.as_deref().unwrap_or(DEFAULT_REASON);
        info!("Stripping excluded function: {} ({})", func.name, reason);
    }
    api.functions.retain(|f| !f.binding_excluded);

    // Methods are removed from the types that survived the pass above.
    for typ in &mut api.types {
        let excluded_methods: Vec<String> = typ
            .methods
            .iter()
            .filter(|m| m.binding_excluded)
            .map(|m| {
                let reason = m.binding_exclusion_reason.as_deref().unwrap_or(DEFAULT_REASON);
                format!("{}.{} ({})", typ.name, m.name, reason)
            })
            .collect();
        if !excluded_methods.is_empty() {
            info!("Stripping excluded methods: {}", excluded_methods.join(", "));
        }
        typ.methods.retain(|m| !m.binding_excluded);
    }

    // Excluded struct fields are reported but deliberately left in place.
    for typ in &api.types {
        let excluded: Vec<_> = typ
            .fields
            .iter()
            .filter(|field| field.binding_excluded)
            .map(|field| {
                let reason = field.binding_exclusion_reason.as_deref().unwrap_or(DEFAULT_REASON);
                format!("{}.{} ({reason})", typ.name, field.name)
            })
            .collect();
        if !excluded.is_empty() {
            info!("Hiding binding-excluded fields: {}", excluded.join(", "));
        }
    }

    for enum_def in &mut api.enums {
        let excluded: Vec<String> = enum_def
            .variants
            .iter()
            .flat_map(|variant| {
                variant.fields.iter().filter(|f| f.binding_excluded).map(|f| {
                    let reason = f.binding_exclusion_reason.as_deref().unwrap_or(DEFAULT_REASON);
                    format!("{}::{}.{} ({reason})", enum_def.name, variant.name, f.name)
                })
            })
            .collect();
        if !excluded.is_empty() {
            info!("Hiding binding-excluded enum variant fields: {}", excluded.join(", "));
        }
        // Mark variants whose fields are all excluded, provided they have any.
        for variant in &mut enum_def.variants {
            if !variant.fields.is_empty() && variant.fields.iter().all(|f| f.binding_excluded) {
                variant.originally_had_data_fields = true;
            }
        }
    }

    Ok(())
}
/// Flags methods that are covered by a configured adapter as binding-excluded.
///
/// Every adapter with an `owner_type` contributes an `(owner_type, core_path)`
/// pair. A method whose `(type name, method name)` equals such a pair, and that
/// is not excluded already, gets `binding_excluded = true`. A reason is filled
/// in only when the method has none yet. Methods are flagged, not removed.
fn mark_adapter_handled_methods(api: &mut ApiSurface, config: &ResolvedCrateConfig) {
    use ahash::AHashSet;

    let mut handled: AHashSet<(String, String)> = AHashSet::new();
    for adapter in config.adapters.iter() {
        if let Some(owner) = adapter.owner_type.as_deref() {
            handled.insert((owner.to_string(), adapter.core_path.clone()));
        }
    }
    if handled.is_empty() {
        return;
    }

    for typ in &mut api.types {
        for method in typ.methods.iter_mut().filter(|m| !m.binding_excluded) {
            let key = (typ.name.clone(), method.name.clone());
            if !handled.contains(&key) {
                continue;
            }
            method.binding_excluded = true;
            if method.binding_exclusion_reason.is_none() {
                method.binding_exclusion_reason =
                    Some(format!("handled by [[crates.adapters]] entry `{}`", method.name));
            }
        }
    }
}
/// Returns the textual form of a tuple-based collection type, if `ty` is one.
///
/// * `Vec<Named("(..)")>`, directly or one level inside `Optional`, yields
///   `"Vec<(..)>"`. The name must start with `(` after leading whitespace.
/// * A `Named` type whose trimmed text starts with `[(` and contains `);`
///   yields that trimmed text.
/// * Anything else yields `None`.
fn extract_tuple_vec_original_type(ty: &TypeRef) -> Option<String> {
    /// `Vec` whose element is a `Named` type spelled as a tuple.
    fn vec_of_tuple(candidate: &TypeRef) -> Option<String> {
        let TypeRef::Vec(element) = candidate else {
            return None;
        };
        match element.as_ref() {
            TypeRef::Named(name) if name.trim_start().starts_with('(') => Some(format!("Vec<{name}>")),
            _ => None,
        }
    }

    match ty {
        TypeRef::Vec(_) => vec_of_tuple(ty),
        TypeRef::Optional(inner) => vec_of_tuple(inner),
        TypeRef::Named(name) => {
            let trimmed = name.trim();
            let looks_like_tuple_array = trimmed.starts_with("[(") && trimmed.contains(");");
            looks_like_tuple_array.then(|| trimmed.to_string())
        }
        _ => None,
    }
}
/// Outcome of sanitizing a single type reference.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TypeSanitization {
    /// The type reference was left as it was.
    Unchanged,
    /// The type reference was rewritten without losing information.
    Lossless,
    /// The type reference was rewritten (or flagged) with loss of information.
    Lossy,
}

impl TypeSanitization {
    /// `true` only for [`TypeSanitization::Lossy`].
    fn is_lossy(self) -> bool {
        matches!(self, Self::Lossy)
    }

    /// Merges two outcomes, keeping the more severe one
    /// (`Lossy` over `Lossless` over `Unchanged`).
    fn combine(self, other: Self) -> Self {
        if self.is_lossy() || other.is_lossy() {
            Self::Lossy
        } else if self == Self::Lossless || other == Self::Lossless {
            Self::Lossless
        } else {
            Self::Unchanged
        }
    }
}
/// Sanitizes `ty` in place against the sets of known type and enum names.
///
/// * A `Named` type that is known, or is spelled `Value` / `JsonValue`, is
///   left alone and reported as `Unchanged`.
/// * Any other `Named` type becomes `Vec<T>` when its text parses as a tuple
///   of identical simple elements, and `String` otherwise. Both are `Lossy`.
/// * `Optional` and `Vec` recurse into their inner type.
/// * A `Map` that contains a bare `Value` / `JsonValue` anywhere is reported as
///   `Lossy` without being modified. Otherwise key and value are sanitized
///   separately and their outcomes combined.
/// * Every other variant is `Unchanged`.
fn sanitize_type_ref(
    ty: &mut TypeRef,
    known_types: &AHashSet<String>,
    known_enums: &AHashSet<String>,
) -> TypeSanitization {
    match ty {
        TypeRef::Named(name) => {
            let is_known = known_types.contains(name.as_str()) || known_enums.contains(name.as_str());
            if is_known || name == "Value" || name == "JsonValue" {
                return TypeSanitization::Unchanged;
            }
            let replacement = match parse_homogeneous_tuple(name) {
                Some(element) => TypeRef::Vec(Box::new(element)),
                None => TypeRef::String,
            };
            *ty = replacement;
            TypeSanitization::Lossy
        }
        TypeRef::Optional(inner) | TypeRef::Vec(inner) => sanitize_type_ref(inner, known_types, known_enums),
        TypeRef::Map(key, value) => {
            // Ambiguous bare values are left visible so validation can report them.
            if contains_ambiguous_bare_value(key) || contains_ambiguous_bare_value(value) {
                return TypeSanitization::Lossy;
            }
            let key_outcome = sanitize_map_inner_type(key, known_types, known_enums);
            let value_outcome = sanitize_map_inner_type(value, known_types, known_enums);
            key_outcome.combine(value_outcome)
        }
        _ => TypeSanitization::Unchanged,
    }
}
/// Sanitizes a map key or value type.
///
/// A `Named("str")` is rewritten to `String` and reported as `Lossless`;
/// everything else is delegated to `sanitize_type_ref`.
fn sanitize_map_inner_type(
    ty: &mut TypeRef,
    known_types: &AHashSet<String>,
    known_enums: &AHashSet<String>,
) -> TypeSanitization {
    match ty {
        TypeRef::Named(name) if name == "str" => {
            *ty = TypeRef::String;
            TypeSanitization::Lossless
        }
        _ => sanitize_type_ref(ty, known_types, known_enums),
    }
}
/// Reports whether `ty` is, or contains at any depth of `Optional` / `Vec` /
/// `Map` nesting, a `Named` type spelled exactly `Value` or `JsonValue`.
fn contains_ambiguous_bare_value(ty: &TypeRef) -> bool {
    match ty {
        TypeRef::Named(name) => matches!(name.as_str(), "Value" | "JsonValue"),
        TypeRef::Optional(wrapped) | TypeRef::Vec(wrapped) => contains_ambiguous_bare_value(wrapped),
        TypeRef::Map(key, value) => {
            let key_is_ambiguous = contains_ambiguous_bare_value(key);
            key_is_ambiguous || contains_ambiguous_bare_value(value)
        }
        _ => false,
    }
}
/// Parses a tuple spelling such as `(u32, u32)` whose elements are all the same
/// simple type, and returns that element type.
///
/// The text is trimmed, must be wrapped in `(` … `)`, and is split on `,`. All
/// trimmed elements must be textually identical. `String` and the numeric
/// primitives listed below are recognised; any other element yields `None`.
fn parse_homogeneous_tuple(name: &str) -> Option<TypeRef> {
    use crate::core::ir::PrimitiveType;

    let body = name.trim().strip_prefix('(')?.strip_suffix(')')?;
    let mut elements = body.split(',').map(str::trim);
    let first = elements.next()?;
    if elements.any(|element| element != first) {
        return None;
    }

    let primitive = match first {
        "String" => return Some(TypeRef::String),
        "u8" => PrimitiveType::U8,
        "u16" => PrimitiveType::U16,
        "u32" => PrimitiveType::U32,
        "u64" => PrimitiveType::U64,
        "i8" => PrimitiveType::I8,
        "i16" => PrimitiveType::I16,
        "i32" => PrimitiveType::I32,
        "i64" => PrimitiveType::I64,
        "f32" => PrimitiveType::F32,
        "f64" => PrimitiveType::F64,
        "usize" => PrimitiveType::Usize,
        "isize" => PrimitiveType::Isize,
        _ => return None,
    };
    Some(TypeRef::Primitive(primitive))
}
/// Drops struct fields whose `cfg` condition is not satisfied by
/// `enabled_features`.
///
/// Fields without a `cfg` are always kept. A type that lost at least one field
/// is flagged with `has_stripped_cfg_fields`.
fn strip_cfg_fields(api: &mut ApiSurface, enabled_features: &[String]) {
    for typ in &mut api.types {
        let field_count_before = typ.fields.len();
        typ.fields.retain(|field| {
            field
                .cfg
                .as_ref()
                .map_or(true, |condition| cfg_condition_enabled(condition, enabled_features))
        });
        // Only cfg-gated fields can be removed, so a shrink implies a cfg strip.
        if typ.fields.len() < field_count_before {
            typ.has_stripped_cfg_fields = true;
        }
    }
}
/// Evaluates a textual `cfg` predicate against the enabled feature names.
///
/// The text is trimmed and every `" ("` is collapsed to `"("` first.
/// Supported forms:
///
/// * `feature = "name"` — true when `name` is in `enabled_features`;
/// * `any(..)` / `all(..)` — true when any / all listed predicates are true;
/// * `not(..)` — negation of the inner predicate.
///
/// Any other predicate evaluates to `false`.
fn cfg_condition_enabled(cfg_str: &str, enabled_features: &[String]) -> bool {
    /// Returns the text between `keyword` (which ends in `(`) and the final `)`.
    fn call_args<'a>(condition: &'a str, keyword: &str) -> Option<&'a str> {
        condition.strip_prefix(keyword)?.strip_suffix(')')
    }

    let normalized = cfg_str.trim().replace(" (", "(");
    let condition = normalized.as_str();

    let feature_name = condition
        .strip_prefix("feature = \"")
        .and_then(|rest| rest.strip_suffix('"'));

    if let Some(feature) = feature_name {
        enabled_features.iter().any(|enabled| enabled == feature)
    } else if let Some(args) = call_args(condition, "any(") {
        parse_cfg_list(args)
            .iter()
            .any(|predicate| cfg_condition_enabled(predicate, enabled_features))
    } else if let Some(args) = call_args(condition, "all(") {
        parse_cfg_list(args)
            .iter()
            .all(|predicate| cfg_condition_enabled(predicate, enabled_features))
    } else if let Some(arg) = call_args(condition, "not(") {
        !cfg_condition_enabled(arg.trim(), enabled_features)
    } else {
        false
    }
}
/// Splits a comma-separated predicate list on top-level commas only.
///
/// Commas nested inside parentheses do not split. Each piece is trimmed and
/// empty pieces are dropped. An unmatched `)` never drives the nesting depth
/// below zero.
fn parse_cfg_list(s: &str) -> Vec<String> {
    let mut pieces = Vec::new();
    let mut depth = 0usize;
    let mut piece_start = 0usize;

    for (offset, ch) in s.char_indices() {
        match ch {
            '(' => depth += 1,
            ')' => depth = depth.saturating_sub(1),
            ',' if depth == 0 => {
                let piece = s[piece_start..offset].trim();
                if !piece.is_empty() {
                    pieces.push(piece.to_string());
                }
                // ',' is a single byte, so the next piece starts right after it.
                piece_start = offset + 1;
            }
            _ => {}
        }
    }

    let tail = s[piece_start..].trim();
    if !tail.is_empty() {
        pieces.push(tail.to_string());
    }
    pieces
}
fn dedup_api_surface(api: &mut ApiSurface) {
let enum_names: AHashSet<String> = api.enums.iter().map(|e| e.name.clone()).collect();
api.types.retain(|t| !enum_names.contains(&t.name));
let error_names: AHashSet<String> = api.errors.iter().map(|e| e.name.clone()).collect();
api.types.retain(|t| !error_names.contains(&t.name));
{
let mut best: AHashMap<String, usize> = AHashMap::new();
for (i, t) in api.types.iter().enumerate() {
best.entry(t.name.clone())
.and_modify(|prev_i| {
if api.types[i].rust_path.len() < api.types[*prev_i].rust_path.len() {
*prev_i = i;
}
})
.or_insert(i);
}
let keep: AHashSet<usize> = best.values().copied().collect();
let mut idx = 0;
api.types.retain(|_| {
let k = keep.contains(&idx);
idx += 1;
k
});
}
{
let mut best: AHashMap<String, usize> = AHashMap::new();
for (i, e) in api.enums.iter().enumerate() {
best.entry(e.name.clone())
.and_modify(|prev_i| {
if api.enums[i].rust_path.len() < api.enums[*prev_i].rust_path.len() {
*prev_i = i;
}
})
.or_insert(i);
}
let keep: AHashSet<usize> = best.values().copied().collect();
let mut idx = 0;
api.enums.retain(|_| {
let k = keep.contains(&idx);
idx += 1;
k
});
}
{
let mut best: AHashMap<String, usize> = AHashMap::new();
for (i, f) in api.functions.iter().enumerate() {
best.entry(f.name.clone())
.and_modify(|prev_i| {
if api.functions[i].rust_path.len() < api.functions[*prev_i].rust_path.len() {
*prev_i = i;
}
})
.or_insert(i);
}
let keep: AHashSet<usize> = best.values().copied().collect();
let mut idx = 0;
api.functions.retain(|_| {
let k = keep.contains(&idx);
idx += 1;
k
});
}
let mut seen_errors: AHashSet<String> = AHashSet::new();
api.errors.retain(|e| seen_errors.insert(e.name.clone()));
}
/// Reports whether a type matches any entry of `exclude_list`.
///
/// An entry containing `::` is compared against `rust_path` after replacing
/// `-` with `_` in the path; the entry itself is not normalized. Any other
/// entry is compared against the short `name`.
fn is_type_excluded(name: &str, rust_path: &str, exclude_list: &[String]) -> bool {
    // Normalized lazily, and at most once, since many lists hold plain names only.
    let mut normalised_path: Option<String> = None;

    for entry in exclude_list {
        let matched = if entry.contains("::") {
            let normalised = normalised_path.get_or_insert_with(|| rust_path.replace('-', "_"));
            normalised.as_str() == entry.as_str()
        } else {
            name == entry.as_str()
        };
        if matched {
            return true;
        }
    }
    false
}
/// Applies the include and exclude lists of `config` to a freshly extracted
/// surface.
///
/// Include phase: when `include.types` is non-empty, types and enums are
/// reduced to the expanded include set. When `include.functions` is non-empty,
/// functions are reduced to that list. If either include list is active,
/// unsupported public items are kept only when their owner is an included type
/// or they are an included function.
///
/// Exclude phase: excluded types and enums are recorded in
/// `api.excluded_type_paths` and then removed together with excluded errors
/// and functions. Unsupported public items are dropped when they match an
/// excluded type, function or method, or when they are a method of an excluded
/// type. Finally, methods and service configurators listed in `exclude.methods`
/// (as `Owner.method`) are removed.
fn apply_filters(mut api: ApiSurface, config: &ResolvedCrateConfig) -> ApiSurface {
    let include = &config.include;
    let exclude = &config.exclude;

    // ---- include lists -------------------------------------------------
    let expanded_include: Option<AHashSet<String>> = if include.types.is_empty() {
        None
    } else {
        let expanded = expand_include_list(&api, &include.types, &include.functions);
        api.types.retain(|t| expanded.contains(&t.name));
        api.enums.retain(|e| expanded.contains(&e.name));
        Some(expanded)
    };

    let has_function_include = !include.functions.is_empty();
    if has_function_include {
        api.functions.retain(|f| include.functions.contains(&f.name));
    }

    if expanded_include.is_some() || has_function_include {
        api.unsupported_public_items.retain(|item| {
            let short_name = item.item_path.rsplit("::").next().unwrap_or(item.item_path.as_str());
            // For `Owner.method` the owner is the part before the first dot.
            let owner_name = short_name.split('.').next().unwrap_or(short_name);
            let owner_is_included_type = expanded_include
                .as_ref()
                .is_some_and(|expanded| expanded.contains(owner_name));
            owner_is_included_type
                || (item.item_kind == "function" && include.functions.iter().any(|name| name == owner_name))
        });
    }

    // ---- exclude lists -------------------------------------------------
    // Record excluded paths first; the items themselves are removed right after.
    for typ in api
        .types
        .iter()
        .filter(|t| is_type_excluded(&t.name, &t.rust_path, &exclude.types))
    {
        api.excluded_type_paths
            .insert(typ.name.clone(), typ.rust_path.replace('-', "_"));
    }
    for enm in api
        .enums
        .iter()
        .filter(|e| is_type_excluded(&e.name, &e.rust_path, &exclude.types))
    {
        api.excluded_type_paths
            .insert(enm.name.clone(), enm.rust_path.replace('-', "_"));
    }

    api.types
        .retain(|t| !is_type_excluded(&t.name, &t.rust_path, &exclude.types));
    api.functions.retain(|f| !exclude.functions.contains(&f.name));
    api.enums
        .retain(|e| !is_type_excluded(&e.name, &e.rust_path, &exclude.types));
    api.errors
        .retain(|e| !is_type_excluded(&e.name, &e.rust_path, &exclude.types));

    api.unsupported_public_items.retain(|item| {
        let full_path = item.item_path.as_str();
        let short_name = full_path.rsplit("::").next().unwrap_or(full_path);

        if is_type_excluded(short_name, full_path, &exclude.types) {
            return false;
        }

        let short_owned = short_name.to_string();
        if item.item_kind == "function" && exclude.functions.contains(&short_owned) {
            return false;
        }

        if item.item_kind == "method" {
            if exclude.methods.contains(&short_owned) {
                return false;
            }
            // A method also goes away when the type that owns it is excluded.
            if let Some((owner_short, _)) = short_name.split_once('.') {
                let owner_full = full_path.rsplit_once('.').map_or(full_path, |(owner, _)| owner);
                if is_type_excluded(owner_short, owner_full, &exclude.types) {
                    return false;
                }
            }
        }

        true
    });

    if !exclude.methods.is_empty() {
        for typ in &mut api.types {
            let owner = &typ.name;
            typ.methods
                .retain(|m| !exclude.methods.contains(&format!("{owner}.{}", m.name)));
        }
        for service in &mut api.services {
            let owner = &service.name;
            service
                .configurators
                .retain(|m| !exclude.methods.contains(&format!("{owner}.{}", m.name)));
        }
    }

    api
}
/// Computes the transitive closure of type names required by the include lists.
///
/// The result starts as `include_types`. The known types and enums referenced
/// by the signatures of the functions named in `include_functions` are added.
/// Then, until nothing new is found, every needed type contributes the known
/// types and enums referenced by its fields and by its methods' parameters and
/// return types.
///
/// Names in `include_types` are kept in the result even when no such type
/// exists in `api`.
fn expand_include_list(api: &ApiSurface, include_types: &[String], include_functions: &[String]) -> AHashSet<String> {
    let mut needed: AHashSet<String> = include_types.iter().cloned().collect();
    // Starts as `true` so the fixpoint loop below runs at least once.
    let mut changed = true;

    let all_types: AHashMap<String, &TypeDef> = api.types.iter().map(|t| (t.name.clone(), t)).collect();
    let all_enums: AHashSet<String> = api.enums.iter().map(|e| e.name.clone()).collect();
    let include_function_set: AHashSet<&str> = include_functions.iter().map(String::as_str).collect();

    // Seed with the types that appear in the signatures of included functions.
    if !include_function_set.is_empty() {
        for func in &api.functions {
            if !include_function_set.contains(func.name.as_str()) {
                continue;
            }
            collect_named_types(&func.return_type, &mut needed, &all_types, &all_enums, &mut changed);
            for param in &func.params {
                collect_named_types(&param.ty, &mut needed, &all_types, &all_enums, &mut changed);
            }
        }
    }

    while changed {
        changed = false;
        // Snapshot: `needed` grows while the types found so far are walked.
        let current: Vec<String> = needed.iter().cloned().collect();
        for type_name in &current {
            if let Some(typ) = all_types.get(type_name) {
                for field in &typ.fields {
                    collect_named_types(&field.ty, &mut needed, &all_types, &all_enums, &mut changed);
                }
                for method in &typ.methods {
                    collect_named_types(&method.return_type, &mut needed, &all_types, &all_enums, &mut changed);
                    for param in &method.params {
                        collect_named_types(&param.ty, &mut needed, &all_types, &all_enums, &mut changed);
                    }
                }
            }
        }
    }

    needed
}
/// Adds every known type or enum name referenced by `ty` to `needed`.
///
/// `Optional`, `Vec` and both sides of `Map` are searched recursively. A name
/// is added only when it exists in `all_types` or `all_enums`. `changed` is set
/// to `true` whenever a name is added that was not in `needed` before; it is
/// never reset here.
fn collect_named_types(
    ty: &TypeRef,
    needed: &mut AHashSet<String>,
    all_types: &AHashMap<String, &TypeDef>,
    all_enums: &AHashSet<String>,
    changed: &mut bool,
) {
    match ty {
        TypeRef::Named(name) => {
            let is_known = all_types.contains_key(name) || all_enums.contains(name);
            if is_known && needed.insert(name.clone()) {
                *changed = true;
            }
        }
        TypeRef::Optional(wrapped) | TypeRef::Vec(wrapped) => {
            collect_named_types(wrapped, needed, all_types, all_enums, changed);
        }
        TypeRef::Map(key, value) => {
            for side in [key, value] {
                collect_named_types(side, needed, all_types, all_enums, changed);
            }
        }
        _ => {}
    }
}
/// Rewrites the start of `path` using the longest matching key of `mappings`.
///
/// When several keys are prefixes of `path`, the longest one wins and is
/// replaced by its mapped value; the rest of the path is kept verbatim. When no
/// key matches, `path` is returned unchanged. Matching is a plain string-prefix
/// test and is not aligned to `::` boundaries.
fn rewrite_path(path: &str, mappings: &HashMap<String, String>) -> String {
    mappings
        .iter()
        .filter_map(|(prefix, replacement)| {
            path.strip_prefix(prefix.as_str())
                .map(|remainder| (prefix.len(), replacement, remainder))
        })
        // Two distinct keys of equal length cannot both prefix the same path,
        // so the maximum is unique.
        .max_by_key(|(prefix_len, _, _)| *prefix_len)
        .map(|(_, replacement, remainder)| format!("{replacement}{remainder}"))
        .unwrap_or_else(|| path.to_string())
}
/// Points every field's `type_rust_path` at the canonical path of the type it
/// names.
///
/// The canonical path of a name is the `rust_path` of the type with that name,
/// or of the enum with that name when no such type exists. Only fields that
/// already carry a `type_rust_path` are touched, and only when the name found
/// in their type has a canonical path. Struct fields and enum variant fields
/// are both processed.
fn normalize_field_type_paths(api: &mut ApiSurface) {
    /// Name of the `Named` type reached through `Optional` / `Vec` wrappers;
    /// for a `Map` only the value side is followed.
    fn innermost_named(ty: &TypeRef) -> Option<&str> {
        match ty {
            TypeRef::Named(name) => Some(name.as_str()),
            TypeRef::Optional(wrapped) | TypeRef::Vec(wrapped) => innermost_named(wrapped),
            TypeRef::Map(_, value) => innermost_named(value),
            _ => None,
        }
    }

    /// Rewrites the recorded path of each field that already has one.
    fn canonicalize(fields: &mut [crate::core::ir::FieldDef], canonical: &AHashMap<String, String>) {
        for field in fields.iter_mut().filter(|f| f.type_rust_path.is_some()) {
            let Some(path) = innermost_named(&field.ty).and_then(|name| canonical.get(name)) else {
                continue;
            };
            field.type_rust_path = Some(path.clone());
        }
    }

    // Types take precedence over enums that share a name.
    let mut canonical: AHashMap<String, String> = AHashMap::new();
    for typ in &api.types {
        canonical.insert(typ.name.clone(), typ.rust_path.clone());
    }
    for enm in &api.enums {
        canonical
            .entry(enm.name.clone())
            .or_insert_with(|| enm.rust_path.clone());
    }

    for typ in &mut api.types {
        canonicalize(&mut typ.fields, &canonical);
    }
    for variant in api.enums.iter_mut().flat_map(|enm| enm.variants.iter_mut()) {
        canonicalize(&mut variant.fields, &canonical);
    }
}
/// Rewrites the `rust_path` of every type, function, enum and error, plus the
/// `type_rust_path` of struct fields, through the configured path mappings.
///
/// Before an item's path is rewritten, its current path is saved to
/// `original_rust_path` unless that field is already non-empty. Nothing happens
/// when there are no mappings.
fn apply_path_mappings(api: &mut ApiSurface, config: &ResolvedCrateConfig) {
    let mappings = config.effective_path_mappings();
    if mappings.is_empty() {
        return;
    }

    // Saves the pre-mapping path once, then rewrites the live path.
    let relocate = |original: &mut String, current: &mut String| {
        if original.is_empty() {
            original.clone_from(current);
        }
        *current = rewrite_path(current, &mappings);
    };

    for typ in &mut api.types {
        relocate(&mut typ.original_rust_path, &mut typ.rust_path);
        for path in typ.fields.iter_mut().filter_map(|field| field.type_rust_path.as_mut()) {
            *path = rewrite_path(path, &mappings);
        }
    }
    for func in &mut api.functions {
        relocate(&mut func.original_rust_path, &mut func.rust_path);
    }
    for enum_def in &mut api.enums {
        relocate(&mut enum_def.original_rust_path, &mut enum_def.rust_path);
    }
    for error_def in &mut api.errors {
        relocate(&mut error_def.original_rust_path, &mut error_def.rust_path);
    }
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn sanitize_map_with_cow_key_preserves_map_structure_and_returns_lossless() {
let known_types = AHashSet::default();
let known_enums = AHashSet::default();
let mut ty = TypeRef::Map(Box::new(TypeRef::Named("str".into())), Box::new(TypeRef::Json));
let status = sanitize_type_ref(&mut ty, &known_types, &known_enums);
assert!(
matches!(&ty, TypeRef::Map(k, v)
if matches!(k.as_ref(), TypeRef::String)
&& matches!(v.as_ref(), TypeRef::Json)),
"expected Map(String, Json) but got {ty:?}"
);
assert_eq!(status, TypeSanitization::Lossless);
let _ = known_types;
let mut ty2 = TypeRef::Map(Box::new(TypeRef::String), Box::new(TypeRef::Json));
let sanitized2 = sanitize_type_ref(&mut ty2, &AHashSet::default(), &AHashSet::default());
assert_eq!(sanitized2, TypeSanitization::Unchanged);
assert!(
matches!(&ty2, TypeRef::Map(k, v)
if matches!(k.as_ref(), TypeRef::String)
&& matches!(v.as_ref(), TypeRef::Json)),
"Map(String, Json) must not be mutated when already clean"
);
}
#[test]
fn sanitize_map_with_bare_value_is_reported_as_sanitized() {
let mut ty = TypeRef::Map(Box::new(TypeRef::String), Box::new(TypeRef::Named("Value".to_string())));
let sanitized = sanitize_type_ref(&mut ty, &AHashSet::default(), &AHashSet::default());
assert!(
sanitized.is_lossy(),
"ambiguous bare Value inside Map must not be silently accepted"
);
assert!(
matches!(&ty, TypeRef::Map(_, value) if matches!(value.as_ref(), TypeRef::Named(name) if name == "Value")),
"ambiguous bare Value must remain visible for validation, got {ty:?}"
);
}
#[test]
fn sanitize_map_with_both_string_types_returns_not_sanitized() {
let mut ty = TypeRef::Map(Box::new(TypeRef::String), Box::new(TypeRef::String));
let sanitized = sanitize_type_ref(&mut ty, &AHashSet::default(), &AHashSet::default());
assert_eq!(sanitized, TypeSanitization::Unchanged);
assert!(matches!(
&ty,
TypeRef::Map(k, v)
if matches!(k.as_ref(), TypeRef::String) && matches!(v.as_ref(), TypeRef::String)
));
}
/// A map whose value is a name absent from both known sets is reported as
/// `Lossy`, and the value slot ends up as `String`.
#[test]
fn sanitize_map_with_unknown_value_type_returns_lossy() {
    let no_types = AHashSet::default();
    let no_enums = AHashSet::default();
    let foreign_value = TypeRef::Named("ForeignPayload".into());
    let mut ty = TypeRef::Map(Box::new(TypeRef::String), Box::new(foreign_value));

    let status = sanitize_type_ref(&mut ty, &no_types, &no_enums);
    assert_eq!(status, TypeSanitization::Lossy);

    let value_is_string = match &ty {
        TypeRef::Map(_, value) => matches!(value.as_ref(), TypeRef::String),
        _ => false,
    };
    assert!(
        value_is_string,
        "unknown map value should be visibly sanitized for validation, got {ty:?}"
    );
}
/// A top-level unknown named type is reported as lossy and replaced by
/// `String`.
#[test]
fn sanitize_named_unknown_type_returns_sanitized_true() {
    let no_types = AHashSet::default();
    let no_enums = AHashSet::default();
    let mut ty = TypeRef::Named(String::from("UnknownForeignType"));

    let status = sanitize_type_ref(&mut ty, &no_types, &no_enums);

    assert!(status.is_lossy());
    assert!(matches!(&ty, TypeRef::String));
}
/// An unknown named type nested inside a `Vec` is reported as lossy, and the
/// element type becomes `String`.
#[test]
fn sanitize_vec_with_unknown_named_returns_sanitized_true() {
    let no_types = AHashSet::default();
    let no_enums = AHashSet::default();
    let foreign_element = TypeRef::Named(String::from("MyForeignStruct"));
    let mut ty = TypeRef::Vec(Box::new(foreign_element));

    let status = sanitize_type_ref(&mut ty, &no_types, &no_enums);
    assert!(status.is_lossy());

    let element_is_string = match &ty {
        TypeRef::Vec(inner) => matches!(inner.as_ref(), TypeRef::String),
        _ => false,
    };
    assert!(element_is_string);
}
/// With a default config, a function parameter that refers to a named type
/// missing from the surface must make `validate_extracted_api` fail, and the
/// error text must mention `unknown_named_type`.
#[test]
fn validate_extracted_api_does_not_suppress_critical_codes() {
    // Parameter whose type is not declared anywhere in the surface.
    let payload_param = crate::core::ir::ParamDef {
        ty: TypeRef::Named("MissingPayload".to_string()),
        name: "payload".to_string(),
        ..crate::core::ir::ParamDef::default()
    };

    let render_fn = crate::core::ir::FunctionDef {
        // Identity.
        name: "render".to_string(),
        rust_path: "sample_lib::render".to_string(),
        original_rust_path: String::new(),
        doc: String::new(),
        cfg: None,
        // Signature.
        params: vec![payload_param],
        return_type: TypeRef::String,
        error_type: None,
        is_async: false,
        returns_ref: false,
        returns_cow: false,
        return_newtype_wrapper: None,
        // Sanitization and binding flags.
        sanitized: false,
        return_sanitized: false,
        binding_excluded: false,
        binding_exclusion_reason: None,
    };

    let api = ApiSurface {
        functions: vec![render_fn],
        crate_name: "sample-lib".to_string(),
        ..ApiSurface::default()
    };
    let config = ResolvedCrateConfig::default();

    let err = validate_extracted_api(&api, &config).expect_err("must stay fatal");
    let message = err.to_string();
    assert!(message.contains("unknown_named_type"), "unexpected error: {message}");
}
/// An exclude entry without a path qualifier matches on the short type name
/// and rejects a different name.
#[test]
fn is_type_excluded_plain_entry_matches_by_name() {
    let exclude = vec![String::from("OutputFormat")];

    let same_name = is_type_excluded("OutputFormat", "sample_crate::types::OutputFormat", &exclude);
    assert!(same_name, "plain entry must match when name matches");

    let other_name = is_type_excluded("SomethingElse", "sample_crate::types::SomethingElse", &exclude);
    assert!(!other_name, "plain entry must not match when name differs");
}
/// A path-qualified exclude entry matches only the identical `rust_path`; a
/// type with the same short name at another path is not excluded.
#[test]
fn is_type_excluded_qualified_entry_matches_rust_path_not_name() {
    let qualified = "sample_crate::core::config::formats::OutputFormat";
    let exclude = vec![String::from(qualified)];

    let exact_path = is_type_excluded("OutputFormat", qualified, &exclude);
    assert!(exact_path, "qualified entry must match the exact rust_path");

    let other_path = is_type_excluded("OutputFormat", "sample_crate::types::OutputFormat", &exclude);
    assert!(
        !other_path,
        "qualified entry must NOT match a different rust_path with the same short name"
    );
}
/// A `rust_path` spelled with a hyphenated crate segment must still match an
/// exclude entry spelled with underscores.
#[test]
fn is_type_excluded_normalises_hyphens_in_rust_path() {
    let exclude = vec![String::from("my_crate::some_module::Foo")];
    let hyphenated_path = "my-crate::some_module::Foo";

    let excluded = is_type_excluded("Foo", hyphenated_path, &exclude);

    assert!(excluded, "hyphens in rust_path should be normalised to underscores");
}
/// Test fixture: builds a `TypeDef` called `name`, located at
/// `my_crate::<name>`, with every flag off and every collection or optional
/// field empty.
fn make_typedef(name: &str) -> crate::core::ir::TypeDef {
    let rust_path = format!("my_crate::{name}");
    crate::core::ir::TypeDef {
        // Identity.
        name: name.to_owned(),
        rust_path,
        original_rust_path: String::new(),
        doc: String::new(),
        cfg: None,
        // Members.
        fields: Vec::new(),
        methods: Vec::new(),
        super_traits: Vec::new(),
        // Shape flags.
        is_opaque: false,
        is_clone: false,
        is_copy: false,
        is_trait: false,
        is_return_type: false,
        is_variant_wrapper: false,
        has_default: false,
        has_stripped_cfg_fields: false,
        has_lifetime_params: false,
        // Serde metadata.
        has_serde: false,
        serde_rename_all: None,
        // Binding exclusion.
        binding_excluded: false,
        binding_exclusion_reason: None,
    }
}
/// Test fixture: builds a synchronous `FunctionDef` at `my_crate::<name>`
/// returning `return_type`.
///
/// Each entry of `param_types` becomes one required parameter named `arg0`,
/// `arg1`, ... in order, with all remaining parameter flags off or empty.
fn make_funcdef(name: &str, return_type: TypeRef, param_types: Vec<TypeRef>) -> crate::core::ir::FunctionDef {
    let mut params = Vec::with_capacity(param_types.len());
    for (index, ty) in param_types.into_iter().enumerate() {
        params.push(crate::core::ir::ParamDef {
            name: format!("arg{index}"),
            ty,
            optional: false,
            default: None,
            typed_default: None,
            sanitized: false,
            is_ref: false,
            is_mut: false,
            newtype_wrapper: None,
            original_type: None,
            map_is_ahash: false,
            map_is_btree: false,
            map_key_is_cow: false,
            vec_inner_is_ref: false,
            core_wrapper: crate::core::ir::CoreWrapper::None,
        });
    }

    crate::core::ir::FunctionDef {
        // Identity.
        name: name.to_owned(),
        rust_path: format!("my_crate::{name}"),
        original_rust_path: String::new(),
        doc: String::new(),
        cfg: None,
        // Signature.
        params,
        return_type,
        error_type: None,
        is_async: false,
        returns_ref: false,
        returns_cow: false,
        return_newtype_wrapper: None,
        // Sanitization and binding flags.
        sanitized: false,
        return_sanitized: false,
        binding_excluded: false,
        binding_exclusion_reason: None,
    }
}
/// Test fixture: wraps the given types and functions in an `ApiSurface` for
/// crate `my_crate` at version `0.1.0`; every other collection starts empty.
fn surface_with(types: Vec<crate::core::ir::TypeDef>, functions: Vec<crate::core::ir::FunctionDef>) -> ApiSurface {
    ApiSurface {
        // Caller-supplied content.
        types,
        functions,
        // Fixed crate identity.
        crate_name: String::from("my_crate"),
        version: "0.1.0".into(),
        // Everything else is empty.
        enums: Vec::new(),
        errors: Vec::new(),
        services: Vec::new(),
        handler_contracts: Vec::new(),
        unsupported_public_items: vec![],
        excluded_type_paths: std::collections::HashMap::new(),
        excluded_trait_names: std::collections::HashSet::new(),
    }
}
/// The return type of a function listed in `include_functions` must be added
/// to the expanded set even though only the per-element type was listed in
/// `include_types`; a type nothing refers to must stay out.
#[test]
fn expand_include_list_seeds_from_included_function_signatures() {
    let type_defs = vec![
        make_typedef("BatchScrapeResult"),
        make_typedef("BatchScrapeResults"),
        make_typedef("UnusedType"),
    ];
    let batch_scrape = make_funcdef(
        "batch_scrape",
        TypeRef::Named("BatchScrapeResults".into()),
        vec![TypeRef::Vec(Box::new(TypeRef::String))],
    );
    let surface = surface_with(type_defs, vec![batch_scrape]);

    let include_types = vec![String::from("BatchScrapeResult")];
    let include_functions = vec![String::from("batch_scrape")];
    let expanded = expand_include_list(&surface, &include_types, &include_functions);

    let has_listed_type = expanded.contains("BatchScrapeResult");
    assert!(
        has_listed_type,
        "per-element type explicitly listed must be present; got: {expanded:?}"
    );
    let has_return_type = expanded.contains("BatchScrapeResults");
    assert!(
        has_return_type,
        "wrapper return type of included function must be auto-included; got: {expanded:?}"
    );
    let has_unused_type = expanded.contains("UnusedType");
    assert!(
        !has_unused_type,
        "unrelated type must not be pulled in; got: {expanded:?}"
    );
}
/// A parameter type wrapped in `Optional` on an included function must end
/// up in the expanded set.
#[test]
fn expand_include_list_seeds_from_included_function_param_types() {
    let optional_config = TypeRef::Optional(Box::new(TypeRef::Named("CrawlConfig".into())));
    let create_engine = make_funcdef(
        "create_engine",
        TypeRef::Named("EngineHandle".into()),
        vec![optional_config],
    );
    let type_defs = vec![make_typedef("CrawlConfig"), make_typedef("EngineHandle")];
    let surface = surface_with(type_defs, vec![create_engine]);

    let include_types = vec![String::from("EngineHandle")];
    let include_functions = vec![String::from("create_engine")];
    let expanded = expand_include_list(&surface, &include_types, &include_functions);

    let has_param_type = expanded.contains("CrawlConfig");
    assert!(
        has_param_type,
        "param type referenced through Optional must be retained; got: {expanded:?}"
    );
}
/// With an empty `include_functions` list, a function present in the surface
/// must not seed its return type: only the explicitly listed type is kept.
#[test]
fn expand_include_list_with_empty_functions_matches_legacy_behaviour() {
    let do_thing = make_funcdef("do_thing", TypeRef::Named("Dropped".into()), Vec::new());
    let type_defs = vec![make_typedef("Kept"), make_typedef("Dropped")];
    let surface = surface_with(type_defs, vec![do_thing]);

    let include_types = vec![String::from("Kept")];
    let include_functions: Vec<String> = Vec::new();
    let expanded = expand_include_list(&surface, &include_types, &include_functions);

    assert!(expanded.contains("Kept"));
    let has_dropped = expanded.contains("Dropped");
    assert!(
        !has_dropped,
        "function not in include.functions must not pull in its return type; got: {expanded:?}"
    );
}
/// Test fixture: builds an `UnsupportedPublicItem` of kind `"method"` whose
/// path is `my_crate::module::<type_name>.<method_name>`.
fn make_unsupported_method(type_name: &str, method_name: &str) -> crate::core::ir::UnsupportedPublicItem {
    let item_path = format!("my_crate::module::{type_name}.{method_name}");
    let reason = String::from(
        "public generic trait methods cannot be represented without explicit monomorphization metadata",
    );
    crate::core::ir::UnsupportedPublicItem {
        item_kind: String::from("method"),
        item_path,
        reason,
        suggested_fix: String::from("exclude the method"),
    }
}
/// Test fixture: builds an `UnsupportedPublicItem` of kind `"function"` whose
/// path is `my_crate::<fn_name>`.
fn make_unsupported_function(fn_name: &str) -> crate::core::ir::UnsupportedPublicItem {
    let item_path = format!("my_crate::{fn_name}");
    crate::core::ir::UnsupportedPublicItem {
        item_kind: String::from("function"),
        item_path,
        reason: String::from("generic function"),
        suggested_fix: String::from("exclude the function"),
    }
}
/// An unsupported-method diagnostic must be dropped by `apply_filters` when
/// `exclude.methods` lists that method as `Type.method`.
#[test]
fn apply_filters_removes_unsupported_method_when_excluded_by_methods_list() {
    let mut surface = surface_with(Vec::new(), Vec::new());
    let serialize = make_unsupported_method("NodeContext", "serialize");
    surface.unsupported_public_items.push(serialize);

    let mut config = ResolvedCrateConfig::default();
    config.exclude.methods = vec![String::from("NodeContext.serialize")];

    let result = apply_filters(surface, &config);

    let leftover = &result.unsupported_public_items;
    assert!(
        leftover.is_empty(),
        "method listed in exclude.methods must be removed from unsupported_public_items; remaining: {leftover:?}"
    );
}
/// An unsupported-method diagnostic must survive `apply_filters` when
/// `exclude.methods` names a different method on the same type.
#[test]
fn apply_filters_retains_unsupported_method_when_not_in_exclude_list() {
    let mut surface = surface_with(Vec::new(), Vec::new());
    let serialize = make_unsupported_method("NodeContext", "serialize");
    surface.unsupported_public_items.push(serialize);

    let mut config = ResolvedCrateConfig::default();
    config.exclude.methods = vec![String::from("NodeContext.other_method")];

    let result = apply_filters(surface, &config);

    let remaining = result.unsupported_public_items.len();
    assert_eq!(
        remaining, 1,
        "method NOT in exclude.methods must remain in unsupported_public_items"
    );
}
/// An `exclude.methods` entry that equals a free function's name must not
/// remove that function's unsupported-item diagnostic.
#[test]
fn apply_filters_exclude_methods_does_not_affect_unsupported_function_items() {
    let mut surface = surface_with(Vec::new(), Vec::new());
    let generic_helper = make_unsupported_function("generic_helper");
    surface.unsupported_public_items.push(generic_helper);

    let mut config = ResolvedCrateConfig::default();
    config.exclude.methods = vec![String::from("generic_helper")];

    let result = apply_filters(surface, &config);

    let remaining = result.unsupported_public_items.len();
    assert_eq!(
        remaining, 1,
        "exclude.methods must not suppress items with item_kind == 'function'"
    );
}
/// When `include.functions` is set, only the unsupported-function diagnostic
/// for a listed function may remain after `apply_filters`.
#[test]
fn apply_filters_retains_unsupported_function_when_included_by_function_list() {
    let mut surface = surface_with(Vec::new(), Vec::new());
    for fn_name in ["generic_helper", "unused_generic"] {
        surface
            .unsupported_public_items
            .push(make_unsupported_function(fn_name));
    }

    let mut config = ResolvedCrateConfig::default();
    config.include.functions = vec![String::from("generic_helper")];

    let result = apply_filters(surface, &config);

    let retained_paths: Vec<&str> = result
        .unsupported_public_items
        .iter()
        .map(|item| item.item_path.as_str())
        .collect();
    assert_eq!(
        retained_paths,
        vec!["my_crate::generic_helper"],
        "include.functions must retain diagnostics only for included generic functions"
    );
}
/// When `include.types` is set, only the unsupported-method diagnostic whose
/// owning type is listed may remain after `apply_filters`.
#[test]
fn apply_filters_retains_unsupported_method_when_parent_type_is_included() {
    let type_defs = vec![make_typedef("NodeContext"), make_typedef("OtherType")];
    let mut surface = surface_with(type_defs, Vec::new());
    for owner in ["NodeContext", "OtherType"] {
        surface
            .unsupported_public_items
            .push(make_unsupported_method(owner, "serialize"));
    }

    let mut config = ResolvedCrateConfig::default();
    config.include.types = vec![String::from("NodeContext")];

    let result = apply_filters(surface, &config);

    let retained_paths: Vec<&str> = result
        .unsupported_public_items
        .iter()
        .map(|item| item.item_path.as_str())
        .collect();
    assert_eq!(
        retained_paths,
        vec!["my_crate::module::NodeContext.serialize"],
        "include.types must retain diagnostics only for methods owned by included public types"
    );
}
}