Skip to main content

alef_core/config/
mod.rs

1use serde::{Deserialize, Serialize};
2use std::collections::HashMap;
3use std::path::PathBuf;
4
5pub mod dto;
6pub mod e2e;
7pub mod extras;
8pub mod languages;
9pub mod output;
10
11// Re-exports for backward compatibility — all types were previously flat in config.rs.
12pub use dto::{
13    CsharpDtoStyle, DtoConfig, ElixirDtoStyle, GoDtoStyle, JavaDtoStyle, NodeDtoStyle, PhpDtoStyle, PythonDtoStyle,
14    RDtoStyle, RubyDtoStyle,
15};
16pub use e2e::E2eConfig;
17pub use extras::{AdapterConfig, AdapterParam, AdapterPattern, Language};
18pub use languages::{
19    CSharpConfig, CustomModulesConfig, CustomRegistration, CustomRegistrationsConfig, ElixirConfig, FfiConfig,
20    GoConfig, JavaConfig, NodeConfig, PhpConfig, PythonConfig, RConfig, RubyConfig, StubsConfig, WasmConfig,
21};
22pub use output::{
23    ExcludeConfig, IncludeConfig, LintConfig, OutputConfig, ReadmeConfig, ScaffoldConfig, SyncConfig, TestConfig,
24    TextReplacement,
25};
26
/// Root configuration from alef.toml.
///
/// Only `crate` and `languages` are mandatory when deserializing. Every other field is marked
/// `#[serde(default)]`, so a missing key yields the field type's `Default` value (`None` for the
/// `Option` fields, empty collections for the `Vec`/`HashMap` fields).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AlefConfig {
    /// Settings for the core crate. Stored under the `crate` key; the field itself cannot be
    /// called `crate` because that is a Rust keyword.
    #[serde(rename = "crate")]
    pub crate_config: CrateConfig,
    /// Languages listed in the config.
    /// NOTE(review): presumably the set of backends to generate — confirm against callers.
    pub languages: Vec<Language>,
    /// Exclusion settings (see [`ExcludeConfig`]).
    #[serde(default)]
    pub exclude: ExcludeConfig,
    /// Inclusion settings (see [`IncludeConfig`]).
    #[serde(default)]
    pub include: IncludeConfig,
    /// Output settings; `output.ffi` is consulted by [`AlefConfig::ffi_lib_name`].
    #[serde(default)]
    pub output: OutputConfig,
    /// Optional Python section (module name, feature and serde rename overrides).
    #[serde(default)]
    pub python: Option<PythonConfig>,
    /// Optional Node section (package name, feature and serde rename overrides).
    #[serde(default)]
    pub node: Option<NodeConfig>,
    /// Optional Ruby section (gem name, feature and serde rename overrides).
    #[serde(default)]
    pub ruby: Option<RubyConfig>,
    /// Optional PHP section (extension name, feature and serde rename overrides).
    #[serde(default)]
    pub php: Option<PhpConfig>,
    /// Optional Elixir section (app name, feature and serde rename overrides).
    #[serde(default)]
    pub elixir: Option<ElixirConfig>,
    /// Optional WASM section (feature and serde rename overrides).
    #[serde(default)]
    pub wasm: Option<WasmConfig>,
    /// Optional FFI section (prefix, library name, header name, feature and serde rename overrides).
    #[serde(default)]
    pub ffi: Option<FfiConfig>,
    /// Optional Go section (module path, feature and serde rename overrides).
    #[serde(default)]
    pub go: Option<GoConfig>,
    /// Optional Java section (package, feature and serde rename overrides).
    #[serde(default)]
    pub java: Option<JavaConfig>,
    /// Optional C# section (namespace, feature and serde rename overrides).
    #[serde(default)]
    pub csharp: Option<CSharpConfig>,
    /// Optional R section (package name, feature and serde rename overrides).
    #[serde(default)]
    pub r: Option<RConfig>,
    /// Optional scaffold settings (see [`ScaffoldConfig`]).
    #[serde(default)]
    pub scaffold: Option<ScaffoldConfig>,
    /// Optional README settings (see [`ReadmeConfig`]).
    #[serde(default)]
    pub readme: Option<ReadmeConfig>,
    /// Optional lint settings keyed by string.
    /// NOTE(review): keys are presumably language names — confirm against callers.
    #[serde(default)]
    pub lint: Option<HashMap<String, LintConfig>>,
    /// Optional test settings keyed by string.
    /// NOTE(review): keys are presumably language names — confirm against callers.
    #[serde(default)]
    pub test: Option<HashMap<String, TestConfig>>,
    /// Optional lists of custom file paths keyed by string.
    /// NOTE(review): key semantics are not visible in this module — confirm against callers.
    #[serde(default)]
    pub custom_files: Option<HashMap<String, Vec<PathBuf>>>,
    /// Adapter definitions (see [`AdapterConfig`]); empty when the key is absent.
    #[serde(default)]
    pub adapters: Vec<AdapterConfig>,
    /// Custom module settings (see [`CustomModulesConfig`]).
    #[serde(default)]
    pub custom_modules: CustomModulesConfig,
    /// Custom registration settings (see [`CustomRegistrationsConfig`]).
    #[serde(default)]
    pub custom_registrations: CustomRegistrationsConfig,
    /// Optional sync settings (see [`SyncConfig`]).
    #[serde(default)]
    pub sync: Option<SyncConfig>,
    /// Declare opaque types from external crates that alef can't extract.
    /// Map of type name → Rust path (e.g., "Tree" = "tree_sitter_language_pack::Tree").
    /// These get opaque wrapper structs in all backends.
    #[serde(default)]
    pub opaque_types: HashMap<String, String>,
    /// Controls which generation passes alef runs (all default to true).
    #[serde(default)]
    pub generate: GenerateConfig,
    /// Per-language overrides for generate flags (key = language name, e.g., "python").
    #[serde(default)]
    pub generate_overrides: HashMap<String, GenerateConfig>,
    /// Per-language DTO/type generation style (dataclass vs TypedDict, zod vs interface, etc.).
    #[serde(default)]
    pub dto: DtoConfig,
    /// E2E test generation configuration.
    #[serde(default)]
    pub e2e: Option<E2eConfig>,
}
97
/// Settings for the core crate, deserialized from the `crate` key of [`AlefConfig`].
///
/// `name` and `sources` are required; every other field has a serde default.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CrateConfig {
    /// Crate name. Several `AlefConfig` helpers derive default names from it, usually with
    /// `-` replaced by `_`.
    pub name: String,
    /// Source file paths. `AlefConfig::core_crate_dir` inspects the first entry to locate the
    /// crate directory.
    pub sources: Vec<PathBuf>,
    /// Path of the TOML file that `AlefConfig::resolved_version` reads the version from.
    /// Defaults to `"Cargo.toml"`.
    #[serde(default = "default_version_from")]
    pub version_from: String,
    /// Explicit import path for the core crate. When unset, `AlefConfig::core_import` falls
    /// back to `name` with `-` replaced by `_`.
    #[serde(default)]
    pub core_import: Option<String>,
    /// Optional workspace root path for resolving `pub use` re-exports from sibling crates.
    #[serde(default)]
    pub workspace_root: Option<PathBuf>,
    /// When true, skip adding `use {core_import};` to generated bindings.
    #[serde(default)]
    pub skip_core_import: bool,
    /// Cargo features that are enabled in binding crates.
    /// Fields gated by `#[cfg(feature = "...")]` matching these features
    /// are treated as always-present (cfg stripped from the IR).
    /// Used as the fallback by `AlefConfig::features_for_language` when a language has no override.
    #[serde(default)]
    pub features: Vec<String>,
    /// Maps extracted rust_path prefixes to actual import paths in binding crates.
    /// Example: { "spikard" = "spikard_http" } rewrites "spikard::ServerConfig" to "spikard_http::ServerConfig"
    /// Applied by `AlefConfig::rewrite_path`.
    #[serde(default)]
    pub path_mappings: HashMap<String, String>,
}
122
/// Serde default for `CrateConfig::version_from`: the manifest file name `Cargo.toml`.
fn default_version_from() -> String {
    String::from("Cargo.toml")
}
126
/// Serde default helper that returns `true`.
///
/// Referenced by the `#[serde(default = "default_true")]` attributes on the `GenerateConfig`
/// fields, because serde's plain `default` would yield `false` for a `bool`.
fn default_true() -> bool {
    true
}
130
/// Controls which generation passes alef runs.
/// All flags default to `true`; set to `false` to skip a pass.
/// Can be overridden per-language via `[generate_overrides.<lang>]`.
///
/// Each field uses `default_true` as its serde default and the manual `Default` impl also sets
/// every flag to `true`, so an absent table and a partially filled table both leave unspecified
/// passes enabled.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GenerateConfig {
    /// Generate low-level struct wrappers, From impls, module init (default: true)
    #[serde(default = "default_true")]
    pub bindings: bool,
    /// Generate error type hierarchies from thiserror enums (default: true)
    #[serde(default = "default_true")]
    pub errors: bool,
    /// Generate config builder constructors from Default types (default: true)
    #[serde(default = "default_true")]
    pub configs: bool,
    /// Generate async/sync function pairs with runtime management (default: true)
    #[serde(default = "default_true")]
    pub async_wrappers: bool,
    /// Generate recursive type marshaling helpers (default: true)
    #[serde(default = "default_true")]
    pub type_conversions: bool,
    /// Generate package manifests (pyproject.toml, package.json, etc.) (default: true)
    #[serde(default = "default_true")]
    pub package_metadata: bool,
    /// Generate idiomatic public API wrappers (default: true)
    #[serde(default = "default_true")]
    pub public_api: bool,
}
158
159impl Default for GenerateConfig {
160    fn default() -> Self {
161        Self {
162            bindings: true,
163            errors: true,
164            configs: true,
165            async_wrappers: true,
166            type_conversions: true,
167            package_metadata: true,
168            public_api: true,
169        }
170    }
171}
172
173// ---------------------------------------------------------------------------
174// Shared config resolution helpers
175// ---------------------------------------------------------------------------
176
177impl AlefConfig {
178    /// Get the features to use for a specific language's binding crate.
179    /// Checks for a per-language override first, then falls back to `[crate] features`.
180    pub fn features_for_language(&self, lang: extras::Language) -> &[String] {
181        let override_features = match lang {
182            extras::Language::Python => self.python.as_ref().and_then(|c| c.features.as_deref()),
183            extras::Language::Node => self.node.as_ref().and_then(|c| c.features.as_deref()),
184            extras::Language::Ruby => self.ruby.as_ref().and_then(|c| c.features.as_deref()),
185            extras::Language::Php => self.php.as_ref().and_then(|c| c.features.as_deref()),
186            extras::Language::Elixir => self.elixir.as_ref().and_then(|c| c.features.as_deref()),
187            extras::Language::Wasm => self.wasm.as_ref().and_then(|c| c.features.as_deref()),
188            extras::Language::Ffi => self.ffi.as_ref().and_then(|c| c.features.as_deref()),
189            extras::Language::Go => self.go.as_ref().and_then(|c| c.features.as_deref()),
190            extras::Language::Java => self.java.as_ref().and_then(|c| c.features.as_deref()),
191            extras::Language::Csharp => self.csharp.as_ref().and_then(|c| c.features.as_deref()),
192            extras::Language::R => self.r.as_ref().and_then(|c| c.features.as_deref()),
193        };
194        override_features.unwrap_or(&self.crate_config.features)
195    }
196
197    /// Get the core crate import path (e.g., "liter_llm"). Used by codegen to call into the core crate.
198    pub fn core_import(&self) -> String {
199        self.crate_config
200            .core_import
201            .clone()
202            .unwrap_or_else(|| self.crate_config.name.replace('-', "_"))
203    }
204
205    /// Get the FFI prefix (e.g., "kreuzberg"). Used by FFI, Go, Java, C# backends.
206    pub fn ffi_prefix(&self) -> String {
207        self.ffi
208            .as_ref()
209            .and_then(|f| f.prefix.as_ref())
210            .cloned()
211            .unwrap_or_else(|| self.crate_config.name.replace('-', "_"))
212    }
213
214    /// Get the FFI native library name (for Go cgo, Java Panama, C# P/Invoke).
215    ///
216    /// Resolution order:
217    /// 1. `[ffi] lib_name` explicit override
218    /// 2. Directory name of `output.ffi` path with hyphens replaced by underscores
219    ///    (e.g. `crates/html-to-markdown-ffi/src/` → `html_to_markdown_ffi`)
220    /// 3. `{ffi_prefix}_ffi` fallback
221    pub fn ffi_lib_name(&self) -> String {
222        // 1. Explicit override in [ffi] section.
223        if let Some(name) = self.ffi.as_ref().and_then(|f| f.lib_name.as_ref()) {
224            return name.clone();
225        }
226
227        // 2. Derive from output.ffi path: take the last meaningful directory component
228        //    (skip trailing "src" or similar), then replace hyphens with underscores.
229        if let Some(ffi_path) = self.output.ffi.as_ref() {
230            let path = std::path::Path::new(ffi_path);
231            // Walk components from the end to find the crate directory name.
232            // Skip components like "src" that are inside the crate dir.
233            let components: Vec<_> = path
234                .components()
235                .filter_map(|c| {
236                    if let std::path::Component::Normal(s) = c {
237                        s.to_str()
238                    } else {
239                        None
240                    }
241                })
242                .collect();
243            // The crate name is typically the last component that looks like a crate dir
244            // (i.e. not "src", "lib", or similar). Search from the end.
245            let crate_dir = components
246                .iter()
247                .rev()
248                .find(|&&s| s != "src" && s != "lib" && s != "include")
249                .copied();
250            if let Some(dir) = crate_dir {
251                return dir.replace('-', "_");
252            }
253        }
254
255        // 3. Default fallback.
256        format!("{}_ffi", self.ffi_prefix())
257    }
258
259    /// Get the FFI header name.
260    pub fn ffi_header_name(&self) -> String {
261        self.ffi
262            .as_ref()
263            .and_then(|f| f.header_name.as_ref())
264            .cloned()
265            .unwrap_or_else(|| format!("{}.h", self.ffi_prefix()))
266    }
267
268    /// Get the Python module name.
269    pub fn python_module_name(&self) -> String {
270        self.python
271            .as_ref()
272            .and_then(|p| p.module_name.as_ref())
273            .cloned()
274            .unwrap_or_else(|| format!("_{}", self.crate_config.name.replace('-', "_")))
275    }
276
277    /// Get the Node package name.
278    pub fn node_package_name(&self) -> String {
279        self.node
280            .as_ref()
281            .and_then(|n| n.package_name.as_ref())
282            .cloned()
283            .unwrap_or_else(|| self.crate_config.name.clone())
284    }
285
286    /// Get the Ruby gem name.
287    pub fn ruby_gem_name(&self) -> String {
288        self.ruby
289            .as_ref()
290            .and_then(|r| r.gem_name.as_ref())
291            .cloned()
292            .unwrap_or_else(|| self.crate_config.name.replace('-', "_"))
293    }
294
295    /// Get the PHP extension name.
296    pub fn php_extension_name(&self) -> String {
297        self.php
298            .as_ref()
299            .and_then(|p| p.extension_name.as_ref())
300            .cloned()
301            .unwrap_or_else(|| self.crate_config.name.replace('-', "_"))
302    }
303
304    /// Get the Elixir app name.
305    pub fn elixir_app_name(&self) -> String {
306        self.elixir
307            .as_ref()
308            .and_then(|e| e.app_name.as_ref())
309            .cloned()
310            .unwrap_or_else(|| self.crate_config.name.replace('-', "_"))
311    }
312
313    /// Get the Go module path.
314    pub fn go_module(&self) -> String {
315        self.go
316            .as_ref()
317            .and_then(|g| g.module.as_ref())
318            .cloned()
319            .unwrap_or_else(|| format!("github.com/kreuzberg-dev/{}", self.crate_config.name))
320    }
321
322    /// Get the Java package name.
323    pub fn java_package(&self) -> String {
324        self.java
325            .as_ref()
326            .and_then(|j| j.package.as_ref())
327            .cloned()
328            .unwrap_or_else(|| "dev.kreuzberg".to_string())
329    }
330
    /// Get the Java Maven groupId.
    ///
    /// Currently this is exactly the value of [`AlefConfig::java_package`]: the full Java
    /// package doubles as the groupId, and there is no separate groupId setting.
    pub fn java_group_id(&self) -> String {
        self.java_package()
    }
338
339    /// Get the C# namespace.
340    pub fn csharp_namespace(&self) -> String {
341        self.csharp
342            .as_ref()
343            .and_then(|c| c.namespace.as_ref())
344            .cloned()
345            .unwrap_or_else(|| {
346                use heck::ToPascalCase;
347                self.crate_config.name.to_pascal_case()
348            })
349    }
350
351    /// Get the directory name of the core crate (derived from sources or falling back to name).
352    ///
353    /// For example, if `sources` contains `"crates/html-to-markdown/src/lib.rs"`, this returns
354    /// `"html-to-markdown"`.  Used by the scaffold to generate correct `path = "../../crates/…"`
355    /// references in binding-crate `Cargo.toml` files.
356    pub fn core_crate_dir(&self) -> String {
357        // Try to derive from first source path: "crates/foo/src/types/config.rs" → "foo"
358        // Walk up from the file until we find the "src" directory, then take its parent.
359        if let Some(first_source) = self.crate_config.sources.first() {
360            let path = std::path::Path::new(first_source);
361            let mut current = path.parent();
362            while let Some(dir) = current {
363                if dir.file_name().is_some_and(|n| n == "src") {
364                    if let Some(crate_dir) = dir.parent() {
365                        if let Some(dir_name) = crate_dir.file_name() {
366                            return dir_name.to_string_lossy().into_owned();
367                        }
368                    }
369                    break;
370                }
371                current = dir.parent();
372            }
373        }
374        self.crate_config.name.clone()
375    }
376
377    /// Get the R package name.
378    pub fn r_package_name(&self) -> String {
379        self.r
380            .as_ref()
381            .and_then(|r| r.package_name.as_ref())
382            .cloned()
383            .unwrap_or_else(|| self.crate_config.name.clone())
384    }
385
386    /// Attempt to read the resolved version string from the configured `version_from` file.
387    /// Returns `None` if the file cannot be read or the version cannot be found.
388    pub fn resolved_version(&self) -> Option<String> {
389        let content = std::fs::read_to_string(&self.crate_config.version_from).ok()?;
390        let value: toml::Value = toml::from_str(&content).ok()?;
391        if let Some(v) = value
392            .get("workspace")
393            .and_then(|w| w.get("package"))
394            .and_then(|p| p.get("version"))
395            .and_then(|v| v.as_str())
396        {
397            return Some(v.to_string());
398        }
399        value
400            .get("package")
401            .and_then(|p| p.get("version"))
402            .and_then(|v| v.as_str())
403            .map(|v| v.to_string())
404    }
405
406    /// Get the effective serde rename_all strategy for a given language.
407    ///
408    /// Resolution order:
409    /// 1. Per-language config override (`[python] serde_rename_all = "..."`)
410    /// 2. Language default:
411    ///    - camelCase: node, wasm, java, csharp
412    ///    - snake_case: python, ruby, php, go, ffi, elixir, r
413    pub fn serde_rename_all_for_language(&self, lang: extras::Language) -> String {
414        // 1. Check per-language config override.
415        let override_val = match lang {
416            extras::Language::Python => self.python.as_ref().and_then(|c| c.serde_rename_all.as_deref()),
417            extras::Language::Node => self.node.as_ref().and_then(|c| c.serde_rename_all.as_deref()),
418            extras::Language::Ruby => self.ruby.as_ref().and_then(|c| c.serde_rename_all.as_deref()),
419            extras::Language::Php => self.php.as_ref().and_then(|c| c.serde_rename_all.as_deref()),
420            extras::Language::Elixir => self.elixir.as_ref().and_then(|c| c.serde_rename_all.as_deref()),
421            extras::Language::Wasm => self.wasm.as_ref().and_then(|c| c.serde_rename_all.as_deref()),
422            extras::Language::Ffi => self.ffi.as_ref().and_then(|c| c.serde_rename_all.as_deref()),
423            extras::Language::Go => self.go.as_ref().and_then(|c| c.serde_rename_all.as_deref()),
424            extras::Language::Java => self.java.as_ref().and_then(|c| c.serde_rename_all.as_deref()),
425            extras::Language::Csharp => self.csharp.as_ref().and_then(|c| c.serde_rename_all.as_deref()),
426            extras::Language::R => self.r.as_ref().and_then(|c| c.serde_rename_all.as_deref()),
427        };
428
429        if let Some(val) = override_val {
430            return val.to_string();
431        }
432
433        // 2. Language defaults.
434        match lang {
435            extras::Language::Node | extras::Language::Wasm | extras::Language::Java | extras::Language::Csharp => {
436                "camelCase".to_string()
437            }
438            extras::Language::Python
439            | extras::Language::Ruby
440            | extras::Language::Php
441            | extras::Language::Go
442            | extras::Language::Ffi
443            | extras::Language::Elixir
444            | extras::Language::R => "snake_case".to_string(),
445        }
446    }
447
448    /// Rewrite a rust_path using path_mappings.
449    /// Matches the longest prefix first.
450    pub fn rewrite_path(&self, rust_path: &str) -> String {
451        // Sort mappings by key length descending (longest prefix first)
452        let mut mappings: Vec<_> = self.crate_config.path_mappings.iter().collect();
453        mappings.sort_by(|a, b| b.0.len().cmp(&a.0.len()));
454
455        for (from, to) in &mappings {
456            if rust_path.starts_with(from.as_str()) {
457                return format!("{}{}", to, &rust_path[from.len()..]);
458            }
459        }
460        rust_path.to_string()
461    }
462}
463
/// Resolves an output directory path from config.
///
/// Uses `config_path` when it is provided (converted lossily to a string) and `default`
/// otherwise. In either case every `{name}` placeholder is replaced with `crate_name`.
pub fn resolve_output_dir(config_path: Option<&PathBuf>, crate_name: &str, default: &str) -> String {
    const PLACEHOLDER: &str = "{name}";

    match config_path {
        Some(configured) => configured.to_string_lossy().replace(PLACEHOLDER, crate_name),
        None => default.replace(PLACEHOLDER, crate_name),
    }
}
471
472/// Detect whether `serde` and `serde_json` are available in a binding crate's Cargo.toml.
473///
474/// `output_dir` is the generated source directory (e.g., `crates/spikard-py/src/`).
475/// The function walks up to find the crate's Cargo.toml and checks its `[dependencies]`
476/// for both `serde` and `serde_json`.
477pub fn detect_serde_available(output_dir: &str) -> bool {
478    let src_path = std::path::Path::new(output_dir);
479    // Walk up from the output dir to find Cargo.toml (usually output_dir is `crates/foo/src/`)
480    let mut dir = src_path;
481    loop {
482        let cargo_toml = dir.join("Cargo.toml");
483        if cargo_toml.exists() {
484            return cargo_toml_has_serde(&cargo_toml);
485        }
486        match dir.parent() {
487            Some(parent) if !parent.as_os_str().is_empty() => dir = parent,
488            _ => break,
489        }
490    }
491    false
492}
493
/// Returns true if `header` (the text between `[` and `]`) names a table whose keys are
/// regular dependencies: `dependencies` or `target.<spec>.dependencies`.
fn is_dependencies_table(header: &str) -> bool {
    header == "dependencies" || (header.starts_with("target.") && header.ends_with(".dependencies"))
}

/// Check if a Cargo.toml declares both `serde` and `serde_json` as direct, regular dependencies.
///
/// The `serde::Serialize` derive macro requires `serde` as a direct dependency; having only
/// `serde_json` is not sufficient since it only pulls in `serde` transitively. Note that this
/// check does NOT verify that serde's `derive` feature is enabled (it may be inherited from a
/// workspace), only that both crates are declared.
///
/// This is a line-based scan rather than a full TOML parse. A dependency is recognised when it
/// appears as:
/// - a key inside `[dependencies]` or `[target.<spec>.dependencies]`
///   (`serde = "1"`, `serde = { ... }`, `serde.workspace = true`, `"serde" = "1"`), or
/// - its own table (`[dependencies.serde]`).
///
/// Entries in other sections (`[features]`, `[dev-dependencies]`, `[build-dependencies]`,
/// `[workspace.dependencies]`) and mentions in comments are ignored. Dependencies renamed via
/// `package = "..."` are not detected.
///
/// Returns `false` if the file cannot be read.
fn cargo_toml_has_serde(path: &std::path::Path) -> bool {
    let Ok(content) = std::fs::read_to_string(path) else {
        return false;
    };

    let mut in_dependencies = false;
    let mut has_serde = false;
    let mut has_serde_json = false;

    for line in content.lines().map(str::trim) {
        if line.is_empty() || line.starts_with('#') {
            continue;
        }

        let declared = if let Some(rest) = line.strip_prefix('[') {
            // Section header: remember whether the following keys are dependencies.
            let header = rest.split(']').next().unwrap_or("").trim();
            in_dependencies = is_dependencies_table(header);
            // `[dependencies.serde]` declares the dependency in the header itself.
            header
                .rsplit_once('.')
                .filter(|(parent, _)| is_dependencies_table(parent))
                .map(|(_, name)| name)
        } else if in_dependencies {
            // Key line: the dependency name is the first segment of the (possibly dotted) key.
            line.split_once('=').and_then(|(key, _)| key.split('.').next())
        } else {
            None
        };

        match declared.map(|name| name.trim().trim_matches(|c| c == '"' || c == '\'')) {
            Some("serde") => has_serde = true,
            Some("serde_json") => has_serde_json = true,
            _ => {}
        }
    }

    has_serde && has_serde_json
}