Skip to main content

alef_core/config/
mod.rs

1use serde::{Deserialize, Serialize};
2use std::collections::HashMap;
3use std::path::PathBuf;
4
5pub mod dto;
6pub mod e2e;
7pub mod extras;
8pub mod languages;
9pub mod output;
10
11// Re-exports for backward compatibility — all types were previously flat in config.rs.
12pub use dto::{
13    CsharpDtoStyle, DtoConfig, ElixirDtoStyle, GoDtoStyle, JavaDtoStyle, NodeDtoStyle, PhpDtoStyle, PythonDtoStyle,
14    RDtoStyle, RubyDtoStyle,
15};
16pub use e2e::E2eConfig;
17pub use extras::{AdapterConfig, AdapterParam, AdapterPattern, Language};
18pub use languages::{
19    CSharpConfig, CustomModulesConfig, CustomRegistration, CustomRegistrationsConfig, ElixirConfig, FfiConfig,
20    GoConfig, JavaConfig, NodeConfig, PhpConfig, PythonConfig, RConfig, RubyConfig, StubsConfig, WasmConfig,
21};
22pub use output::{
23    ExcludeConfig, IncludeConfig, LintConfig, OutputConfig, ReadmeConfig, ScaffoldConfig, SyncConfig, TestConfig,
24    TextReplacement,
25};
26
/// Root configuration from alef.toml.
///
/// Only `crate` and `languages` are required; every other key falls back to its
/// `Default` (or `None`) when missing, via `#[serde(default)]`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AlefConfig {
    /// Core crate settings; stored under the `crate` key (a Rust keyword, hence the rename).
    #[serde(rename = "crate")]
    pub crate_config: CrateConfig,
    /// Languages listed in the config (presumably the binding targets to generate — confirm).
    pub languages: Vec<Language>,
    /// Exclusion settings (`exclude` key).
    #[serde(default)]
    pub exclude: ExcludeConfig,
    /// Inclusion settings (`include` key).
    #[serde(default)]
    pub include: IncludeConfig,
    /// Output settings; `output.ffi` is consulted by `ffi_lib_name`.
    #[serde(default)]
    pub output: OutputConfig,
    /// Optional Python-specific settings (`python` key).
    #[serde(default)]
    pub python: Option<PythonConfig>,
    /// Optional Node-specific settings (`node` key).
    #[serde(default)]
    pub node: Option<NodeConfig>,
    /// Optional Ruby-specific settings (`ruby` key).
    #[serde(default)]
    pub ruby: Option<RubyConfig>,
    /// Optional PHP-specific settings (`php` key).
    #[serde(default)]
    pub php: Option<PhpConfig>,
    /// Optional Elixir-specific settings (`elixir` key).
    #[serde(default)]
    pub elixir: Option<ElixirConfig>,
    /// Optional WebAssembly-specific settings (`wasm` key).
    #[serde(default)]
    pub wasm: Option<WasmConfig>,
    /// Optional FFI settings (`ffi` key); source of the FFI prefix, library and header names.
    #[serde(default)]
    pub ffi: Option<FfiConfig>,
    /// Optional Go-specific settings (`go` key).
    #[serde(default)]
    pub go: Option<GoConfig>,
    /// Optional Java-specific settings (`java` key).
    #[serde(default)]
    pub java: Option<JavaConfig>,
    /// Optional C#-specific settings (`csharp` key).
    #[serde(default)]
    pub csharp: Option<CSharpConfig>,
    /// Optional R-specific settings (`r` key).
    #[serde(default)]
    pub r: Option<RConfig>,
    /// Optional scaffold settings (`scaffold` key).
    #[serde(default)]
    pub scaffold: Option<ScaffoldConfig>,
    /// Optional README settings (`readme` key).
    #[serde(default)]
    pub readme: Option<ReadmeConfig>,
    /// Optional lint settings keyed by string (presumably the language name — confirm).
    #[serde(default)]
    pub lint: Option<HashMap<String, LintConfig>>,
    /// Optional test settings keyed by string (presumably the language name — confirm).
    #[serde(default)]
    pub test: Option<HashMap<String, TestConfig>>,
    /// Optional lists of custom file paths keyed by string (presumably the language name — confirm).
    #[serde(default)]
    pub custom_files: Option<HashMap<String, Vec<PathBuf>>>,
    /// Adapter definitions; empty when the key is missing.
    #[serde(default)]
    pub adapters: Vec<AdapterConfig>,
    /// Custom module settings (`custom_modules` key).
    #[serde(default)]
    pub custom_modules: CustomModulesConfig,
    /// Custom registration settings (`custom_registrations` key).
    #[serde(default)]
    pub custom_registrations: CustomRegistrationsConfig,
    /// Optional sync settings (`sync` key).
    #[serde(default)]
    pub sync: Option<SyncConfig>,
    /// Declare opaque types from external crates that alef can't extract.
    /// Map of type name → Rust path (e.g., "Tree" = "tree_sitter_language_pack::Tree").
    /// These get opaque wrapper structs in all backends.
    #[serde(default)]
    pub opaque_types: HashMap<String, String>,
    /// Controls which generation passes alef runs (all default to true).
    #[serde(default)]
    pub generate: GenerateConfig,
    /// Per-language overrides for generate flags (key = language name, e.g., "python").
    #[serde(default)]
    pub generate_overrides: HashMap<String, GenerateConfig>,
    /// Per-language DTO/type generation style (dataclass vs TypedDict, zod vs interface, etc.).
    #[serde(default)]
    pub dto: DtoConfig,
    /// E2E test generation configuration.
    #[serde(default)]
    pub e2e: Option<E2eConfig>,
}
97
/// Settings describing the core Rust crate (the `crate` table of the root config).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CrateConfig {
    /// Crate name. Several `AlefConfig` accessors fall back to it (often with `-` replaced
    /// by `_`) when no language-specific name is configured.
    pub name: String,
    /// Source file paths of the core crate. `AlefConfig::core_crate_dir` inspects the first entry.
    pub sources: Vec<PathBuf>,
    /// Path of the TOML file `AlefConfig::resolved_version` reads the version from.
    /// Defaults to `"Cargo.toml"`.
    #[serde(default = "default_version_from")]
    pub version_from: String,
    /// Explicit core crate import path; when unset, `AlefConfig::core_import` derives it
    /// from `name` by replacing `-` with `_`.
    #[serde(default)]
    pub core_import: Option<String>,
    /// Optional workspace root path for resolving `pub use` re-exports from sibling crates.
    #[serde(default)]
    pub workspace_root: Option<PathBuf>,
    /// When true, skip adding `use {core_import};` to generated bindings.
    #[serde(default)]
    pub skip_core_import: bool,
    /// Cargo features that are enabled in binding crates.
    /// Fields gated by `#[cfg(feature = "...")]` matching these features
    /// are treated as always-present (cfg stripped from the IR).
    #[serde(default)]
    pub features: Vec<String>,
    /// Maps extracted rust_path prefixes to actual import paths in binding crates.
    /// Example: { "spikard" = "spikard_http" } rewrites "spikard::ServerConfig" to "spikard_http::ServerConfig"
    #[serde(default)]
    pub path_mappings: HashMap<String, String>,
}
122
/// Serde default for `CrateConfig::version_from`: the manifest file name `Cargo.toml`.
fn default_version_from() -> String {
    String::from("Cargo.toml")
}
126
/// Serde default helper returning `true`; referenced by the `GenerateConfig` flags via
/// `#[serde(default = "default_true")]` so that a missing key deserializes as enabled.
fn default_true() -> bool {
    true
}
130
/// Controls which generation passes alef runs.
/// All flags default to `true`; set to `false` to skip a pass.
/// Can be overridden per-language via `[generate_overrides.<lang>]`.
///
/// Each field carries `#[serde(default = "default_true")]`, so a key missing from a
/// partially specified table deserializes as `true` rather than `bool::default()`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GenerateConfig {
    /// Generate low-level struct wrappers, From impls, module init (default: true)
    #[serde(default = "default_true")]
    pub bindings: bool,
    /// Generate error type hierarchies from thiserror enums (default: true)
    #[serde(default = "default_true")]
    pub errors: bool,
    /// Generate config builder constructors from Default types (default: true)
    #[serde(default = "default_true")]
    pub configs: bool,
    /// Generate async/sync function pairs with runtime management (default: true)
    #[serde(default = "default_true")]
    pub async_wrappers: bool,
    /// Generate recursive type marshaling helpers (default: true)
    #[serde(default = "default_true")]
    pub type_conversions: bool,
    /// Generate package manifests (pyproject.toml, package.json, etc.) (default: true)
    #[serde(default = "default_true")]
    pub package_metadata: bool,
    /// Generate idiomatic public API wrappers (default: true)
    #[serde(default = "default_true")]
    pub public_api: bool,
    /// Generate `From<BindingType> for CoreType` reverse conversions (default: true).
    /// Set to false when the binding layer only returns core types and never accepts them.
    #[serde(default = "default_true")]
    pub reverse_conversions: bool,
}
162
163impl Default for GenerateConfig {
164    fn default() -> Self {
165        Self {
166            bindings: true,
167            errors: true,
168            configs: true,
169            async_wrappers: true,
170            type_conversions: true,
171            package_metadata: true,
172            public_api: true,
173            reverse_conversions: true,
174        }
175    }
176}
177
178// ---------------------------------------------------------------------------
179// Shared config resolution helpers
180// ---------------------------------------------------------------------------
181
182impl AlefConfig {
183    /// Get the features to use for a specific language's binding crate.
184    /// Checks for a per-language override first, then falls back to `[crate] features`.
185    pub fn features_for_language(&self, lang: extras::Language) -> &[String] {
186        let override_features = match lang {
187            extras::Language::Python => self.python.as_ref().and_then(|c| c.features.as_deref()),
188            extras::Language::Node => self.node.as_ref().and_then(|c| c.features.as_deref()),
189            extras::Language::Ruby => self.ruby.as_ref().and_then(|c| c.features.as_deref()),
190            extras::Language::Php => self.php.as_ref().and_then(|c| c.features.as_deref()),
191            extras::Language::Elixir => self.elixir.as_ref().and_then(|c| c.features.as_deref()),
192            extras::Language::Wasm => self.wasm.as_ref().and_then(|c| c.features.as_deref()),
193            extras::Language::Ffi => self.ffi.as_ref().and_then(|c| c.features.as_deref()),
194            extras::Language::Go => self.go.as_ref().and_then(|c| c.features.as_deref()),
195            extras::Language::Java => self.java.as_ref().and_then(|c| c.features.as_deref()),
196            extras::Language::Csharp => self.csharp.as_ref().and_then(|c| c.features.as_deref()),
197            extras::Language::R => self.r.as_ref().and_then(|c| c.features.as_deref()),
198            extras::Language::Rust => None, // Rust doesn't have binding-specific features
199        };
200        override_features.unwrap_or(&self.crate_config.features)
201    }
202
203    /// Get the core crate import path (e.g., "liter_llm"). Used by codegen to call into the core crate.
204    pub fn core_import(&self) -> String {
205        self.crate_config
206            .core_import
207            .clone()
208            .unwrap_or_else(|| self.crate_config.name.replace('-', "_"))
209    }
210
211    /// Get the FFI prefix (e.g., "kreuzberg"). Used by FFI, Go, Java, C# backends.
212    pub fn ffi_prefix(&self) -> String {
213        self.ffi
214            .as_ref()
215            .and_then(|f| f.prefix.as_ref())
216            .cloned()
217            .unwrap_or_else(|| self.crate_config.name.replace('-', "_"))
218    }
219
220    /// Get the FFI native library name (for Go cgo, Java Panama, C# P/Invoke).
221    ///
222    /// Resolution order:
223    /// 1. `[ffi] lib_name` explicit override
224    /// 2. Directory name of `output.ffi` path with hyphens replaced by underscores
225    ///    (e.g. `crates/html-to-markdown-ffi/src/` → `html_to_markdown_ffi`)
226    /// 3. `{ffi_prefix}_ffi` fallback
227    pub fn ffi_lib_name(&self) -> String {
228        // 1. Explicit override in [ffi] section.
229        if let Some(name) = self.ffi.as_ref().and_then(|f| f.lib_name.as_ref()) {
230            return name.clone();
231        }
232
233        // 2. Derive from output.ffi path: take the last meaningful directory component
234        //    (skip trailing "src" or similar), then replace hyphens with underscores.
235        if let Some(ffi_path) = self.output.ffi.as_ref() {
236            let path = std::path::Path::new(ffi_path);
237            // Walk components from the end to find the crate directory name.
238            // Skip components like "src" that are inside the crate dir.
239            let components: Vec<_> = path
240                .components()
241                .filter_map(|c| {
242                    if let std::path::Component::Normal(s) = c {
243                        s.to_str()
244                    } else {
245                        None
246                    }
247                })
248                .collect();
249            // The crate name is typically the last component that looks like a crate dir
250            // (i.e. not "src", "lib", or similar). Search from the end.
251            let crate_dir = components
252                .iter()
253                .rev()
254                .find(|&&s| s != "src" && s != "lib" && s != "include")
255                .copied();
256            if let Some(dir) = crate_dir {
257                return dir.replace('-', "_");
258            }
259        }
260
261        // 3. Default fallback.
262        format!("{}_ffi", self.ffi_prefix())
263    }
264
265    /// Get the FFI header name.
266    pub fn ffi_header_name(&self) -> String {
267        self.ffi
268            .as_ref()
269            .and_then(|f| f.header_name.as_ref())
270            .cloned()
271            .unwrap_or_else(|| format!("{}.h", self.ffi_prefix()))
272    }
273
274    /// Get the Python module name.
275    pub fn python_module_name(&self) -> String {
276        self.python
277            .as_ref()
278            .and_then(|p| p.module_name.as_ref())
279            .cloned()
280            .unwrap_or_else(|| format!("_{}", self.crate_config.name.replace('-', "_")))
281    }
282
283    /// Get the PyPI package name used as `[project] name` in `pyproject.toml`.
284    ///
285    /// Returns `[python] pip_name` if set, otherwise falls back to the crate name.
286    pub fn python_pip_name(&self) -> String {
287        self.python
288            .as_ref()
289            .and_then(|p| p.pip_name.as_ref())
290            .cloned()
291            .unwrap_or_else(|| self.crate_config.name.clone())
292    }
293
294    /// Get the PHP Composer autoload namespace derived from the extension name.
295    ///
296    /// Converts the extension name (e.g. `html_to_markdown_rs`) into a
297    /// PSR-4 namespace string (e.g. `Html\\To\\Markdown\\Rs`).
298    pub fn php_autoload_namespace(&self) -> String {
299        use heck::ToPascalCase;
300        let ext = self.php_extension_name();
301        if ext.contains('_') {
302            ext.split('_')
303                .map(|p| p.to_pascal_case())
304                .collect::<Vec<_>>()
305                .join("\\")
306        } else {
307            ext.to_pascal_case()
308        }
309    }
310
311    /// Get the Node package name.
312    pub fn node_package_name(&self) -> String {
313        self.node
314            .as_ref()
315            .and_then(|n| n.package_name.as_ref())
316            .cloned()
317            .unwrap_or_else(|| self.crate_config.name.clone())
318    }
319
320    /// Get the Ruby gem name.
321    pub fn ruby_gem_name(&self) -> String {
322        self.ruby
323            .as_ref()
324            .and_then(|r| r.gem_name.as_ref())
325            .cloned()
326            .unwrap_or_else(|| self.crate_config.name.replace('-', "_"))
327    }
328
329    /// Get the PHP extension name.
330    pub fn php_extension_name(&self) -> String {
331        self.php
332            .as_ref()
333            .and_then(|p| p.extension_name.as_ref())
334            .cloned()
335            .unwrap_or_else(|| self.crate_config.name.replace('-', "_"))
336    }
337
338    /// Get the Elixir app name.
339    pub fn elixir_app_name(&self) -> String {
340        self.elixir
341            .as_ref()
342            .and_then(|e| e.app_name.as_ref())
343            .cloned()
344            .unwrap_or_else(|| self.crate_config.name.replace('-', "_"))
345    }
346
347    /// Get the Go module path.
348    pub fn go_module(&self) -> String {
349        self.go
350            .as_ref()
351            .and_then(|g| g.module.as_ref())
352            .cloned()
353            .unwrap_or_else(|| format!("github.com/kreuzberg-dev/{}", self.crate_config.name))
354    }
355
356    /// Get the Java package name.
357    pub fn java_package(&self) -> String {
358        self.java
359            .as_ref()
360            .and_then(|j| j.package.as_ref())
361            .cloned()
362            .unwrap_or_else(|| "dev.kreuzberg".to_string())
363    }
364
    /// Get the Java Maven groupId.
    ///
    /// Uses the full Java package as the groupId, matching Maven convention
    /// where groupId equals the package declaration.
    ///
    /// The value is always identical to [`Self::java_package`]; there is no separate
    /// configuration key for the groupId.
    pub fn java_group_id(&self) -> String {
        // Pure delegation: `[java] package` (or its default) doubles as the groupId.
        self.java_package()
    }
372
373    /// Get the C# namespace.
374    pub fn csharp_namespace(&self) -> String {
375        self.csharp
376            .as_ref()
377            .and_then(|c| c.namespace.as_ref())
378            .cloned()
379            .unwrap_or_else(|| {
380                use heck::ToPascalCase;
381                self.crate_config.name.to_pascal_case()
382            })
383    }
384
385    /// Get the directory name of the core crate (derived from sources or falling back to name).
386    ///
387    /// For example, if `sources` contains `"crates/html-to-markdown/src/lib.rs"`, this returns
388    /// `"html-to-markdown"`.  Used by the scaffold to generate correct `path = "../../crates/…"`
389    /// references in binding-crate `Cargo.toml` files.
390    pub fn core_crate_dir(&self) -> String {
391        // Try to derive from first source path: "crates/foo/src/types/config.rs" → "foo"
392        // Walk up from the file until we find the "src" directory, then take its parent.
393        if let Some(first_source) = self.crate_config.sources.first() {
394            let path = std::path::Path::new(first_source);
395            let mut current = path.parent();
396            while let Some(dir) = current {
397                if dir.file_name().is_some_and(|n| n == "src") {
398                    if let Some(crate_dir) = dir.parent() {
399                        if let Some(dir_name) = crate_dir.file_name() {
400                            return dir_name.to_string_lossy().into_owned();
401                        }
402                    }
403                    break;
404                }
405                current = dir.parent();
406            }
407        }
408        self.crate_config.name.clone()
409    }
410
411    /// Get the R package name.
412    pub fn r_package_name(&self) -> String {
413        self.r
414            .as_ref()
415            .and_then(|r| r.package_name.as_ref())
416            .cloned()
417            .unwrap_or_else(|| self.crate_config.name.clone())
418    }
419
420    /// Attempt to read the resolved version string from the configured `version_from` file.
421    /// Returns `None` if the file cannot be read or the version cannot be found.
422    pub fn resolved_version(&self) -> Option<String> {
423        let content = std::fs::read_to_string(&self.crate_config.version_from).ok()?;
424        let value: toml::Value = toml::from_str(&content).ok()?;
425        if let Some(v) = value
426            .get("workspace")
427            .and_then(|w| w.get("package"))
428            .and_then(|p| p.get("version"))
429            .and_then(|v| v.as_str())
430        {
431            return Some(v.to_string());
432        }
433        value
434            .get("package")
435            .and_then(|p| p.get("version"))
436            .and_then(|v| v.as_str())
437            .map(|v| v.to_string())
438    }
439
440    /// Get the effective serde rename_all strategy for a given language.
441    ///
442    /// Resolution order:
443    /// 1. Per-language config override (`[python] serde_rename_all = "..."`)
444    /// 2. Language default:
445    ///    - camelCase: node, wasm, java, csharp
446    ///    - snake_case: python, ruby, php, go, ffi, elixir, r
447    pub fn serde_rename_all_for_language(&self, lang: extras::Language) -> String {
448        // 1. Check per-language config override.
449        let override_val = match lang {
450            extras::Language::Python => self.python.as_ref().and_then(|c| c.serde_rename_all.as_deref()),
451            extras::Language::Node => self.node.as_ref().and_then(|c| c.serde_rename_all.as_deref()),
452            extras::Language::Ruby => self.ruby.as_ref().and_then(|c| c.serde_rename_all.as_deref()),
453            extras::Language::Php => self.php.as_ref().and_then(|c| c.serde_rename_all.as_deref()),
454            extras::Language::Elixir => self.elixir.as_ref().and_then(|c| c.serde_rename_all.as_deref()),
455            extras::Language::Wasm => self.wasm.as_ref().and_then(|c| c.serde_rename_all.as_deref()),
456            extras::Language::Ffi => self.ffi.as_ref().and_then(|c| c.serde_rename_all.as_deref()),
457            extras::Language::Go => self.go.as_ref().and_then(|c| c.serde_rename_all.as_deref()),
458            extras::Language::Java => self.java.as_ref().and_then(|c| c.serde_rename_all.as_deref()),
459            extras::Language::Csharp => self.csharp.as_ref().and_then(|c| c.serde_rename_all.as_deref()),
460            extras::Language::R => self.r.as_ref().and_then(|c| c.serde_rename_all.as_deref()),
461            extras::Language::Rust => None, // Rust uses native naming (snake_case)
462        };
463
464        if let Some(val) = override_val {
465            return val.to_string();
466        }
467
468        // 2. Language defaults.
469        match lang {
470            extras::Language::Node | extras::Language::Wasm | extras::Language::Java | extras::Language::Csharp => {
471                "camelCase".to_string()
472            }
473            extras::Language::Python
474            | extras::Language::Ruby
475            | extras::Language::Php
476            | extras::Language::Go
477            | extras::Language::Ffi
478            | extras::Language::Elixir
479            | extras::Language::R
480            | extras::Language::Rust => "snake_case".to_string(),
481        }
482    }
483
484    /// Rewrite a rust_path using path_mappings.
485    /// Matches the longest prefix first.
486    pub fn rewrite_path(&self, rust_path: &str) -> String {
487        // Sort mappings by key length descending (longest prefix first)
488        let mut mappings: Vec<_> = self.crate_config.path_mappings.iter().collect();
489        mappings.sort_by(|a, b| b.0.len().cmp(&a.0.len()));
490
491        for (from, to) in &mappings {
492            if rust_path.starts_with(from.as_str()) {
493                return format!("{}{}", to, &rust_path[from.len()..]);
494            }
495        }
496        rust_path.to_string()
497    }
498}
499
/// Resolve an output directory from an optional configured path.
///
/// Uses `config_path` (converted lossily to UTF-8) when present and `default` otherwise,
/// then replaces every `{name}` placeholder in the chosen template with `crate_name`.
pub fn resolve_output_dir(config_path: Option<&PathBuf>, crate_name: &str, default: &str) -> String {
    let template = match config_path {
        Some(path) => path.to_string_lossy(),
        None => std::borrow::Cow::Borrowed(default),
    };
    template.replace("{name}", crate_name)
}
507
508/// Detect whether `serde` and `serde_json` are available in a binding crate's Cargo.toml.
509///
510/// `output_dir` is the generated source directory (e.g., `crates/spikard-py/src/`).
511/// The function walks up to find the crate's Cargo.toml and checks its `[dependencies]`
512/// for both `serde` and `serde_json`.
513pub fn detect_serde_available(output_dir: &str) -> bool {
514    let src_path = std::path::Path::new(output_dir);
515    // Walk up from the output dir to find Cargo.toml (usually output_dir is `crates/foo/src/`)
516    let mut dir = src_path;
517    loop {
518        let cargo_toml = dir.join("Cargo.toml");
519        if cargo_toml.exists() {
520            return cargo_toml_has_serde(&cargo_toml);
521        }
522        match dir.parent() {
523            Some(parent) if !parent.as_os_str().is_empty() => dir = parent,
524            _ => break,
525        }
526    }
527    false
528}
529
/// Check if a Cargo.toml declares both `serde` and `serde_json` as direct dependencies.
///
/// The `serde::Serialize` derive macro requires `serde` as a direct dependency; having only
/// `serde_json` is not sufficient since it pulls in `serde` transitively. Only entries in
/// `[dependencies]` / `[target.<cfg>.dependencies]` (including the `[dependencies.<name>]`
/// table form) count — mentions in comments, `[dev-dependencies]`, `[build-dependencies]`,
/// `[workspace.dependencies]` or in longer crate names such as `serde_json_path` do not.
///
/// NOTE: the `derive` feature itself is not verified; with `serde.workspace = true` the
/// feature list lives in another manifest and cannot be seen from here.
///
/// Returns `false` when the file cannot be read.
fn cargo_toml_has_serde(path: &std::path::Path) -> bool {
    match std::fs::read_to_string(path) {
        Ok(manifest) => manifest_declares_serde(&manifest),
        // An unreadable manifest is treated as "serde not available".
        Err(_) => false,
    }
}

/// Line-based scan of manifest text for direct `serde` and `serde_json` dependencies.
fn manifest_declares_serde(manifest: &str) -> bool {
    let mut in_dependencies = false;
    let mut has_serde = false;
    let mut has_serde_json = false;

    for line in manifest.lines().map(str::trim) {
        if line.is_empty() || line.starts_with('#') {
            continue;
        }

        let declared = if line.starts_with('[') {
            // Table header (`[x]` or `[[x]]`): it always ends the previous table.
            let header = line
                .trim_start_matches('[')
                .split(']')
                .next()
                .unwrap_or("")
                .trim();
            let (is_dependency_table, named_dependency) = classify_dependency_header(header);
            in_dependencies = is_dependency_table;
            named_dependency
        } else if in_dependencies {
            dependency_line_key(line)
        } else {
            None
        };

        match declared {
            Some("serde") => has_serde = true,
            Some("serde_json") => has_serde_json = true,
            _ => {}
        }
    }

    has_serde && has_serde_json
}

/// Classify a table header: `(true, None)` for a whole dependencies table,
/// `(false, Some(name))` for a `dependencies.<name>` table, `(false, None)` otherwise.
fn classify_dependency_header(header: &str) -> (bool, Option<&str>) {
    let is_target = header.starts_with("target.");
    if header == "dependencies" || (is_target && header.ends_with(".dependencies")) {
        return (true, None);
    }

    let named = match header.strip_prefix("dependencies.") {
        Some(name) => Some(name),
        // `.dependencies.` (with the leading dot) cannot match `dev-`/`build-dependencies`.
        None if is_target => header.rsplit_once(".dependencies.").map(|(_, name)| name),
        None => None,
    };
    (false, named.map(strip_toml_quotes))
}

/// Extract the dependency name from a `key = value` line inside a dependencies table,
/// e.g. `serde` from `serde = "1"`, `"serde" = "1"` or `serde.workspace = true`.
fn dependency_line_key(line: &str) -> Option<&str> {
    let (key, _value) = line.split_once('=')?;
    let first_segment = key.split('.').next()?;
    Some(strip_toml_quotes(first_segment))
}

/// Trim whitespace and surrounding TOML key quotes.
fn strip_toml_quotes(raw: &str) -> &str {
    raw.trim().trim_matches(|c| c == '"' || c == '\'')
}