Skip to main content

alef_core/config/
mod.rs

1use serde::{Deserialize, Serialize};
2use std::collections::HashMap;
3use std::path::PathBuf;
4
5pub mod dto;
6pub mod e2e;
7pub mod extras;
8pub mod languages;
9pub mod output;
10
11// Re-exports for backward compatibility — all types were previously flat in config.rs.
12pub use dto::{
13    CsharpDtoStyle, DtoConfig, ElixirDtoStyle, GoDtoStyle, JavaDtoStyle, NodeDtoStyle, PhpDtoStyle, PythonDtoStyle,
14    RDtoStyle, RubyDtoStyle,
15};
16pub use e2e::E2eConfig;
17pub use extras::{AdapterConfig, AdapterParam, AdapterPattern, Language};
18pub use languages::{
19    CSharpConfig, CustomModulesConfig, CustomRegistration, CustomRegistrationsConfig, ElixirConfig, FfiConfig,
20    GoConfig, JavaConfig, NodeConfig, PhpConfig, PythonConfig, RConfig, RubyConfig, StubsConfig, WasmConfig,
21};
22pub use output::{
23    ExcludeConfig, IncludeConfig, LintConfig, OutputConfig, ReadmeConfig, ScaffoldConfig, SyncConfig, TestConfig,
24    TextReplacement,
25};
26
/// Root configuration from alef.toml.
///
/// Field names double as the TOML keys (serde derive), except `crate_config`, which is
/// stored under the `crate` key. Only `crate` and `languages` are mandatory; every other
/// field carries `#[serde(default)]` and may be omitted from the file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AlefConfig {
    /// The `[crate]` table; the field is renamed because `crate` is a Rust keyword.
    #[serde(rename = "crate")]
    pub crate_config: CrateConfig,
    /// Entries of the `languages` key (presumably the backends to generate — confirm against callers).
    pub languages: Vec<Language>,
    /// `[exclude]` section; `ExcludeConfig::default()` when omitted.
    #[serde(default)]
    pub exclude: ExcludeConfig,
    /// `[include]` section; `IncludeConfig::default()` when omitted.
    #[serde(default)]
    pub include: IncludeConfig,
    /// `[output]` section; `OutputConfig::default()` when omitted. Its `ffi` entry feeds `ffi_lib_name`.
    #[serde(default)]
    pub output: OutputConfig,
    /// Optional `[python]` section (module name, pip name, features, serde rename override).
    #[serde(default)]
    pub python: Option<PythonConfig>,
    /// Optional `[node]` section (package name, type prefix, features, serde rename override).
    #[serde(default)]
    pub node: Option<NodeConfig>,
    /// Optional `[ruby]` section (gem name, features, serde rename override).
    #[serde(default)]
    pub ruby: Option<RubyConfig>,
    /// Optional `[php]` section (extension name, features, serde rename override).
    #[serde(default)]
    pub php: Option<PhpConfig>,
    /// Optional `[elixir]` section (app name, features, serde rename override).
    #[serde(default)]
    pub elixir: Option<ElixirConfig>,
    /// Optional `[wasm]` section (type prefix, features, serde rename override).
    #[serde(default)]
    pub wasm: Option<WasmConfig>,
    /// Optional `[ffi]` section (prefix, library name, header name, features, serde rename override).
    #[serde(default)]
    pub ffi: Option<FfiConfig>,
    /// Optional `[go]` section (module path, features, serde rename override).
    #[serde(default)]
    pub go: Option<GoConfig>,
    /// Optional `[java]` section (package, features, serde rename override).
    #[serde(default)]
    pub java: Option<JavaConfig>,
    /// Optional `[csharp]` section (namespace, features, serde rename override).
    #[serde(default)]
    pub csharp: Option<CSharpConfig>,
    /// Optional `[r]` section (package name, features, serde rename override).
    #[serde(default)]
    pub r: Option<RConfig>,
    /// Optional `[scaffold]` section; its `repository` entry is one source for `github_repo`.
    #[serde(default)]
    pub scaffold: Option<ScaffoldConfig>,
    /// Optional `[readme]` section.
    #[serde(default)]
    pub readme: Option<ReadmeConfig>,
    /// Optional `[lint.<key>]` tables (keys are presumably language names — TODO confirm).
    #[serde(default)]
    pub lint: Option<HashMap<String, LintConfig>>,
    /// Optional `[test.<key>]` tables (keys are presumably language names — TODO confirm).
    #[serde(default)]
    pub test: Option<HashMap<String, TestConfig>>,
    /// Optional map from a string key to a list of file paths (key meaning not visible here — TODO confirm).
    #[serde(default)]
    pub custom_files: Option<HashMap<String, Vec<PathBuf>>>,
    /// Adapter definitions; empty when omitted.
    #[serde(default)]
    pub adapters: Vec<AdapterConfig>,
    /// `[custom_modules]` section; `CustomModulesConfig::default()` when omitted.
    #[serde(default)]
    pub custom_modules: CustomModulesConfig,
    /// `[custom_registrations]` section; `CustomRegistrationsConfig::default()` when omitted.
    #[serde(default)]
    pub custom_registrations: CustomRegistrationsConfig,
    /// Optional `[sync]` section.
    #[serde(default)]
    pub sync: Option<SyncConfig>,
    /// Declare opaque types from external crates that alef can't extract.
    /// Map of type name → Rust path (e.g., "Tree" = "tree_sitter_language_pack::Tree").
    /// These get opaque wrapper structs in all backends.
    #[serde(default)]
    pub opaque_types: HashMap<String, String>,
    /// Controls which generation passes alef runs (all default to true).
    #[serde(default)]
    pub generate: GenerateConfig,
    /// Per-language overrides for generate flags (key = language name, e.g., "python").
    #[serde(default)]
    pub generate_overrides: HashMap<String, GenerateConfig>,
    /// Per-language DTO/type generation style (dataclass vs TypedDict, zod vs interface, etc.).
    #[serde(default)]
    pub dto: DtoConfig,
    /// E2E test generation configuration; its `registry.github_repo` entry takes
    /// precedence in `github_repo`.
    #[serde(default)]
    pub e2e: Option<E2eConfig>,
}
97
/// The `[crate]` table of alef.toml: identifies the core crate and how to read it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CrateConfig {
    /// Crate name. Many per-language names fall back to it, often with `-` replaced by `_`.
    pub name: String,
    /// Source files of the core crate; the first entry is used to derive the crate directory.
    pub sources: Vec<PathBuf>,
    /// Path of the manifest the version is read from; defaults to `"Cargo.toml"`.
    #[serde(default = "default_version_from")]
    pub version_from: String,
    /// Explicit import path of the core crate; when unset it is derived from `name`.
    #[serde(default)]
    pub core_import: Option<String>,
    /// Optional workspace root path for resolving `pub use` re-exports from sibling crates.
    #[serde(default)]
    pub workspace_root: Option<PathBuf>,
    /// When true, skip adding `use {core_import};` to generated bindings.
    #[serde(default)]
    pub skip_core_import: bool,
    /// Cargo features that are enabled in binding crates.
    /// Fields gated by `#[cfg(feature = "...")]` matching these features
    /// are treated as always-present (cfg stripped from the IR).
    #[serde(default)]
    pub features: Vec<String>,
    /// Maps extracted rust_path prefixes to actual import paths in binding crates.
    /// Example: { "spikard" = "spikard_http" } rewrites "spikard::ServerConfig" to "spikard_http::ServerConfig"
    #[serde(default)]
    pub path_mappings: HashMap<String, String>,
}
122
/// Serde default for `CrateConfig::version_from`: the manifest in the working directory.
fn default_version_from() -> String {
    String::from("Cargo.toml")
}
126
/// Serde default provider for boolean flags that are enabled unless the
/// configuration file explicitly sets them to `false`.
fn default_true() -> bool {
    true
}
130
/// Controls which generation passes alef runs.
/// All flags default to `true`; set to `false` to skip a pass.
/// Can be overridden per-language via `[generate_overrides.<lang>]`.
///
/// Each field names `default_true` as its serde default, so a table that sets only
/// some flags leaves the unspecified ones enabled.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GenerateConfig {
    /// Generate low-level struct wrappers, From impls, module init (default: true)
    #[serde(default = "default_true")]
    pub bindings: bool,
    /// Generate error type hierarchies from thiserror enums (default: true)
    #[serde(default = "default_true")]
    pub errors: bool,
    /// Generate config builder constructors from Default types (default: true)
    #[serde(default = "default_true")]
    pub configs: bool,
    /// Generate async/sync function pairs with runtime management (default: true)
    #[serde(default = "default_true")]
    pub async_wrappers: bool,
    /// Generate recursive type marshaling helpers (default: true)
    #[serde(default = "default_true")]
    pub type_conversions: bool,
    /// Generate package manifests (pyproject.toml, package.json, etc.) (default: true)
    #[serde(default = "default_true")]
    pub package_metadata: bool,
    /// Generate idiomatic public API wrappers (default: true)
    #[serde(default = "default_true")]
    pub public_api: bool,
    /// Generate `From<BindingType> for CoreType` reverse conversions (default: true).
    /// Set to false when the binding layer only returns core types and never accepts them.
    #[serde(default = "default_true")]
    pub reverse_conversions: bool,
}
162
163impl Default for GenerateConfig {
164    fn default() -> Self {
165        Self {
166            bindings: true,
167            errors: true,
168            configs: true,
169            async_wrappers: true,
170            type_conversions: true,
171            package_metadata: true,
172            public_api: true,
173            reverse_conversions: true,
174        }
175    }
176}
177
178// ---------------------------------------------------------------------------
179// Shared config resolution helpers
180// ---------------------------------------------------------------------------
181
182impl AlefConfig {
183    /// Get the features to use for a specific language's binding crate.
184    /// Checks for a per-language override first, then falls back to `[crate] features`.
185    pub fn features_for_language(&self, lang: extras::Language) -> &[String] {
186        let override_features = match lang {
187            extras::Language::Python => self.python.as_ref().and_then(|c| c.features.as_deref()),
188            extras::Language::Node => self.node.as_ref().and_then(|c| c.features.as_deref()),
189            extras::Language::Ruby => self.ruby.as_ref().and_then(|c| c.features.as_deref()),
190            extras::Language::Php => self.php.as_ref().and_then(|c| c.features.as_deref()),
191            extras::Language::Elixir => self.elixir.as_ref().and_then(|c| c.features.as_deref()),
192            extras::Language::Wasm => self.wasm.as_ref().and_then(|c| c.features.as_deref()),
193            extras::Language::Ffi => self.ffi.as_ref().and_then(|c| c.features.as_deref()),
194            extras::Language::Go => self.go.as_ref().and_then(|c| c.features.as_deref()),
195            extras::Language::Java => self.java.as_ref().and_then(|c| c.features.as_deref()),
196            extras::Language::Csharp => self.csharp.as_ref().and_then(|c| c.features.as_deref()),
197            extras::Language::R => self.r.as_ref().and_then(|c| c.features.as_deref()),
198            extras::Language::Rust => None, // Rust doesn't have binding-specific features
199        };
200        override_features.unwrap_or(&self.crate_config.features)
201    }
202
203    /// Get the core crate import path (e.g., "liter_llm"). Used by codegen to call into the core crate.
204    pub fn core_import(&self) -> String {
205        self.crate_config
206            .core_import
207            .clone()
208            .unwrap_or_else(|| self.crate_config.name.replace('-', "_"))
209    }
210
211    /// Get the FFI prefix (e.g., "kreuzberg"). Used by FFI, Go, Java, C# backends.
212    pub fn ffi_prefix(&self) -> String {
213        self.ffi
214            .as_ref()
215            .and_then(|f| f.prefix.as_ref())
216            .cloned()
217            .unwrap_or_else(|| self.crate_config.name.replace('-', "_"))
218    }
219
220    /// Get the FFI native library name (for Go cgo, Java Panama, C# P/Invoke).
221    ///
222    /// Resolution order:
223    /// 1. `[ffi] lib_name` explicit override
224    /// 2. Directory name of `output.ffi` path with hyphens replaced by underscores
225    ///    (e.g. `crates/html-to-markdown-ffi/src/` → `html_to_markdown_ffi`)
226    /// 3. `{ffi_prefix}_ffi` fallback
227    pub fn ffi_lib_name(&self) -> String {
228        // 1. Explicit override in [ffi] section.
229        if let Some(name) = self.ffi.as_ref().and_then(|f| f.lib_name.as_ref()) {
230            return name.clone();
231        }
232
233        // 2. Derive from output.ffi path: take the last meaningful directory component
234        //    (skip trailing "src" or similar), then replace hyphens with underscores.
235        if let Some(ffi_path) = self.output.ffi.as_ref() {
236            let path = std::path::Path::new(ffi_path);
237            // Walk components from the end to find the crate directory name.
238            // Skip components like "src" that are inside the crate dir.
239            let components: Vec<_> = path
240                .components()
241                .filter_map(|c| {
242                    if let std::path::Component::Normal(s) = c {
243                        s.to_str()
244                    } else {
245                        None
246                    }
247                })
248                .collect();
249            // The crate name is typically the last component that looks like a crate dir
250            // (i.e. not "src", "lib", or similar). Search from the end.
251            let crate_dir = components
252                .iter()
253                .rev()
254                .find(|&&s| s != "src" && s != "lib" && s != "include")
255                .copied();
256            if let Some(dir) = crate_dir {
257                return dir.replace('-', "_");
258            }
259        }
260
261        // 3. Default fallback.
262        format!("{}_ffi", self.ffi_prefix())
263    }
264
265    /// Get the FFI header name.
266    pub fn ffi_header_name(&self) -> String {
267        self.ffi
268            .as_ref()
269            .and_then(|f| f.header_name.as_ref())
270            .cloned()
271            .unwrap_or_else(|| format!("{}.h", self.ffi_prefix()))
272    }
273
274    /// Get the Python module name.
275    pub fn python_module_name(&self) -> String {
276        self.python
277            .as_ref()
278            .and_then(|p| p.module_name.as_ref())
279            .cloned()
280            .unwrap_or_else(|| format!("_{}", self.crate_config.name.replace('-', "_")))
281    }
282
283    /// Get the PyPI package name used as `[project] name` in `pyproject.toml`.
284    ///
285    /// Returns `[python] pip_name` if set, otherwise falls back to the crate name.
286    pub fn python_pip_name(&self) -> String {
287        self.python
288            .as_ref()
289            .and_then(|p| p.pip_name.as_ref())
290            .cloned()
291            .unwrap_or_else(|| self.crate_config.name.clone())
292    }
293
294    /// Get the PHP Composer autoload namespace derived from the extension name.
295    ///
296    /// Converts the extension name (e.g. `html_to_markdown_rs`) into a
297    /// PSR-4 namespace string (e.g. `Html\\To\\Markdown\\Rs`).
298    pub fn php_autoload_namespace(&self) -> String {
299        use heck::ToPascalCase;
300        let ext = self.php_extension_name();
301        if ext.contains('_') {
302            ext.split('_')
303                .map(|p| p.to_pascal_case())
304                .collect::<Vec<_>>()
305                .join("\\")
306        } else {
307            ext.to_pascal_case()
308        }
309    }
310
311    /// Get the Node package name.
312    pub fn node_package_name(&self) -> String {
313        self.node
314            .as_ref()
315            .and_then(|n| n.package_name.as_ref())
316            .cloned()
317            .unwrap_or_else(|| self.crate_config.name.clone())
318    }
319
320    /// Get the Ruby gem name.
321    pub fn ruby_gem_name(&self) -> String {
322        self.ruby
323            .as_ref()
324            .and_then(|r| r.gem_name.as_ref())
325            .cloned()
326            .unwrap_or_else(|| self.crate_config.name.replace('-', "_"))
327    }
328
329    /// Get the PHP extension name.
330    pub fn php_extension_name(&self) -> String {
331        self.php
332            .as_ref()
333            .and_then(|p| p.extension_name.as_ref())
334            .cloned()
335            .unwrap_or_else(|| self.crate_config.name.replace('-', "_"))
336    }
337
338    /// Get the Elixir app name.
339    pub fn elixir_app_name(&self) -> String {
340        self.elixir
341            .as_ref()
342            .and_then(|e| e.app_name.as_ref())
343            .cloned()
344            .unwrap_or_else(|| self.crate_config.name.replace('-', "_"))
345    }
346
347    /// Get the Go module path.
348    pub fn go_module(&self) -> String {
349        self.go
350            .as_ref()
351            .and_then(|g| g.module.as_ref())
352            .cloned()
353            .unwrap_or_else(|| format!("github.com/kreuzberg-dev/{}", self.crate_config.name))
354    }
355
356    /// Get the GitHub repository URL.
357    ///
358    /// Resolution order:
359    /// 1. `[e2e.registry] github_repo`
360    /// 2. `[scaffold] repository`
361    /// 3. Default: `https://github.com/kreuzberg-dev/{crate.name}`
362    pub fn github_repo(&self) -> String {
363        if let Some(e2e) = &self.e2e {
364            if let Some(url) = &e2e.registry.github_repo {
365                return url.clone();
366            }
367        }
368        self.scaffold
369            .as_ref()
370            .and_then(|s| s.repository.as_ref())
371            .cloned()
372            .unwrap_or_else(|| format!("https://github.com/kreuzberg-dev/{}", self.crate_config.name))
373    }
374
375    /// Get the Java package name.
376    pub fn java_package(&self) -> String {
377        self.java
378            .as_ref()
379            .and_then(|j| j.package.as_ref())
380            .cloned()
381            .unwrap_or_else(|| "dev.kreuzberg".to_string())
382    }
383
384    /// Get the Java Maven groupId.
385    ///
386    /// Uses the full Java package as the groupId, matching Maven convention
387    /// where groupId equals the package declaration.
388    pub fn java_group_id(&self) -> String {
389        self.java_package()
390    }
391
392    /// Get the C# namespace.
393    pub fn csharp_namespace(&self) -> String {
394        self.csharp
395            .as_ref()
396            .and_then(|c| c.namespace.as_ref())
397            .cloned()
398            .unwrap_or_else(|| {
399                use heck::ToPascalCase;
400                self.crate_config.name.to_pascal_case()
401            })
402    }
403
404    /// Get the directory name of the core crate (derived from sources or falling back to name).
405    ///
406    /// For example, if `sources` contains `"crates/html-to-markdown/src/lib.rs"`, this returns
407    /// `"html-to-markdown"`.  Used by the scaffold to generate correct `path = "../../crates/…"`
408    /// references in binding-crate `Cargo.toml` files.
409    pub fn core_crate_dir(&self) -> String {
410        // Try to derive from first source path: "crates/foo/src/types/config.rs" → "foo"
411        // Walk up from the file until we find the "src" directory, then take its parent.
412        if let Some(first_source) = self.crate_config.sources.first() {
413            let path = std::path::Path::new(first_source);
414            let mut current = path.parent();
415            while let Some(dir) = current {
416                if dir.file_name().is_some_and(|n| n == "src") {
417                    if let Some(crate_dir) = dir.parent() {
418                        if let Some(dir_name) = crate_dir.file_name() {
419                            return dir_name.to_string_lossy().into_owned();
420                        }
421                    }
422                    break;
423                }
424                current = dir.parent();
425            }
426        }
427        self.crate_config.name.clone()
428    }
429
430    /// Get the WASM type name prefix (e.g. "Wasm" produces `WasmConversionOptions`).
431    /// Defaults to `"Wasm"`.
432    pub fn wasm_type_prefix(&self) -> String {
433        self.wasm
434            .as_ref()
435            .and_then(|w| w.type_prefix.as_ref())
436            .cloned()
437            .unwrap_or_else(|| "Wasm".to_string())
438    }
439
440    /// Get the Node/NAPI type name prefix (e.g. "Js" produces `JsConversionOptions`).
441    /// Defaults to `"Js"`.
442    pub fn node_type_prefix(&self) -> String {
443        self.node
444            .as_ref()
445            .and_then(|n| n.type_prefix.as_ref())
446            .cloned()
447            .unwrap_or_else(|| "Js".to_string())
448    }
449
450    /// Get the R package name.
451    pub fn r_package_name(&self) -> String {
452        self.r
453            .as_ref()
454            .and_then(|r| r.package_name.as_ref())
455            .cloned()
456            .unwrap_or_else(|| self.crate_config.name.clone())
457    }
458
459    /// Attempt to read the resolved version string from the configured `version_from` file.
460    /// Returns `None` if the file cannot be read or the version cannot be found.
461    pub fn resolved_version(&self) -> Option<String> {
462        let content = std::fs::read_to_string(&self.crate_config.version_from).ok()?;
463        let value: toml::Value = toml::from_str(&content).ok()?;
464        if let Some(v) = value
465            .get("workspace")
466            .and_then(|w| w.get("package"))
467            .and_then(|p| p.get("version"))
468            .and_then(|v| v.as_str())
469        {
470            return Some(v.to_string());
471        }
472        value
473            .get("package")
474            .and_then(|p| p.get("version"))
475            .and_then(|v| v.as_str())
476            .map(|v| v.to_string())
477    }
478
479    /// Get the effective serde rename_all strategy for a given language.
480    ///
481    /// Resolution order:
482    /// 1. Per-language config override (`[python] serde_rename_all = "..."`)
483    /// 2. Language default:
484    ///    - camelCase: node, wasm, java, csharp
485    ///    - snake_case: python, ruby, php, go, ffi, elixir, r
486    pub fn serde_rename_all_for_language(&self, lang: extras::Language) -> String {
487        // 1. Check per-language config override.
488        let override_val = match lang {
489            extras::Language::Python => self.python.as_ref().and_then(|c| c.serde_rename_all.as_deref()),
490            extras::Language::Node => self.node.as_ref().and_then(|c| c.serde_rename_all.as_deref()),
491            extras::Language::Ruby => self.ruby.as_ref().and_then(|c| c.serde_rename_all.as_deref()),
492            extras::Language::Php => self.php.as_ref().and_then(|c| c.serde_rename_all.as_deref()),
493            extras::Language::Elixir => self.elixir.as_ref().and_then(|c| c.serde_rename_all.as_deref()),
494            extras::Language::Wasm => self.wasm.as_ref().and_then(|c| c.serde_rename_all.as_deref()),
495            extras::Language::Ffi => self.ffi.as_ref().and_then(|c| c.serde_rename_all.as_deref()),
496            extras::Language::Go => self.go.as_ref().and_then(|c| c.serde_rename_all.as_deref()),
497            extras::Language::Java => self.java.as_ref().and_then(|c| c.serde_rename_all.as_deref()),
498            extras::Language::Csharp => self.csharp.as_ref().and_then(|c| c.serde_rename_all.as_deref()),
499            extras::Language::R => self.r.as_ref().and_then(|c| c.serde_rename_all.as_deref()),
500            extras::Language::Rust => None, // Rust uses native naming (snake_case)
501        };
502
503        if let Some(val) = override_val {
504            return val.to_string();
505        }
506
507        // 2. Language defaults.
508        match lang {
509            extras::Language::Node | extras::Language::Wasm | extras::Language::Java | extras::Language::Csharp => {
510                "camelCase".to_string()
511            }
512            extras::Language::Python
513            | extras::Language::Ruby
514            | extras::Language::Php
515            | extras::Language::Go
516            | extras::Language::Ffi
517            | extras::Language::Elixir
518            | extras::Language::R
519            | extras::Language::Rust => "snake_case".to_string(),
520        }
521    }
522
523    /// Rewrite a rust_path using path_mappings.
524    /// Matches the longest prefix first.
525    pub fn rewrite_path(&self, rust_path: &str) -> String {
526        // Sort mappings by key length descending (longest prefix first)
527        let mut mappings: Vec<_> = self.crate_config.path_mappings.iter().collect();
528        mappings.sort_by_key(|b| std::cmp::Reverse(b.0.len()));
529
530        for (from, to) in &mappings {
531            if rust_path.starts_with(from.as_str()) {
532                return format!("{}{}", to, &rust_path[from.len()..]);
533            }
534        }
535        rust_path.to_string()
536    }
537}
538
/// Resolve an output directory from the configuration.
///
/// Uses `config_path` (converted lossily to text) when given, otherwise `default`,
/// and replaces every `{name}` placeholder in the chosen template with `crate_name`.
pub fn resolve_output_dir(config_path: Option<&PathBuf>, crate_name: &str, default: &str) -> String {
    let template = match config_path {
        Some(path) => path.to_string_lossy(),
        None => std::borrow::Cow::Borrowed(default),
    };
    template.replace("{name}", crate_name)
}
546
547/// Detect whether `serde` and `serde_json` are available in a binding crate's Cargo.toml.
548///
549/// `output_dir` is the generated source directory (e.g., `crates/spikard-py/src/`).
550/// The function walks up to find the crate's Cargo.toml and checks its `[dependencies]`
551/// for both `serde` and `serde_json`.
552pub fn detect_serde_available(output_dir: &str) -> bool {
553    let src_path = std::path::Path::new(output_dir);
554    // Walk up from the output dir to find Cargo.toml (usually output_dir is `crates/foo/src/`)
555    let mut dir = src_path;
556    loop {
557        let cargo_toml = dir.join("Cargo.toml");
558        if cargo_toml.exists() {
559            return cargo_toml_has_serde(&cargo_toml);
560        }
561        match dir.parent() {
562            Some(parent) if !parent.as_os_str().is_empty() => dir = parent,
563            _ => break,
564        }
565    }
566    false
567}
568
/// Check if a Cargo.toml declares both `serde` and `serde_json` as regular dependencies.
///
/// The `serde::Serialize` derive macro requires `serde` as a direct dependency; having
/// only `serde_json` is not sufficient since it only pulls in `serde` transitively.
/// This is a line-based scan, not a full TOML parse. It recognises:
///
/// - keys inside `[dependencies]` and `[target.<spec>.dependencies]`, written as
///   `serde = ...`, `serde.workspace = true`, or with a quoted key;
/// - sub-table headers such as `[dependencies.serde]`.
///
/// Comment lines and `[dev-dependencies]`, `[build-dependencies]` and
/// `[workspace.dependencies]` entries are ignored. The `derive` feature itself is
/// NOT verified. Returns `false` when the file cannot be read.
fn cargo_toml_has_serde(path: &std::path::Path) -> bool {
    // True for tables whose keys are regular (non-dev, non-build) dependencies.
    fn is_dependency_table(table: &str) -> bool {
        table == "dependencies" || (table.starts_with("target.") && table.ends_with(".dependencies"))
    }

    // Strip surrounding whitespace and TOML key quotes.
    fn unquote(key: &str) -> &str {
        key.trim().trim_matches(|c: char| c == '"' || c == '\'')
    }

    let Ok(content) = std::fs::read_to_string(path) else {
        return false;
    };

    let mut has_serde = false;
    let mut has_serde_json = false;
    let mut record = |key: &str| match unquote(key) {
        "serde" => has_serde = true,
        "serde_json" => has_serde_json = true,
        _ => {}
    };

    let mut in_dependencies = false;
    for line in content.lines().map(str::trim) {
        // Commented-out dependencies must not count.
        if line.is_empty() || line.starts_with('#') {
            continue;
        }

        if let Some(header) = line.strip_prefix('[') {
            // Table header: either a dependency table itself or `<deps>.<crate>`.
            let table = header.split(']').next().unwrap_or_default().trim();
            in_dependencies = is_dependency_table(table);
            if !in_dependencies {
                if let Some((parent, key)) = table.rsplit_once('.') {
                    if is_dependency_table(parent.trim()) {
                        record(key);
                    }
                }
            }
            continue;
        }

        if in_dependencies {
            // Only `key = value` lines declare a dependency; the crate name is the
            // first dotted segment of the key (`serde.workspace = true`).
            if let Some((lhs, _)) = line.split_once('=') {
                record(lhs.split('.').next().unwrap_or_default());
            }
        }
    }

    has_serde && has_serde_json
}