Skip to main content

alef_core/config/
legacy.rs

1//! Detection of legacy `alef.toml` top-level keys.
2//!
3//! The pre-Phase-2 schema put everything at the top level of `alef.toml`:
4//! `[crate]`, `languages`, `[python]`, `[lint.python]`, etc.  The new schema
5//! groups things under `[workspace]` and `[[crates]]`.  This module scans raw
6//! TOML text and reports every top-level key that belongs to the old layout,
7//! with a human-readable suggestion for where to move it.
8//!
//! Span detection: we do a best-effort line scan for each banned key rather
//! than requiring toml span support (which is version-dependent).  The line
//! number is 1-based and falls back to 1 when the scan cannot locate a key.
//! We don't track column: the scan ignores leading whitespace and only
//! recognises top-level forms (`[key]`, `[[key]]`, `key = …`), so the key
//! sits at the start of the line's content anyway.
14
15use std::collections::HashMap;
16use std::sync::OnceLock;
17
/// A single legacy key detected in raw TOML.
///
/// Implements `PartialEq`/`Eq` so whole findings can be compared directly
/// (for example with `assert_eq!`) instead of field by field.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LegacyKey {
    /// The top-level TOML key that is no longer valid.
    pub key: String,
    /// 1-based line number of the first occurrence of the key. Best-effort —
    /// derived from a line scan over the raw TOML rather than the parser's
    /// span info, which would couple us to the toml crate version.
    pub line: usize,
    /// Human-readable migration suggestion.
    pub suggestion: String,
}
30
/// Error returned by [`detect_legacy_keys`] when legacy keys are found.
///
/// The `Display` output is a one-line summary (the number of offending keys
/// plus a pointer to `alef migrate`) followed by one indented line per key,
/// as rendered by the private `format_keys` helper.
///
/// The detected keys are accessed via [`LegacyConfigError::keys`]; the field is
/// private so callers cannot truncate or reorder the list before formatting.
#[derive(Debug, thiserror::Error)]
#[error(
    "legacy alef.toml schema detected: {} key(s) must be moved. Run `alef migrate` to update automatically.\n{}",
    keys.len(),
    format_keys(keys)
)]
pub struct LegacyConfigError {
    /// Every legacy key that was found, with its line and suggestion.
    keys: Vec<LegacyKey>,
}
44
45impl LegacyConfigError {
46    /// All legacy keys discovered, in detection order.
47    pub fn keys(&self) -> &[LegacyKey] {
48        &self.keys
49    }
50}
51
52fn format_keys(keys: &[LegacyKey]) -> String {
53    keys.iter()
54        .map(|k| format!("  line {}: `{}` — {}", k.line, k.key, k.suggestion))
55        .collect::<Vec<_>>()
56        .join("\n")
57}
58
59/// Scan `raw_toml` for top-level keys that belong to the old single-crate schema.
60///
61/// Returns `Ok(())` when no legacy keys are found, or a [`LegacyConfigError`]
62/// listing every banned key with its line number and migration suggestion.
63///
64/// The check is intentionally conservative: it will not fire on `[[crates]]`
65/// entries that happen to contain a field with the same name as a banned
66/// top-level key — only genuine top-level bare assignments or section headers
67/// trigger it.
68pub fn detect_legacy_keys(raw_toml: &str) -> Result<(), LegacyConfigError> {
69    let suggestions = banned_key_suggestions();
70
71    // Parse the TOML to get the top-level key set, then find their line numbers
72    // via a line scan.  We parse first so we only flag keys that actually exist
73    // in the document rather than doing a purely textual match.
74    let table: toml::Table = match toml::from_str(raw_toml) {
75        Ok(t) => t,
76        // If the document is not valid TOML we can't do meaningful detection;
77        // let the caller's real deserializer surface the parse error.
78        Err(_) => return Ok(()),
79    };
80
81    // Collect top-level keys that are in the banned set.
82    let mut found: Vec<(String, &str)> = table
83        .keys()
84        .filter_map(|k| suggestions.get(k.as_str()).map(|s| (k.clone(), *s)))
85        .collect();
86
87    if found.is_empty() {
88        return Ok(());
89    }
90
91    // Stable order: sort by key name so output is deterministic.
92    found.sort_by(|a, b| a.0.cmp(&b.0));
93
94    // Find line numbers with a best-effort line scan.
95    let line_map = first_occurrence_lines(raw_toml, found.iter().map(|(k, _)| k.as_str()));
96
97    let keys: Vec<LegacyKey> = found
98        .into_iter()
99        .map(|(key, suggestion)| {
100            let line = line_map.get(key.as_str()).copied().unwrap_or(1);
101            LegacyKey {
102                key,
103                line,
104                suggestion: suggestion.to_string(),
105            }
106        })
107        .collect();
108
109    Err(LegacyConfigError { keys })
110}
111
112/// Return a map from banned top-level key → migration suggestion string.
113///
114/// Built once on first call and cached in a [`OnceLock`] so repeated calls to
115/// [`detect_legacy_keys`] don't re-allocate the table.
116fn banned_key_suggestions() -> &'static HashMap<&'static str, &'static str> {
117    static MAP: OnceLock<HashMap<&'static str, &'static str>> = OnceLock::new();
118    MAP.get_or_init(build_banned_key_suggestions)
119}
120
/// Build the banned-key → suggestion table.  Intended to run once, from the
/// [`OnceLock`] initialiser in [`banned_key_suggestions`].
///
/// Keys are organised in groups that share one suggestion string, plus three
/// keys with bespoke suggestions (`crate`, `version`, `languages`).
fn build_banned_key_suggestions() -> HashMap<&'static str, &'static str> {
    const PER_CRATE_HINT: &str = "move under `[[crates]]` for the relevant crate";
    const WORKSPACE_HINT: &str = "move under `[workspace.<key>]`";
    const CRATE_FIELD_HINT: &str = "move under `[[crates]] <key>`";

    // Per-language config → [[crates]] sub-table
    const LANGUAGE_TABLES: &[&str] = &[
        "python", "node", "ruby", "php", "elixir", "wasm", "ffi", "gleam", "go", "java", "dart", "kotlin", "swift",
        "csharp", "r", "zig",
    ];

    // Pipeline maps → [[crates]] sub-tables
    const PIPELINE_TABLES: &[&str] = &[
        "output",
        "exclude",
        "include",
        "lint",
        "test",
        "setup",
        "update",
        "clean",
        "build_commands",
        "publish",
        "e2e",
        "scaffold",
        "readme",
        "custom_files",
        "custom_modules",
        "custom_registrations",
        "adapters",
        "trait_bridges",
    ];

    // Workspace-level generation/format flags
    const WORKSPACE_TABLES: &[&str] = &[
        "tools",
        "dto",
        "format",
        "format_overrides",
        "generate",
        "generate_overrides",
        "opaque_types",
        "sync",
    ];

    // Per-crate source/dep config
    const CRATE_FIELDS: &[&str] = &[
        "path_mappings",
        "auto_path_mappings",
        "source_crates",
        "extra_dependencies",
    ];

    // Keys whose suggestion is unique to them:
    //   singular [crate] table → [[crates]] array of tables
    //   bare `version` scalar  → [workspace] alef_version
    //   bare `languages`       → [workspace] languages
    let one_offs: [(&'static str, &'static str); 3] = [
        ("crate", "move under `[[crates]]` (array of tables)"),
        ("version", "rename to `[workspace] alef_version`"),
        ("languages", "move to `[workspace] languages`"),
    ];

    let groups: [(&'static [&'static str], &'static str); 4] = [
        (LANGUAGE_TABLES, PER_CRATE_HINT),
        (PIPELINE_TABLES, PER_CRATE_HINT),
        (WORKSPACE_TABLES, WORKSPACE_HINT),
        (CRATE_FIELDS, CRATE_FIELD_HINT),
    ];

    let mut table: HashMap<&'static str, &'static str> = HashMap::new();
    table.extend(one_offs.iter().copied());
    for (names, hint) in groups.iter() {
        table.extend(names.iter().map(|&name| (name, *hint)));
    }
    table
}
193
/// For each key in `keys`, scan `raw_toml` line by line and return the
/// 1-based line number at which that key is introduced as a top-level TOML
/// key.
///
/// Two kinds of line are recognised (see [`is_top_level_key_line`]): table
/// headers (`[key]`, `[key.sub]`, `[[key]]`) and assignments (`key = …`,
/// `key.sub = …`).  A header hit always wins over an assignment hit.  Callers
/// pass keys the TOML parser has already confirmed to be top-level, so when a
/// header for the key exists, a `key = …` line elsewhere is a same-named
/// field nested inside some other table (for example `output = …` under
/// `[[crates]]`), not the definition being reported.
///
/// Within each kind the first matching line is kept.  Keys that never match
/// are absent from the returned map.
fn first_occurrence_lines<'k>(raw_toml: &str, keys: impl Iterator<Item = &'k str>) -> HashMap<String, usize> {
    let keys_vec: Vec<&str> = keys.collect();
    let mut header_hits: HashMap<&str, usize> = HashMap::new();
    let mut assignment_hits: HashMap<&str, usize> = HashMap::new();

    for (idx, line) in raw_toml.lines().enumerate() {
        // Once every key has a header hit, no later line can change the result.
        if header_hits.len() == keys_vec.len() {
            break;
        }

        let line_no = idx + 1;
        let trimmed = line.trim();
        let hits = if trimmed.starts_with('[') {
            &mut header_hits
        } else {
            &mut assignment_hits
        };

        for &key in &keys_vec {
            if is_top_level_key_line(trimmed, key) {
                // `or_insert` keeps the earliest line for this kind of hit.
                hits.entry(key).or_insert(line_no);
            }
        }
    }

    keys_vec
        .iter()
        .filter_map(|&key| {
            header_hits
                .get(key)
                .or_else(|| assignment_hits.get(key))
                .map(|&line_no| (key.to_string(), line_no))
        })
        .collect()
}

/// Return true when `line` is a TOML line that introduces `key` as a
/// top-level key (not nested inside another table header).
///
/// Recognised forms, after trimming surrounding whitespace:
/// - table header `[key]` / `[key.sub]` and array-of-tables header
///   `[[key]]` / `[[key.sub]]`, optionally followed by a `# comment`;
/// - assignment `key = …` / `key=…`, including dotted keys such as
///   `key.sub = …`, whose first segment is the top-level key.
///
/// Quoted keys (`["key"]`, `"key" = …`) are not recognised.
fn is_top_level_key_line(line: &str, key: &str) -> bool {
    let line = line.trim();

    if let Some(after_open) = line.strip_prefix('[') {
        // `[[…]]` is an array-of-tables header, `[…]` a plain table header.
        let (body, closer) = match after_open.strip_prefix('[') {
            Some(array_body) => (array_body, "]]"),
            None => (after_open, "]"),
        };
        let Some((path, tail)) = body.split_once(closer) else {
            return false;
        };
        // Only whitespace or a comment may follow the closing bracket(s).
        let tail = tail.trim_start();
        if !(tail.is_empty() || tail.starts_with('#')) {
            return false;
        }
        // Only the first dotted segment names a top-level key.
        return path.split('.').next().map(str::trim) == Some(key);
    }

    let Some(rest) = line.strip_prefix(key) else {
        return false;
    };
    // Word boundary: a banned key `r` must not match a longer key such as
    // `rust = "x"`.
    if rest.starts_with(|c: char| c.is_alphanumeric() || c == '_' || c == '-') {
        return false;
    }
    let rest = rest.trim_start();
    // Plain `key = …`, or a dotted key `key.sub = …` rooted at `key`.
    rest.starts_with('=') || (rest.starts_with('.') && rest.contains('='))
}
261
/// Unit tests for legacy-key detection.
///
/// Fixtures written as raw strings start with a newline, so the first line of
/// content is line 2 of the document.
#[cfg(test)]
mod tests {
    use super::*;

    /// A document that only uses `[workspace]` / `[[crates]]` must pass.
    #[test]
    fn detect_legacy_keys_returns_ok_for_new_schema() {
        let toml_str = r#"
[workspace]
alef_version = "0.13.0"
languages = ["python", "node"]

[[crates]]
name = "spikard"
sources = ["src/lib.rs"]

[crates.lint.python]
check = "ruff check ."
"#;
        assert!(detect_legacy_keys(toml_str).is_ok());
    }

    #[test]
    fn detect_legacy_keys_catches_bare_crate_table() {
        // In the legacy schema, `languages` is a top-level key — it MUST appear before
        // any section header, or after ALL section headers.  Here we put both at the top
        // level so the TOML parser assigns them to the document root.
        let toml_str = r#"
languages = ["python"]

[crate]
name = "spikard"
sources = ["src/lib.rs"]
"#;
        let err = detect_legacy_keys(toml_str).unwrap_err();
        let keys: Vec<&str> = err.keys().iter().map(|k| k.key.as_str()).collect();
        assert!(keys.contains(&"crate"), "expected `crate` in banned keys: {keys:?}");
        assert!(
            keys.contains(&"languages"),
            "expected `languages` in banned keys: {keys:?}"
        );
    }

    #[test]
    fn detect_legacy_keys_catches_bare_version() {
        let toml_str = r#"
version = "0.7.7"
languages = ["go"]

[crate]
name = "foo"
sources = []
"#;
        let err = detect_legacy_keys(toml_str).unwrap_err();
        let keys: Vec<&str> = err.keys().iter().map(|k| k.key.as_str()).collect();
        assert!(keys.contains(&"version"), "`version` should be banned: {keys:?}");
    }

    #[test]
    fn detect_legacy_keys_catches_bare_languages() {
        let toml_str = r#"
languages = ["python", "go"]

[crate]
name = "spikard"
sources = []
"#;
        let err = detect_legacy_keys(toml_str).unwrap_err();
        let keys: Vec<&str> = err.keys().iter().map(|k| k.key.as_str()).collect();
        assert!(keys.contains(&"languages"), "`languages` should be banned: {keys:?}");
    }

    /// Every per-language table in the banned set must be flagged, not just a
    /// sample of them.
    #[test]
    fn detect_legacy_keys_catches_language_sections() {
        for lang in [
            "python", "node", "ruby", "php", "elixir", "wasm", "ffi", "gleam", "go", "java", "dart", "kotlin",
            "swift", "csharp", "r", "zig",
        ] {
            // languages must be top-level (before any section header)
            let toml_str = format!(
                "languages = [\"{lang}\"]\n\n[crate]\nname = \"foo\"\nsources = []\n\n[{lang}]\nmodule_name = \"foo\"\n"
            );
            let err = detect_legacy_keys(&toml_str).unwrap_err();
            let keys: Vec<&str> = err.keys().iter().map(|k| k.key.as_str()).collect();
            assert!(keys.contains(&lang), "`{lang}` should be detected as legacy: {keys:?}");
        }
    }

    #[test]
    fn detect_legacy_keys_catches_workspace_level_pipeline_keys() {
        // languages and crate must be top-level; section headers below belong to root.
        let toml_str = r#"
languages = ["python"]

[crate]
name = "foo"
sources = []

[tools]
python_package_manager = "uv"

[dto]
python = "dataclass"

[format]
enabled = true

[generate]
bindings = true

[opaque_types]
Tree = "tree_sitter::Tree"
"#;
        let err = detect_legacy_keys(toml_str).unwrap_err();
        let keys: Vec<&str> = err.keys().iter().map(|k| k.key.as_str()).collect();
        for expected in ["tools", "dto", "format", "generate", "opaque_types"] {
            assert!(
                keys.contains(&expected),
                "`{expected}` should be detected as legacy: {keys:?}"
            );
        }
    }

    #[test]
    fn detect_legacy_keys_catches_per_crate_source_keys() {
        // Top-level scalars and tables must appear before any section header or after
        // the last one — put them all at the top so TOML assigns them to the document root.
        let toml_str = r#"
languages = ["python"]
auto_path_mappings = true

[crate]
name = "foo"
sources = []

[path_mappings]
foo = "foo_core"

[extra_dependencies]
pyo3 = "0.22"
"#;
        let err = detect_legacy_keys(toml_str).unwrap_err();
        let keys: Vec<&str> = err.keys().iter().map(|k| k.key.as_str()).collect();
        for expected in ["path_mappings", "auto_path_mappings", "extra_dependencies"] {
            assert!(
                keys.contains(&expected),
                "`{expected}` should be detected as legacy: {keys:?}"
            );
        }
    }

    #[test]
    fn detect_legacy_keys_catches_pipeline_table_keys() {
        let toml_str = r#"
languages = ["python"]

[crate]
name = "foo"
sources = []

[lint.python]
check = "ruff check ."

[test.python]
command = "pytest"

[build_commands.go]
build = "go build ./..."

[publish]
vendored = true

[e2e]
fixtures_dir = "e2e/fixtures"

[scaffold]
description = "My lib"

[readme]
template_dir = "docs/templates"
"#;
        let err = detect_legacy_keys(toml_str).unwrap_err();
        let keys: Vec<&str> = err.keys().iter().map(|k| k.key.as_str()).collect();
        for expected in ["lint", "test", "build_commands", "publish", "e2e", "scaffold", "readme"] {
            assert!(
                keys.contains(&expected),
                "`{expected}` should be detected as legacy: {keys:?}"
            );
        }
    }

    #[test]
    fn detect_legacy_keys_line_numbers_are_positive() {
        let toml_str = r#"
languages = ["python"]

[crate]
name = "foo"
sources = []
"#;
        let err = detect_legacy_keys(toml_str).unwrap_err();
        for k in err.keys() {
            assert!(k.line > 0, "line number must be positive for key `{}`", k.key);
        }

        // "Positive" alone would also hold for the line-1 fallback, so pin the
        // actual positions.  The fixture starts with a newline: `languages` is
        // on line 2 and `[crate]` on line 4.  Keys are reported sorted by name.
        let located: Vec<(&str, usize)> = err.keys().iter().map(|k| (k.key.as_str(), k.line)).collect();
        assert_eq!(located, vec![("crate", 4), ("languages", 2)]);
    }

    #[test]
    fn detect_legacy_keys_suggestions_are_non_empty() {
        let toml_str = r#"
languages = ["python"]

[crate]
name = "foo"
sources = []

[lint.python]
check = "ruff check ."
"#;
        let err = detect_legacy_keys(toml_str).unwrap_err();
        for k in err.keys() {
            assert!(
                !k.suggestion.is_empty(),
                "suggestion must be non-empty for key `{}`",
                k.key
            );
        }
    }

    #[test]
    fn detect_legacy_keys_invalid_toml_returns_ok() {
        // Invalid TOML should not panic — return Ok and let the real parser
        // surface the error.
        let bad = "[[[ not valid toml";
        assert!(detect_legacy_keys(bad).is_ok());
    }

    #[test]
    fn is_top_level_key_line_respects_word_boundary_on_bare_assignment() {
        // The banned key `r` must not match `rust = ...` (different identifier
        // that happens to start with `r`).
        assert!(!is_top_level_key_line("rust = true", "r"));
        assert!(!is_top_level_key_line("ruby_extras = []", "ruby"));
        // But it must still match the genuine assignment forms.
        assert!(is_top_level_key_line("r = { something = true }", "r"));
        assert!(is_top_level_key_line("ruby = {}", "ruby"));
        assert!(is_top_level_key_line("ruby={}", "ruby"));
    }
}