rustio_core/ai_gen/
mod.rs

//! Phase 8.0 — AI-assisted schema generation, developer-tool only.
//!
//! This module sits **alongside** the existing `ai/` module (which is
//! the deterministic, rule-based plan/review/apply pipeline). It does
//! NOT replace or extend that pipeline; it produces a `schema::Schema`
//! JSON document that the operator then runs through
//! `rustio ai plan / review / apply` manually.
//!
//! ## Hard contract
//!
//! - LLM calls happen ONLY from the CLI. No HTTP handler, no admin
//!   page, no background task in this crate calls into here. The
//!   deployed `rustio` binary serving requests has no network reach to
//!   any LLM provider.
//! - The LLM's output is parsed as `schema::Schema` JSON and run
//!   through `Schema::validate()` before it leaves this module. A
//!   malformed or semantically-invalid response is an error;
//!   half-validated artefacts never reach disk.
//! - Nothing here writes files, runs migrations, or modifies the DB.
//!   File I/O lives at the CLI layer where the operator can confirm.
//!
//! ## Pipeline
//!
//! ```text
//! prompt ──► client ──► raw JSON string ──► serde_json ──► Schema ──► validate() ──► Ok(Schema)
//!                                                          │
//!                                                          └─ on failure: SchemaError, no file written
//! ```
//!
//! The CLI's `rustio ai generate` command owns the file write and the
//! `--force` overwrite guard — see `rustio-cli/src/main.rs`.
32
33pub mod client;
34pub mod diff;
35pub mod prompts;
36
37use crate::schema::{Schema, SchemaError};
38
39/// Errors `ai_gen::generate` can surface. Kept narrow on purpose:
40/// callers only need to distinguish "couldn't talk to the API" from
41/// "the API replied but the reply isn't a valid Schema."
42#[derive(Debug)]
43pub enum GenerateError {
44    /// Missing / empty `ANTHROPIC_API_KEY`.
45    MissingApiKey,
46    /// HTTP error talking to the provider (network, auth, rate limit,
47    /// 5xx). Carries the provider's message for triage.
48    Transport(String),
49    /// The provider replied but the body wasn't a parseable `Schema`
50    /// JSON document. Wraps the underlying parse / validation error.
51    Schema(SchemaError),
52    /// Phase 9.1 — the model returned a syntactically valid schema
53    /// with zero models, while the input had at least one. Hard
54    /// safety rule for `update`: a "remove everything" instruction
55    /// must NOT clear the schema. No bypass flag; never overridable.
56    EmptyResult,
57}
58
59impl std::fmt::Display for GenerateError {
60    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
61        match self {
62            Self::MissingApiKey => f.write_str(
63                "ANTHROPIC_API_KEY is not set. Set it in your environment before running \
64                 `rustio ai generate`.",
65            ),
66            Self::Transport(msg) => write!(f, "anthropic API transport error: {msg}"),
67            Self::Schema(err) => write!(f, "anthropic API returned invalid schema: {err}"),
68            Self::EmptyResult => f.write_str(
69                "Refusing to apply update: schema would become empty",
70            ),
71        }
72    }
73}
74
75impl std::error::Error for GenerateError {}
76
77impl From<SchemaError> for GenerateError {
78    fn from(e: SchemaError) -> Self {
79        GenerateError::Schema(e)
80    }
81}
82
83/// Top-level entry: prose `prompt` → validated `Schema`.
84///
85/// Calls the Anthropic API, parses the response as Schema JSON, runs
86/// `Schema::validate()`. The CLI is the only intended caller; tests
87/// hit the inner helpers (`prompts::build_user_prompt`,
88/// `parse_response`) directly to avoid live API calls in CI.
89pub async fn generate(prompt: &str) -> Result<Schema, GenerateError> {
90    let api_key = api_key()?;
91    let body = client::request(&api_key, prompt)
92        .await
93        .map_err(|e| GenerateError::Transport(e.to_string()))?;
94    parse_response(&body)
95}
96
97/// Phase 8.1 — sibling of `generate`: hand the model the existing
98/// schema + an instruction, get back a validated full `Schema` with
99/// the change applied. Single LLM call. The CLI is responsible for
100/// computing + showing the diff and for the y/N confirmation.
101///
102/// Phase 9.1 — empty-schema safety guard. After parsing, if the
103/// model emptied the schema, refuse the result. No bypass flag.
104pub async fn update(existing: &Schema, instruction: &str) -> Result<Schema, GenerateError> {
105    let api_key = api_key()?;
106    let existing_json = existing
107        .to_pretty_json()
108        .map_err(|e| GenerateError::Transport(format!("serialise existing schema: {e}")))?;
109    let body = client::request_update(&api_key, &existing_json, instruction)
110        .await
111        .map_err(|e| GenerateError::Transport(e.to_string()))?;
112    let updated = parse_response(&body)?;
113    check_not_empty(existing, &updated)?;
114    Ok(updated)
115}
116
117/// Phase 9.1 — hard safety guard: an `update` MUST NOT clear a
118/// non-empty schema. Returns `Err(GenerateError::EmptyResult)`
119/// when:
120///   - input had >= 1 model AND
121///   - output has 0 models.
122///
123/// Empty → empty (genuinely-empty input) and any → non-empty paths
124/// pass through. Extracted as a free function so tests can pin the
125/// truth table without standing up a fake LLM flow.
126pub(crate) fn check_not_empty(old: &Schema, new: &Schema) -> Result<(), GenerateError> {
127    if new.models.is_empty() && !old.models.is_empty() {
128        return Err(GenerateError::EmptyResult);
129    }
130    Ok(())
131}
132
133/// Read + validate the API key once for both entry points. Empty /
134/// whitespace-only values count as missing.
135fn api_key() -> Result<String, GenerateError> {
136    std::env::var("ANTHROPIC_API_KEY")
137        .ok()
138        .filter(|s| !s.trim().is_empty())
139        .ok_or(GenerateError::MissingApiKey)
140}
141
/// Phase 8.2 — result of the read-only analyze pass.
///
/// Three flat fields that the CLI prints directly; nothing in this
/// type writes to disk or touches the schema.
#[derive(Debug, Clone, PartialEq)]
pub struct AnalyzeReport {
    /// Human-readable problems, one entry per line of model output.
    pub issues: Vec<String>,
    /// Human-readable recommendations, one entry per line of model
    /// output.
    pub suggestions: Vec<String>,
    /// Score on a 0-10 scale; `parse_analyze_response` leaves it at
    /// 0.0 when the model gave no usable score.
    pub score: f32,
}
152
/// Phase 8.2 — failure modes of the analyze path.
///
/// Same shape as `GenerateError` minus the schema-validation variant,
/// because analyze never produces a `Schema`.
#[derive(Debug)]
pub enum AnalyzeError {
    /// `ANTHROPIC_API_KEY` is unset, empty, or whitespace-only.
    MissingApiKey,
    /// The provider call failed; carries the provider's message.
    Transport(String),
    /// The input schema could not be serialised before sending.
    /// Per the original author's note this should only happen for a
    /// schema that fails its own `validate()`, which the CLI guards
    /// against with `load_schema`.
    Encode(String),
}

impl std::fmt::Display for AnalyzeError {
    /// Renders the operator-facing message for each variant.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Encode(detail) => write!(f, "could not serialise schema for analyze: {detail}"),
            Self::Transport(detail) => write!(f, "anthropic API transport error: {detail}"),
            Self::MissingApiKey => f.write_str(concat!(
                "ANTHROPIC_API_KEY is not set. Set it in your environment before running ",
                "`rustio ai analyze`.",
            )),
        }
    }
}

impl std::error::Error for AnalyzeError {}
180
181/// Phase 8.2 — read-only audit. Hand the model the schema, get back
182/// a structured-text analysis, parse into `AnalyzeReport`. Single
183/// LLM call. Does NOT write to disk, does NOT modify the schema,
184/// does NOT call `update` or `generate` internally.
185pub async fn analyze(schema: &Schema) -> Result<AnalyzeReport, AnalyzeError> {
186    let api_key = std::env::var("ANTHROPIC_API_KEY")
187        .ok()
188        .filter(|s| !s.trim().is_empty())
189        .ok_or(AnalyzeError::MissingApiKey)?;
190    let existing_json = schema
191        .to_pretty_json()
192        .map_err(|e| AnalyzeError::Encode(e.to_string()))?;
193    let body = client::request_analyze(&api_key, &existing_json)
194        .await
195        .map_err(AnalyzeError::Transport)?;
196    Ok(parse_analyze_response(&body))
197}
198
199/// Parse a model response into `AnalyzeReport`. Tolerant by design:
200///
201/// 1. **Structured-text path** — find lines starting with `ISSUES:`,
202///    `SUGGESTIONS:`, `SCORE:` (case-insensitive prefix match);
203///    collect bullet items between headers. The score line accepts
204///    "7.5", "7.5/10", "7", or any prefix that parses as f32.
205/// 2. **Fallback path** — if no recognised section header appears,
206///    treat the entire body as `suggestions` (one entry per
207///    non-empty line, stripping any leading "- " bullet) so the
208///    operator sees something useful instead of an empty report.
209///
210/// The score defaults to 0.0 when the SCORE line is missing or
211/// unparseable. Callers wanting to gate on "did the model give us a
212/// score" can check `report.score > 0.0`.
213pub fn parse_analyze_response(body: &str) -> AnalyzeReport {
214    let body = body.trim();
215    if body.is_empty() {
216        return AnalyzeReport {
217            issues: Vec::new(),
218            suggestions: Vec::new(),
219            score: 0.0,
220        };
221    }
222
223    let lower = body.to_lowercase();
224    let has_section_header = lower.contains("issues:")
225        || lower.contains("suggestions:")
226        || lower.contains("score:");
227
228    if !has_section_header {
229        // Fallback: no structured headers. Treat everything as
230        // suggestions so the developer at least sees the model's
231        // analysis, even if it's free-form.
232        let suggestions = collect_bullets(body);
233        return AnalyzeReport {
234            issues: Vec::new(),
235            suggestions,
236            score: 0.0,
237        };
238    }
239
240    let mut section = Section::None;
241    let mut issues: Vec<String> = Vec::new();
242    let mut suggestions: Vec<String> = Vec::new();
243    let mut score: f32 = 0.0;
244
245    for raw_line in body.lines() {
246        let line = raw_line.trim();
247        let lower = line.to_lowercase();
248        // Section-header detection — match prefix so a line like
249        // "ISSUES: (none)" is still classified as the header line
250        // (and the "(none)" body sits inside the section as zero
251        // bullet items, which is what we want).
252        if lower.starts_with("issues:") {
253            section = Section::Issues;
254            continue;
255        }
256        if lower.starts_with("suggestions:") {
257            section = Section::Suggestions;
258            continue;
259        }
260        if lower.starts_with("score:") {
261            section = Section::Score;
262            score = parse_score(line["score:".len()..].trim()).unwrap_or(0.0);
263            continue;
264        }
265
266        // Skip blank lines and the explicit "(none)" placeholder.
267        if line.is_empty() || line.eq_ignore_ascii_case("(none)") {
268            continue;
269        }
270
271        // Strip a single leading bullet so consumers don't see "- ".
272        let item = line
273            .strip_prefix("- ")
274            .or_else(|| line.strip_prefix("* "))
275            .unwrap_or(line)
276            .to_string();
277
278        match section {
279            Section::Issues => issues.push(item),
280            Section::Suggestions => suggestions.push(item),
281            // Lines after SCORE: are tolerated but ignored — the
282            // model occasionally adds a one-line summary.
283            Section::Score | Section::None => {}
284        }
285    }
286
287    AnalyzeReport { issues, suggestions, score }
288}
289
/// Tracks which part of an analyze response the line-by-line parser
/// is currently inside.
enum Section {
    /// No header seen yet.
    None,
    /// After an `ISSUES:` header.
    Issues,
    /// After a `SUGGESTIONS:` header.
    Suggestions,
    /// After a `SCORE:` header.
    Score,
}
297
/// Pull a leading f32 out of a string like "7.5" / "7.5 / 10" /
/// "7.5/10" / "  8.0  ".
///
/// The accepted prefix is: an optional `-`, then digits, then an
/// optional `.` followed by digits (either digit run may be empty,
/// but not both). Scanning stops at the first character that does
/// not continue that shape, so trailing punctuation or a range such
/// as "7.5." or "7-8" still yields the leading number instead of
/// failing the whole parse.
///
/// Returns `None` if the trimmed input does not start with a number.
fn parse_score(s: &str) -> Option<f32> {
    let s = s.trim();
    // Length of the run of ASCII digits at the start of `text`.
    let digit_run = |text: &str| text.bytes().take_while(u8::is_ascii_digit).count();

    let sign_len = usize::from(s.starts_with('-'));
    let int_len = digit_run(&s[sign_len..]);
    let mut end = sign_len + int_len;

    // Only consume the '.' when digits follow it; a bare trailing dot
    // ("8.") is sentence punctuation, not part of the number.
    let frac_len = s[end..].strip_prefix('.').map_or(0, digit_run);
    if frac_len > 0 {
        end += 1 + frac_len;
    }

    if int_len == 0 && frac_len == 0 {
        return None;
    }
    // Every consumed byte is ASCII, so `end` is a valid char boundary.
    s[..end].parse::<f32>().ok()
}
310
/// Turns a free-form body into one entry per non-blank line. Used by
/// the unstructured fallback of the response parsers. Each line is
/// trimmed, blank lines are dropped, and a single leading bullet
/// marker (`- ` or `* `) is removed when present.
fn collect_bullets(body: &str) -> Vec<String> {
    let mut entries = Vec::new();
    for raw in body.lines() {
        let text = raw.trim();
        if text.is_empty() {
            continue;
        }
        let unbulleted = match text.strip_prefix("- ") {
            Some(rest) => rest,
            None => text.strip_prefix("* ").unwrap_or(text),
        };
        entries.push(unbulleted.to_string());
    }
    entries
}
326
/// Phase 8.4 — result of the explain-diff pass: two bullet lists.
///
/// Both lists being empty is a legitimate outcome for an empty diff
/// (BEFORE == AFTER); the CLI prints "(none)" in that case rather
/// than nothing.
#[derive(Debug, Clone, PartialEq)]
pub struct ExplainReport {
    /// Bullets collected from the response's `WHY:` section.
    pub why: Vec<String>,
    /// Bullets collected from the response's `IMPACT:` section.
    pub impact: Vec<String>,
}
336
/// Phase 8.4 — failure modes of the explain path.
///
/// Mirrors `AnalyzeError`; there is no schema-validation variant
/// because explain does not produce a `Schema`.
#[derive(Debug)]
pub enum ExplainError {
    /// The API key handed to `explain_diff` was empty or
    /// whitespace-only.
    MissingApiKey,
    /// The provider call failed; carries the provider's message.
    Transport(String),
    /// One of the input schemas could not be serialised before
    /// sending.
    Encode(String),
}

impl std::fmt::Display for ExplainError {
    /// Renders the operator-facing message for each variant.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Encode(detail) => write!(f, "could not serialise schema for explain: {detail}"),
            Self::Transport(detail) => write!(f, "anthropic API transport error: {detail}"),
            Self::MissingApiKey => f.write_str(concat!(
                "ANTHROPIC_API_KEY is not set. Set it in your environment before requesting ",
                "an explanation.",
            )),
        }
    }
}

impl std::error::Error for ExplainError {}
362
363/// Phase 8.4 — narrate a diff between two schemas with one extra
364/// LLM call. Receives the `api_key` directly (rather than reading
365/// it from the env like `generate` / `update` / `analyze`) so the
366/// caller can short-circuit when the flag is off without ever
367/// touching the env. The CLI's `--explain` gate hands this in.
368///
369/// MAX 1 LLM call. NEVER mutates either schema. NEVER recurses
370/// (no follow-up calls based on the response).
371pub async fn explain_diff(
372    old: &Schema,
373    new: &Schema,
374    api_key: &str,
375) -> Result<ExplainReport, ExplainError> {
376    if api_key.trim().is_empty() {
377        return Err(ExplainError::MissingApiKey);
378    }
379    let old_json = old
380        .to_pretty_json()
381        .map_err(|e| ExplainError::Encode(e.to_string()))?;
382    let new_json = new
383        .to_pretty_json()
384        .map_err(|e| ExplainError::Encode(e.to_string()))?;
385    let body = client::request_explain(api_key, &old_json, &new_json)
386        .await
387        .map_err(ExplainError::Transport)?;
388    Ok(parse_explain_response(&body))
389}
390
391/// Parse a model response into `ExplainReport`. Same tolerance
392/// strategy as `parse_analyze_response`:
393///
394/// 1. **Structured-text path** — find lines starting with `WHY:` /
395///    `IMPACT:` (case-insensitive prefix match); collect bullets
396///    between them. The "(none)" placeholder is honoured.
397/// 2. **Fallback** — if no recognised section header appears,
398///    treat the entire body as `why` so the operator at least
399///    sees the model's commentary, with `impact` empty.
400pub fn parse_explain_response(body: &str) -> ExplainReport {
401    let body = body.trim();
402    if body.is_empty() {
403        return ExplainReport { why: Vec::new(), impact: Vec::new() };
404    }
405
406    let lower = body.to_lowercase();
407    let has_section_header = lower.contains("why:") || lower.contains("impact:");
408    if !has_section_header {
409        return ExplainReport { why: collect_bullets(body), impact: Vec::new() };
410    }
411
412    let mut section = ExplainSection::None;
413    let mut why: Vec<String> = Vec::new();
414    let mut impact: Vec<String> = Vec::new();
415
416    for raw_line in body.lines() {
417        let line = raw_line.trim();
418        let lower = line.to_lowercase();
419
420        if lower.starts_with("why:") {
421            section = ExplainSection::Why;
422            continue;
423        }
424        if lower.starts_with("impact:") {
425            section = ExplainSection::Impact;
426            continue;
427        }
428
429        if line.is_empty() || line.eq_ignore_ascii_case("(none)") {
430            continue;
431        }
432
433        // Inside a section, only bullet-shaped lines (`- ` / `* `)
434        // count as items. A non-bullet line ENDS the section; the
435        // line itself is dropped (the system prompt forbids
436        // commentary outside the two sections, so anything else is
437        // the model breaking contract). This is the load-bearing
438        // rule that makes `explain_ignores_extra_text` pass.
439        let bullet = line
440            .strip_prefix("- ")
441            .or_else(|| line.strip_prefix("* "));
442
443        match (&section, bullet) {
444            (ExplainSection::Why, Some(item)) => why.push(item.to_string()),
445            (ExplainSection::Impact, Some(item)) => impact.push(item.to_string()),
446            (ExplainSection::Why | ExplainSection::Impact, None) => {
447                section = ExplainSection::None;
448            }
449            (ExplainSection::None, _) => {}
450        }
451    }
452
453    ExplainReport { why, impact }
454}
455
/// Tracks which part of an explain response the line-by-line parser
/// is currently inside.
enum ExplainSection {
    /// Outside both sections (before any header, or after a
    /// non-bullet line closed the previous section).
    None,
    /// After a `WHY:` header.
    Why,
    /// After an `IMPACT:` header.
    Impact,
}
462
463/// Parse a raw provider response body into a validated `Schema`.
464/// Extracted so tests can exercise it against fixture JSON without a
465/// network call.
466///
467/// The provider is asked for a JSON object matching `Schema` directly
468/// — no wrapper envelope, no markdown fence. `extract_schema_json`
469/// is tolerant of a single fenced ```json block in case the model
470/// adds one despite the prompt's instruction not to.
471pub fn parse_response(body: &str) -> Result<Schema, GenerateError> {
472    let json = extract_schema_json(body);
473    Ok(Schema::parse(json)?)
474}
475
/// Unwraps a response that the model wrapped in a single markdown
/// code fence; any other body is returned trimmed but otherwise
/// untouched.
///
/// Steps: trim the body; drop a leading ```` ```json ```` (or bare
/// ```` ``` ````) marker if present; drop newlines that follow it;
/// then, if the text ends with ```` ``` ````, drop that marker and
/// any whitespace before it. Defensive only — the system prompt
/// tells the model not to fence its output, but real LLMs sometimes
/// do anyway.
pub(crate) fn extract_schema_json(body: &str) -> &str {
    let text = body.trim();

    let after_open = if let Some(rest) = text.strip_prefix("```json") {
        rest
    } else if let Some(rest) = text.strip_prefix("```") {
        rest
    } else {
        text
    };
    let inner = after_open.trim_start_matches('\n');

    match inner.strip_suffix("```") {
        Some(unfenced) => unfenced.trim_end(),
        None => inner,
    }
}
489
490#[cfg(test)]
491mod tests {
492    use super::*;
493
494    /// Phase 8.0 — fenced output from the model is unwrapped before
495    /// parsing. The system prompt forbids fencing but the parser
496    /// tolerates it as a defensive measure.
497    #[test]
498    fn extract_schema_json_strips_fence() {
499        let fenced = "```json\n{\"version\":2}\n```";
500        assert_eq!(extract_schema_json(fenced), "{\"version\":2}");
501
502        let fenced_no_lang = "```\n{\"version\":2}\n```";
503        assert_eq!(extract_schema_json(fenced_no_lang), "{\"version\":2}");
504
505        // No fence → byte-identical pass-through (modulo outer trim).
506        let plain = "  {\"version\":2}  ";
507        assert_eq!(extract_schema_json(plain), "{\"version\":2}");
508    }
509
510    /// Phase 8.0 — a full provider response that parses as a valid
511    /// Schema. Fixture is hand-built so the test never touches the
512    /// network. This is the green path for `parse_response`.
513    #[test]
514    fn parse_response_accepts_valid_schema() {
515        let body = r#"{
516            "version": 2,
517            "rustio_version": "1.0.0",
518            "models": [
519                {
520                    "name": "Post",
521                    "table": "posts",
522                    "admin_name": "posts",
523                    "display_name": "Posts",
524                    "singular_name": "Post",
525                    "fields": [
526                        { "name": "id",    "type": "i64",      "nullable": false, "editable": true },
527                        { "name": "title", "type": "String",   "nullable": false, "editable": true }
528                    ],
529                    "relations": []
530                }
531            ]
532        }"#;
533
534        let schema = parse_response(body).expect("valid response parses");
535        assert_eq!(schema.models.len(), 1);
536        assert_eq!(schema.models[0].name, "Post");
537    }
538
539    /// Phase 8.1 — fixture covering the `update` happy path: the
540    /// model returns a full schema with one new model added (Tag)
541    /// and the original model preserved. Asserts both: the new
542    /// model lands AND the existing one is byte-identical (no
543    /// silent rename / reorder).
544    #[test]
545    fn update_adds_new_model() {
546        // Fixture response — what the model would send back when
547        // asked "add tags" against a one-model schema.
548        let response = r#"{
549            "version": 2,
550            "rustio_version": "1.0.0",
551            "models": [
552                {
553                    "name": "Post",
554                    "table": "posts",
555                    "admin_name": "posts",
556                    "display_name": "Posts",
557                    "singular_name": "Post",
558                    "fields": [
559                        { "name": "id",    "type": "i64",    "nullable": false, "editable": true },
560                        { "name": "title", "type": "String", "nullable": false, "editable": true }
561                    ],
562                    "relations": []
563                },
564                {
565                    "name": "Tag",
566                    "table": "tags",
567                    "admin_name": "tags",
568                    "display_name": "Tags",
569                    "singular_name": "Tag",
570                    "fields": [
571                        { "name": "id",    "type": "i64",    "nullable": false, "editable": true },
572                        { "name": "label", "type": "String", "nullable": false, "editable": true }
573                    ],
574                    "relations": []
575                }
576            ]
577        }"#;
578        let updated = parse_response(response).expect("valid update parses");
579        assert!(updated.models.iter().any(|m| m.name == "Tag"));
580        assert!(updated.models.iter().any(|m| m.name == "Post"));
581    }
582
583    /// Phase 8.1 — preserve-by-default: a fixture response that
584    /// keeps the original model + adds a status field to it must
585    /// flow through parse_response cleanly AND the diff against the
586    /// original must NOT report any of the surviving fields as
587    /// removed. Locks the contract end-to-end.
588    #[test]
589    fn update_preserves_existing_fields() {
590        let original = r#"{
591            "version": 2,
592            "rustio_version": "1.0.0",
593            "models": [
594                {
595                    "name": "Post",
596                    "table": "posts",
597                    "admin_name": "posts",
598                    "display_name": "Posts",
599                    "singular_name": "Post",
600                    "fields": [
601                        { "name": "id",    "type": "i64",    "nullable": false, "editable": true },
602                        { "name": "title", "type": "String", "nullable": false, "editable": true },
603                        { "name": "body",  "type": "String", "nullable": false, "editable": true }
604                    ],
605                    "relations": []
606                }
607            ]
608        }"#;
609        let response = r#"{
610            "version": 2,
611            "rustio_version": "1.0.0",
612            "models": [
613                {
614                    "name": "Post",
615                    "table": "posts",
616                    "admin_name": "posts",
617                    "display_name": "Posts",
618                    "singular_name": "Post",
619                    "fields": [
620                        { "name": "id",     "type": "i64",    "nullable": false, "editable": true },
621                        { "name": "title",  "type": "String", "nullable": false, "editable": true },
622                        { "name": "body",   "type": "String", "nullable": false, "editable": true },
623                        { "name": "status", "type": "String", "nullable": false, "editable": true }
624                    ],
625                    "relations": []
626                }
627            ]
628        }"#;
629
630        let old = parse_response(original).expect("original parses");
631        let new = parse_response(response).expect("response parses");
632        let changes = diff::diff(&old, &new);
633
634        // No FieldRemoved for any of the surviving fields.
635        for surviving in ["id", "title", "body"] {
636            assert!(
637                !changes.iter().any(|c| matches!(c,
638                    diff::Change::FieldRemoved { field, .. } if field == surviving
639                )),
640                "preserved field {surviving} surfaced as removed: {changes:?}"
641            );
642        }
643        // Exactly one FieldAdded — the new status field.
644        let adds: Vec<_> = changes
645            .iter()
646            .filter(|c| matches!(c, diff::Change::FieldAdded { .. }))
647            .collect();
648        assert_eq!(adds.len(), 1);
649    }
650
651    /// Phase 8.1 — invalid response (malformed JSON) must surface as
652    /// GenerateError::Schema and never reach the diff / file-write
653    /// layer. The CLI relies on this to abort before clobbering the
654    /// existing schema.
655    #[test]
656    fn update_invalid_json_rejected() {
657        // Malformed JSON: dangling comma after `models`.
658        let bad = r#"{
659            "version": 2,
660            "rustio_version": "1.0.0",
661            "models": [],
662        }"#;
663        let err = parse_response(bad).expect_err("malformed JSON must be rejected");
664        assert!(matches!(err, GenerateError::Schema(_)));
665    }
666
667    /// Phase 8.1 / spec test #5 — meta-test asserting that the
668    /// update path is reachable through pure functions (no live
669    /// API). If this test ever needs `ANTHROPIC_API_KEY` to run,
670    /// something has been wired wrong. The compile here proves it:
671    /// the symbols exercised by the previous four tests are
672    /// `parse_response` and `diff::diff` — neither hits the network.
673    /// This test just imports the same surface to lock the contract.
674    #[test]
675    fn no_live_api_calls() {
676        // Unset the env var explicitly. If any of the symbols below
677        // tried to read it we'd hit MissingApiKey → easy to spot.
678        let _ = std::env::var("ANTHROPIC_API_KEY"); // read, don't write
679        let dummy = r#"{
680            "version": 2, "rustio_version": "1.0.0",
681            "models": [
682                { "name": "Post", "table": "posts", "admin_name": "posts",
683                  "display_name": "Posts", "singular_name": "Post",
684                  "fields": [
685                      { "name": "id", "type": "i64", "nullable": false, "editable": true }
686                  ],
687                  "relations": []
688                }
689            ]
690        }"#;
691        let parsed = parse_response(dummy).expect("offline parse path works");
692        let _ = diff::diff(&parsed, &parsed); // diff is offline too
693    }
694
695    /// Phase 8.2 — well-formatted analyze response with one issue
696    /// citing a missing relation target. Locks the structured-text
697    /// parser end-to-end on the green path.
698    #[test]
699    fn analyze_detects_missing_relation_model() {
700        let body = "ISSUES:\n\
701- Post.author_id has relation but User model missing\n\
702\n\
703SUGGESTIONS:\n\
704- Add created_at timestamp to all models\n\
705\n\
706SCORE: 6.0\n";
707        let report = parse_analyze_response(body);
708        assert_eq!(report.issues.len(), 1);
709        assert!(report.issues[0].contains("author_id"));
710        assert!(report.issues[0].contains("User"));
711        assert_eq!(report.suggestions.len(), 1);
712        assert!((report.score - 6.0).abs() < f32::EPSILON);
713    }
714
715    /// Phase 8.2 — best-practice suggestions land in the suggestions
716    /// bucket, not issues. Locks the section-routing logic.
717    #[test]
718    fn analyze_suggests_best_practices() {
719        let body = "ISSUES:\n\
720(none)\n\
721\n\
722SUGGESTIONS:\n\
723- Add created_at and updated_at to every model\n\
724- Index Comment.post_id\n\
725- Consider an enum for Post.status\n\
726\n\
727SCORE: 8.5\n";
728        let report = parse_analyze_response(body);
729        assert!(report.issues.is_empty(), "issues bucket should be empty");
730        assert_eq!(report.suggestions.len(), 3);
731        assert!(report.suggestions.iter().any(|s| s.contains("created_at")));
732        assert!(report.suggestions.iter().any(|s| s.contains("Index")));
733        assert!(report.suggestions.iter().any(|s| s.contains("enum")));
734        assert!((report.score - 8.5).abs() < f32::EPSILON);
735    }
736
737    /// Phase 8.2 — composite valid output: bullets in both buckets,
738    /// score with the spec example shape `7.5 / 10`. The "/ 10"
739    /// suffix must be tolerated; only the leading float is consumed.
740    #[test]
741    fn analyze_parsing_valid_output() {
742        let body = "ISSUES:\n\
743- Post.author_id has relation but User model missing\n\
744- Comment.post_id not indexed\n\
745\n\
746SUGGESTIONS:\n\
747- Add created_at timestamp to all models\n\
748- Add index on foreign keys\n\
749\n\
750SCORE: 7.5 / 10\n";
751        let report = parse_analyze_response(body);
752        assert_eq!(report.issues.len(), 2);
753        assert_eq!(report.suggestions.len(), 2);
754        assert!((report.score - 7.5).abs() < f32::EPSILON);
755    }
756
757    /// Phase 8.2 — fallback path: unstructured response with no
758    /// section headers. Parser must NOT fail; everything lands in
759    /// `suggestions` so the operator at least sees the model's
760    /// commentary, with score defaulted to 0.0.
761    #[test]
762    fn analyze_handles_unstructured_output() {
763        let body = "Looks fine overall. Maybe think about adding indexes \n\
764on the foreign keys, and consider an enum for Post.status.\n\
765- Add created_at on every model.";
766        let report = parse_analyze_response(body);
767        assert!(report.issues.is_empty(), "unstructured input → issues must be empty");
768        assert!(
769            !report.suggestions.is_empty(),
770            "unstructured input → fallback should populate suggestions"
771        );
772        // Each non-empty line lands as one suggestion (with bullets
773        // stripped). Three non-empty source lines → three entries.
774        assert_eq!(report.suggestions.len(), 3);
775        assert!(report.suggestions[2].starts_with("Add created_at"));
776        assert_eq!(report.score, 0.0, "no SCORE: header → default 0.0");
777    }
778
779    /// Phase 8.2 / spec test #5 — meta-test asserting the analyze
780    /// path is reachable through pure functions (no live API).
781    /// Mirrors the equivalent test for `update`. Compile is the
782    /// proof: every analyze test in this module exercises only
783    /// `parse_analyze_response`. If anyone wires it to the network,
784    /// this test starts depending on `ANTHROPIC_API_KEY` and stands
785    /// out.
786    #[test]
787    fn analyze_no_live_api_calls() {
788        // Reading the env var is fine; calling out across the wire
789        // is not. The previous tests exercise the offline-only
790        // surface; this test re-imports it to lock the contract.
791        let _ = std::env::var("ANTHROPIC_API_KEY"); // read-only
792        let report = parse_analyze_response(
793            "ISSUES:\n(none)\n\nSUGGESTIONS:\n(none)\n\nSCORE: 9\n",
794        );
795        assert_eq!(report.issues.len(), 0);
796        assert_eq!(report.suggestions.len(), 0);
797        assert_eq!(report.score, 9.0);
798    }
799
800    // ----- Phase 8.4 — explain-diff parser tests -----------------
801
802    /// Phase 8.4 / spec test #1 — green path: well-formatted
803    /// response with both sections + bullets. Locks the contract
804    /// that the parser splits the buckets correctly and strips
805    /// "- " bullets.
806    #[test]
807    fn explain_parses_valid_response() {
808        let body = "WHY:\n\
809- Tags allow flexible categorization of posts\n\
810- Decoupling from rigid categories\n\
811\n\
812IMPACT:\n\
813- Adds new table (Tag)\n\
814- Introduces many-to-many relationship\n";
815        let report = parse_explain_response(body);
816        assert_eq!(report.why.len(), 2);
817        assert!(report.why[0].starts_with("Tags allow"));
818        assert!(report.why[1].starts_with("Decoupling"));
819        assert_eq!(report.impact.len(), 2);
820        assert!(report.impact[0].starts_with("Adds new table"));
821        assert!(report.impact[1].starts_with("Introduces"));
822    }
823
824    /// Phase 8.4 / spec test #2 — IMPACT section omitted entirely
825    /// must not panic; missing section yields an empty bucket.
826    /// Symmetrical: WHY omitted does the same.
827    #[test]
828    fn explain_handles_missing_sections() {
829        // IMPACT only.
830        let body = "IMPACT:\n- Adds Tag table\n";
831        let report = parse_explain_response(body);
832        assert!(report.why.is_empty(), "WHY missing → empty bucket");
833        assert_eq!(report.impact.len(), 1);
834
835        // WHY only.
836        let body = "WHY:\n- Tags help categorize\n";
837        let report = parse_explain_response(body);
838        assert_eq!(report.why.len(), 1);
839        assert!(report.impact.is_empty(), "IMPACT missing → empty bucket");
840
841        // Both sections present but explicitly "(none)".
842        let body = "WHY:\n(none)\n\nIMPACT:\n(none)\n";
843        let report = parse_explain_response(body);
844        assert!(report.why.is_empty());
845        assert!(report.impact.is_empty());
846    }
847
848    /// Phase 8.4 / spec test #3 — extra commentary outside the two
849    /// section labels must be dropped, not folded into either
850    /// bucket. The spec forbids extra sections, but real models
851    /// occasionally add a closing line; the parser tolerates it.
852    #[test]
853    fn explain_ignores_extra_text() {
854        let body = "WHY:\n\
855- Improves categorization\n\
856\n\
857IMPACT:\n\
858- New table\n\
859\n\
860This concludes the explanation. Hope it helps!\n";
861        let report = parse_explain_response(body);
862        assert_eq!(report.why.len(), 1);
863        assert_eq!(report.impact.len(), 1);
864        assert!(report.impact[0].starts_with("New table"));
865        // Trailing prose must NOT land in either bucket.
866        assert!(
867            !report.impact.iter().any(|l| l.contains("This concludes")),
868            "trailing commentary leaked into impact: {:?}",
869            report.impact
870        );
871        assert!(
872            !report.why.iter().any(|l| l.contains("This concludes")),
873            "trailing commentary leaked into why: {:?}",
874            report.why
875        );
876    }
877
878    /// Phase 8.4 — fallback path: unstructured response (no WHY: /
879    /// IMPACT: headers). The whole body becomes `why`, `impact`
880    /// stays empty. Operator still sees the model's commentary.
881    #[test]
882    fn explain_fallback_treats_unstructured_as_why() {
883        let body = "Tags help categorize posts.\n\
884- New table is added.\n\
885- Many-to-many relationship is introduced.";
886        let report = parse_explain_response(body);
887        assert!(
888            report.impact.is_empty(),
889            "no headers → impact must be empty"
890        );
891        assert_eq!(report.why.len(), 3);
892        assert_eq!(report.why[0], "Tags help categorize posts.");
893        assert_eq!(report.why[1], "New table is added.");
894    }
895
896    /// Phase 9.1 — `update` MUST refuse a result that empties a
897    /// non-empty schema. Truth table:
898    ///   non-empty → empty   → Err(EmptyResult)   (the dangerous case)
899    ///   non-empty → non-empty → Ok(())
900    ///   empty     → empty   → Ok(())            (no-op, fine)
901    ///   empty     → non-empty → Ok(())          (genuine first-time fill)
902    #[test]
903    fn update_refuses_empty_result() {
904        let one_model = crate::schema::Schema {
905            version: crate::schema::SCHEMA_VERSION,
906            rustio_version: "1.0.0".into(),
907            models: vec![crate::schema::SchemaModel {
908                name: "Post".into(),
909                table: "posts".into(),
910                admin_name: "posts".into(),
911                display_name: "Posts".into(),
912                singular_name: "Post".into(),
913                fields: vec![],
914                relations: vec![],
915                core: false,
916            }],
917        };
918        let empty = crate::schema::Schema {
919            version: crate::schema::SCHEMA_VERSION,
920            rustio_version: "1.0.0".into(),
921            models: vec![],
922        };
923
924        // The dangerous case — must reject.
925        let err = check_not_empty(&one_model, &empty)
926            .expect_err("non-empty → empty must reject");
927        assert!(
928            matches!(err, GenerateError::EmptyResult),
929            "expected EmptyResult, got {err:?}"
930        );
931        assert_eq!(
932            err.to_string(),
933            "Refusing to apply update: schema would become empty"
934        );
935
936        // Other paths must pass.
937        check_not_empty(&one_model, &one_model)
938            .expect("non-empty preservation must pass");
939        check_not_empty(&empty, &empty).expect("empty no-op must pass");
940        check_not_empty(&empty, &one_model)
941            .expect("first-time fill must pass");
942    }
943
944    /// Phase 8.0 — invalid Schema (here: unknown field type
945    /// `"FooBar"`) must be rejected with `Schema(_)` so the CLI can
946    /// abort before writing anything to disk.
947    #[test]
948    fn parse_response_rejects_invalid_schema() {
949        let body = r#"{
950            "version": 2,
951            "rustio_version": "1.0.0",
952            "models": [
953                {
954                    "name": "Post",
955                    "table": "posts",
956                    "admin_name": "posts",
957                    "display_name": "Posts",
958                    "singular_name": "Post",
959                    "fields": [
960                        { "name": "id",    "type": "i64",    "nullable": false, "editable": true },
961                        { "name": "title", "type": "FooBar", "nullable": false, "editable": true }
962                    ],
963                    "relations": []
964                }
965            ]
966        }"#;
967
968        let err = parse_response(body).expect_err("invalid type must reject");
969        match err {
970            GenerateError::Schema(SchemaError::InvalidType { ref ty, .. }) => {
971                assert_eq!(ty, "FooBar");
972            }
973            other => panic!("expected Schema(InvalidType), got {other:?}"),
974        }
975    }
976}
rustio_core/ai_gen/mod.rs

rustio_core/ai_gen/
mod.rs