semver-analyzer-llm 0.0.4

LLM-based behavioral analysis for the semver-analyzer
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
//! Prompt templates for LLM-based behavioral analysis.
//!
//! Each prompt is designed to produce structured JSON output matching
//! the `FunctionSpec` or `BreakingVerdict` schemas. Template-guided
//! generation reduces hallucination (Preguss finding: ~30% → ~11-19%).

use semver_analyzer_core::{ChangedFunction, FunctionSpec, LlmCategoryDefinition, TestDiff};

/// JSON schema template for `FunctionSpec`.
///
/// Interpolated verbatim into the spec inference prompts built in this
/// module so the LLM knows the exact structure to produce. The string
/// values are human-readable placeholders describing what belongs in each
/// field; they are guidance for the model, not real data.
///
/// The top-level keys are `preconditions`, `postconditions`,
/// `error_behavior`, `side_effects`, and `notes`.
const FUNCTION_SPEC_SCHEMA: &str = r#"{
  "preconditions": [
    {
      "parameter": "parameter name",
      "condition": "what is checked (e.g., must be non-empty string)",
      "on_violation": "what happens (e.g., throws TypeError)"
    }
  ],
  "postconditions": [
    {
      "condition": "when this output is produced",
      "returns": "what is returned/resolved"
    }
  ],
  "error_behavior": [
    {
      "trigger": "what causes the error",
      "error_type": "error class name (e.g., TypeError)",
      "message_pattern": "optional: error message substring"
    }
  ],
  "side_effects": [
    {
      "target": "what external state is changed",
      "action": "what is done (e.g., inserts row, emits event)",
      "condition": "optional: when this occurs"
    }
  ],
  "notes": ["any behavioral nuances that don't fit above"]
}"#;

/// Build the spec inference prompt for a function when only its source is
/// available (no test context).
///
/// `signature` and `function_body` are embedded verbatim inside fenced
/// blocks, followed by the task description, the `FunctionSpec` JSON schema
/// template, and the output rules. The returned string is the complete
/// prompt text.
pub fn build_spec_inference_prompt(function_body: &str, signature: &str) -> String {
    // The schema is shared by every spec inference prompt in this module.
    let schema = FUNCTION_SPEC_SCHEMA;

    format!(
        r#"Analyze this function and produce a behavioral specification as a JSON object.

## Function signature:
```
{signature}
```

## Function body:
```
{function_body}
```

## Task:
Describe what this function guarantees — its preconditions (input validation),
postconditions (what it returns for given inputs), error behavior (what errors
it throws and when), side effects (external state changes), and any behavioral
notes that don't fit the structured fields.

## Output format:
Return ONLY a JSON object matching this schema (no other text):

```json
{FUNCTION_SPEC_SCHEMA}
```

Rules:
- Use empty arrays [] for categories with no entries
- Be specific and concrete in descriptions
- For preconditions, list actual parameter validation checks in the code
- For postconditions, describe what the function returns under different conditions
- For error_behavior, only list errors the code explicitly throws/rejects
- For side_effects, only list observable external state changes
- For notes, capture any behavioral nuances not covered above

Respond with ONLY the JSON object inside a ```json fenced block."#,
        FUNCTION_SPEC_SCHEMA = schema,
        function_body = function_body,
        signature = signature,
    )
}

/// Build the spec inference prompt for a function together with the test
/// diff that accompanied its change.
///
/// Same layout as the body-only spec inference prompt, with an extra
/// "Associated test diff" section rendered from `test_context` and an
/// instruction to ground the analysis in the test assertions. Returns the
/// complete prompt text.
pub fn build_spec_inference_with_test_prompt(
    function_body: &str,
    signature: &str,
    test_context: &TestDiff,
) -> String {
    format!(
        r#"Analyze this function and produce a behavioral specification as a JSON object.

## Function signature:
```
{signature}
```

## Function body:
```
{function_body}
```

## Associated test diff:
The following test file was changed alongside this function. The test assertions
provide concrete examples of expected behavior — use them to ground your analysis.

{test_section}

## Task:
Describe what this function guarantees — its preconditions (input validation),
postconditions (what it returns for given inputs), error behavior (what errors
it throws and when), side effects (external state changes), and any behavioral
notes that don't fit the structured fields.

Pay special attention to the test assertions — they encode the developer's
explicit expectations of how this function should behave.

## Output format:
Return ONLY a JSON object matching this schema (no other text):

```json
{FUNCTION_SPEC_SCHEMA}
```

Rules:
- Use empty arrays [] for categories with no entries
- Be specific and concrete in descriptions
- Ground your analysis in the actual code and test assertions

Respond with ONLY the JSON object inside a ```json fenced block."#,
        // Rendered once, directly at the point of use.
        test_section = format_test_context(test_context),
        FUNCTION_SPEC_SCHEMA = FUNCTION_SPEC_SCHEMA,
        function_body = function_body,
        signature = signature,
    )
}

/// Build the prompt that asks the LLM to compare two specs of the same
/// function (Tier 2 LLM fallback) and decide whether the change is breaking.
///
/// Both specs are pretty-printed as JSON and embedded in fenced blocks. If a
/// spec cannot be serialized, an empty JSON object (`{}`) is embedded in its
/// place instead of failing.
pub fn build_spec_comparison_prompt(old: &FunctionSpec, new: &FunctionSpec) -> String {
    // Best-effort serialization: fall back to `{}` on error.
    let render = |spec: &FunctionSpec| -> String {
        match serde_json::to_string_pretty(spec) {
            Ok(json) => json,
            Err(_) => String::from("{}"),
        }
    };

    format!(
        r#"Compare these two behavioral specifications for the SAME function at two
different versions. Determine if the changes are breaking.

## Old version spec (v1):
```json
{old_json}
```

## New version spec (v2):
```json
{new_json}
```

## Breaking change criteria:
A change is breaking if:
- Preconditions are TIGHTENED (function accepts less input than before)
- Postconditions are WEAKENED (function guarantees less output than before)
- Error types changed (callers catching specific errors will break)
- New errors added for inputs that previously succeeded
- Side effects removed or changed (consumers depending on them will break)

A change is NOT breaking if:
- Preconditions are RELAXED (function accepts more input)
- Postconditions are STRENGTHENED (function guarantees more)
- Error cases removed (function is more permissive)
- New side effects added (unless they cause observable issues)

## Output format:
Return ONLY a JSON object:

```json
{{
  "is_breaking": true/false,
  "reasons": ["list of specific breaking changes found"],
  "confidence": 0.0-1.0
}}
```

Respond with ONLY the JSON object inside a ```json fenced block."#,
        old_json = render(old),
        new_json = render(new),
    )
}

/// Build the prompt that asks whether a behavioral change in a callee
/// propagates through one of its callers or is absorbed by it.
///
/// * `caller_body` / `caller_signature` — the caller's source, embedded
///   verbatim in fenced blocks.
/// * `callee_name` — name of the function whose behavior changed.
/// * `evidence_description` — free-form description of the detected change,
///   inserted under the "Behavioral change" heading.
///
/// Returns the complete prompt text; the model is asked to answer with a
/// JSON object containing `propagates` and `reasoning`.
pub fn build_propagation_check_prompt(
    caller_body: &str,
    caller_signature: &str,
    callee_name: &str,
    evidence_description: &str,
) -> String {
    format!(
        r#"A behavioral change was detected in the function `{callee_name}`.
Determine whether this change PROPAGATES through the following caller function,
or whether the caller ABSORBS it.

## Caller signature:
```
{caller_signature}
```

## Caller body:
```
{caller_body}
```

## Behavioral change in `{callee_name}`:
{evidence_desc}

## Does the caller propagate this change?

The caller ABSORBS the change (does NOT propagate) if it:
- Ignores the callee's return value
- Catches and handles the callee's new error behavior
- Only calls the callee on code paths that don't trigger the change
- Applies its own validation that masks the change

The caller PROPAGATES the change if:
- It passes through the callee's return value to its own callers
- It doesn't handle the callee's new error cases
- The behavioral change affects the caller's observable output

## Output format:
Return ONLY a JSON object:

```json
{{
  "propagates": true/false,
  "reasoning": "brief explanation"
}}
```

Respond with ONLY the JSON object inside a ```json fenced block."#,
        evidence_desc = evidence_description,
        caller_body = caller_body,
        caller_signature = caller_signature,
        callee_name = callee_name,
    )
}

// ── File-level behavioral analysis ──────────────────────────────────────

/// Build a prompt for file-level behavioral breaking change analysis.
///
/// Instead of per-function spec inference (2+ LLM calls per function),
/// this sends the git diff for a file and the list of changed function
/// signatures in one shot — 1 LLM call per file.
///
/// # Parameters
/// * `file_path` — path shown in the prompt's "File" heading.
/// * `diff_content` — git diff for the file. Diffs larger than 15 000 bytes
///   are cut (on a UTF-8 character boundary) and a truncation marker with
///   the original size is appended.
/// * `changed_functions` — functions listed with their visibility and
///   old/new signatures; a placeholder sentence is used when empty.
/// * `test_diff` — optional diff of associated tests/examples, cut to at
///   most 8 000 bytes (on a UTF-8 character boundary).
/// * `categories` — behavioral category definitions rendered into the task
///   description and into the `category` field of the JSON example.
///
/// # Returns
/// The complete prompt text. The JSON example embedded in the prompt uses
/// ordinary single braces so the model is shown well-formed JSON.
pub fn build_file_behavioral_prompt(
    file_path: &str,
    diff_content: &str,
    changed_functions: &[ChangedFunction],
    test_diff: Option<&str>,
    categories: &[LlmCategoryDefinition],
) -> String {
    // Size limits (in bytes) that keep the prompt within context limits.
    const MAX_DIFF_BYTES: usize = 15_000;
    const MAX_TEST_DIFF_BYTES: usize = 8_000;

    let func_list: String = changed_functions
        .iter()
        .map(|f| {
            let visibility = if f.visibility == semver_analyzer_core::Visibility::Exported {
                "exported"
            } else {
                "internal"
            };
            format!(
                "- `{}` ({})\n  Old: `{}`\n  New: `{}`\n",
                f.name,
                visibility,
                f.old_signature.as_deref().unwrap_or("(added)"),
                f.new_signature.as_deref().unwrap_or("(removed)"),
            )
        })
        .collect();

    // Truncate very large diffs to avoid exceeding context limits. The cut
    // must land on a char boundary: slicing a `str` mid-character panics.
    let diff_truncated = if diff_content.len() > MAX_DIFF_BYTES {
        format!(
            "{}\n\n... [diff truncated, {} bytes total] ...",
            truncate_on_char_boundary(diff_content, MAX_DIFF_BYTES),
            diff_content.len()
        )
    } else {
        diff_content.to_string()
    };

    let func_section = if func_list.is_empty() {
        "(No function body changes detected — analyze the diff for type-level and behavioral changes)".to_string()
    } else {
        func_list
    };

    let test_diff_section = if let Some(td) = test_diff {
        let truncated_td = truncate_on_char_boundary(td, MAX_TEST_DIFF_BYTES);
        format!(
            "\n## Associated test diff:\n\
             The following diff shows how this component's tests/examples changed,\n\
             revealing expected usage pattern changes:\n\
             ```diff\n{}\n```\n",
            truncated_td
        )
    } else {
        String::new()
    };

    // Build the category section dynamically from language definitions
    let category_section = build_category_section(categories);
    let category_enum = build_category_enum(categories);

    // NOTE: `{{` / `}}` below are `format!` escapes for literal `{` / `}`.
    format!(
        r#"Analyze this file diff for breaking changes.

## File: `{file_path}`

## Changed functions in this file:
{func_section}

## Git diff:
```diff
{diff}
```
{test_diff_section}
## Task:
Identify breaking changes in these categories:

### A. Behavioral breaking changes
Changes that alter the OBSERVABLE BEHAVIOR of exported functions/components.
{category_section}
### B. API type-level breaking changes
Changes to type signatures that static analysis may miss:
1. **Interface/class `extends` changed**: changes available members
2. **Member optionality changed**: member went from optional to required or
   vice versa
3. **Enum/union members removed or renamed**: e.g., variant value
   removed or replaced
4. **Type narrowed or widened**: e.g., `string | null` → `string`
5. **Default value changed**: default changed in a way that alters behavior
6. **Member migration**: When a member is removed and its functionality moved
   to a child/sibling type, include `removal_disposition`

## What to EXCLUDE:
- New additions (new members, new functions, new enum variants)
- Internal refactoring that doesn't change observable behavior
- Comment-only changes
- Import reorganization
- Changes already obvious from type signature removal/addition

## Output format:
Return ONLY a JSON object:

```json
{{
  "breaking_behavioral_changes": [
    {{
      "symbol": "<TypeName or functionName>",
      "kind": "class",
      "category": "{category_enum}",
      "description": "<what changed and why it breaks consumers>",
      "is_internal_only": false
    }}
  ],
  "breaking_api_changes": [
    {{
      "symbol": "<TypeName.memberName or TypeName>",
      "change": "<signature_changed|type_changed|default_changed|removed>",
      "description": "<what changed in the type signature>",
      "removal_disposition": null
    }}
  ]
}}
```

Rules:
- For behavioral: use "class" for components/types, "function" for others
- For behavioral: ALWAYS include a "category" from the list above
- For behavioral: set `is_internal_only` to true when the change only
  affects internal rendering and does NOT require consumer code changes.
  Set false when consumers must update their code.
- For API: use "TypeName.memberName" format for member changes
- For API removals: include `removal_disposition` when you can determine
  where the member's functionality went:
  - `{{"type": "moved_to_related_type", "target_type": "ChildName", "mechanism": "prop"}}` —
    member moved to a named member on a child/related type
  - `{{"type": "moved_to_related_type", "target_type": "ChildName", "mechanism": "children"}}` —
    member value should now be passed as children of the child type
   - `{{"type": "replaced_by_member", "new_member": "newMemberName"}}` —
     replaced by a different member on the SAME type. Rules:
     * `new_member` MUST be an exact member name that was ADDED to the same type in the diff
     * The new member must serve the same purpose
     * If the types are fundamentally different, use `truly_removed` instead
     * If unsure which member replaced it, use `null` instead
     * Do NOT guess — if you cannot find a clear 1:1 replacement, use `null`
   - `{{"type": "made_automatic"}}` — functionality is now inferred automatically
   - `{{"type": "truly_removed"}}` — removed with no replacement
   - `null` if you cannot determine the disposition
- Keep descriptions specific and actionable
- Only include changes that would break existing consumers
- Use empty arrays for categories with no changes
- Respond with ONLY the JSON object inside a ```json fenced block."#,
        file_path = file_path,
        func_section = func_section,
        diff = diff_truncated,
        test_diff_section = test_diff_section,
        category_section = category_section,
        category_enum = category_enum,
    )
}

/// Return the longest prefix of `text` that is at most `max_bytes` long and
/// ends on a UTF-8 character boundary.
///
/// Returns `text` unchanged when it already fits.
fn truncate_on_char_boundary(text: &str, max_bytes: usize) -> &str {
    if text.len() <= max_bytes {
        return text;
    }
    // Walk back to the nearest boundary; index 0 is always a boundary, so
    // this terminates.
    let mut end = max_bytes;
    while !text.is_char_boundary(end) {
        end -= 1;
    }
    &text[..end]
}

// ── Helpers ─────────────────────────────────────────────────────────────

/// Render a `TestDiff` as plain text for inclusion in a prompt.
///
/// The output always starts with a `Test file: …` line. It is followed,
/// only when the corresponding data is non-empty, by the removed assertions
/// (prefixed `  - `), the added assertions (prefixed `  + `), and the full
/// diff inside a fenced `diff` block. All lines are joined with `\n`.
fn format_test_context(test_diff: &TestDiff) -> String {
    let mut lines = vec![format!("Test file: {}", test_diff.test_file.display())];

    if !test_diff.removed_assertions.is_empty() {
        lines.push(String::from("Removed assertions:"));
        lines.extend(
            test_diff
                .removed_assertions
                .iter()
                .map(|assertion| format!("  - {}", assertion)),
        );
    }

    if !test_diff.added_assertions.is_empty() {
        lines.push(String::from("Added assertions:"));
        lines.extend(
            test_diff
                .added_assertions
                .iter()
                .map(|assertion| format!("  + {}", assertion)),
        );
    }

    if !test_diff.full_diff.is_empty() {
        lines.push(String::from("Full diff:"));
        lines.push(format!("```diff\n{}\n```", test_diff.full_diff));
    }

    lines.join("\n")
}

/// Render the behavioral categories as an intro line plus a numbered list.
///
/// Returns an empty string when `categories` is empty. Otherwise the output
/// looks like:
/// ```text
/// For each, assign a `category` from: `dom_structure`, `css_class`, ...
///
/// 1. **DOM/render changes** (category: `dom_structure`): Changed element types...
/// 2. **CSS changes** (category: `css_class`): Class name renames...
/// ```
/// Every list entry ends with a newline.
fn build_category_section(categories: &[LlmCategoryDefinition]) -> String {
    if categories.is_empty() {
        return String::new();
    }

    // Backtick-quoted ids, comma separated, for the intro sentence.
    let quoted_ids = categories
        .iter()
        .map(|cat| format!("`{}`", cat.id))
        .collect::<Vec<_>>()
        .join(", ");
    let intro = format!("For each, assign a `category` from: {}.\n\n", quoted_ids);

    // One numbered entry per category, numbering starts at 1.
    let entries: String = categories
        .iter()
        .enumerate()
        .map(|(idx, cat)| {
            format!(
                "{}. **{}** (category: `{}`): {}\n",
                idx + 1,
                cat.label,
                cat.id,
                cat.description
            )
        })
        .collect();

    intro + &entries
}

/// Render the category ids as a pipe-separated enum placeholder for the JSON
/// example in the prompt.
///
/// Produces e.g. `<dom_structure|css_class|css_variable|...>`, or the
/// generic `<category>` when no categories are defined.
fn build_category_enum(categories: &[LlmCategoryDefinition]) -> String {
    match categories {
        [] => "<category>".to_string(),
        _ => {
            let joined = categories
                .iter()
                .map(|cat| cat.id.as_str())
                .collect::<Vec<&str>>()
                .join("|");
            format!("<{}>", joined)
        }
    }
}

// ── Rename inference prompts ──────────────────────────────────────────

/// Build the prompt for constant rename pattern inference (Call 1).
///
/// Lists the sampled removed and added constant names (one per line,
/// indented by two spaces) for `package_name` between `from_ref` and
/// `to_ref`, then asks the LLM for regex substitution rules that map removed
/// names onto added names. Returns the complete prompt text.
pub fn build_constant_rename_prompt(
    removed_sample: &[&str],
    added_sample: &[&str],
    package_name: &str,
    from_ref: &str,
    to_ref: &str,
) -> String {
    /// Indent every name by two spaces and separate entries with newlines.
    fn indent_each(names: &[&str]) -> String {
        let mut out = String::new();
        for (idx, name) in names.iter().enumerate() {
            if idx > 0 {
                out.push('\n');
            }
            out.push_str("  ");
            out.push_str(name);
        }
        out
    }

    format!(
        r#"These exported constants were removed from {package_name} between {from_ref} and {to_ref}:
{removed_list}

These exported constants were added:
{added_list}

Identify ALL systematic naming patterns that map removed constant names to added constant names.

Return ONLY a JSON array of regex substitution rules inside a ```json fenced block:
```json
[
  {{"match": "regex pattern matching removed names", "replace": "replacement using capture groups"}}
]
```

Rules:
- Use capture groups to generalize patterns (e.g., "(.*)Top$" not just "c_alert_PaddingTop")
- Each pattern should match multiple constants, not just one
- Order from most specific to least specific
- Only include patterns where applying the substitution to a removed name produces a name in the added list
- Do not include identity patterns where match and replace produce the same string"#,
        removed_list = indent_each(removed_sample),
        added_list = indent_each(added_sample),
        package_name = package_name,
        from_ref = from_ref,
        to_ref = to_ref,
    )
}

/// Build the prompt for interface/component rename mapping inference (Call 2).
///
/// `removed` and `added` are `(name, member_names)` pairs. Each is rendered
/// on its own two-space-indented line as `Name (members: a, b)` or
/// `Name (no members)`. The LLM is then asked which removed interfaces map
/// to which added ones for `package_name` between `from_ref` and `to_ref`.
/// Returns the complete prompt text.
pub fn build_interface_rename_prompt(
    removed: &[(&str, &[String])], // (name, member_names)
    added: &[(&str, &[String])],   // (name, member_names)
    package_name: &str,
    from_ref: &str,
    to_ref: &str,
) -> String {
    /// Render one line per interface, joined with newlines.
    fn describe(entries: &[(&str, &[String])]) -> String {
        let mut lines = Vec::with_capacity(entries.len());
        for (name, members) in entries {
            let line = match members.len() {
                0 => format!("  {} (no members)", name),
                _ => format!("  {} (members: {})", name, members.join(", ")),
            };
            lines.push(line);
        }
        lines.join("\n")
    }

    format!(
        r#"These interfaces/components were removed from {package_name} between {from_ref} and {to_ref}:
{removed_list}

These interfaces/components were added:
{added_list}

Identify which removed interfaces map to which added interfaces (renames/replacements).
Consider:
- Name similarity (e.g., TextProps → ContentProps)
- Member overlap (same prop names appearing in both)
- Typo corrections (e.g., FormFiledGroup → FormFieldGroup)
- Functional equivalence (component that does the same thing under a new name)

Return ONLY a JSON array of mappings inside a ```json fenced block:
```json
[
  {{"old_name": "removed name", "new_name": "added name", "confidence": "high|medium|low", "reason": "brief explanation"}}
]
```

Rules:
- Only include mappings where the added interface is a clear replacement for the removed one
- Set confidence to "high" for clear renames/typo fixes, "medium" for functional replacements with different names, "low" for uncertain matches
- If a removed interface has no replacement in the added list, omit it
- Return an empty array if no mappings can be determined"#,
        removed_list = describe(removed),
        added_list = describe(added),
        package_name = package_name,
        from_ref = from_ref,
        to_ref = to_ref,
    )
}

// Note: build_hierarchy_inference_prompt, build_suffix_rename_prompt, and
// build_composition_pattern_prompt have been moved to crates/ts/src/llm_prompts.rs.
// These prompts contain React/JSX/CSS-specific terminology and belong in the
// language crate, not the generic LLM infrastructure crate.

// Unit tests for the prompt builders. Each test renders a prompt from small
// fixture inputs and checks that the inputs and key template markers appear
// in the rendered text; exact wording of the templates is not pinned.
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    // The body-only spec prompt embeds the signature, the body, and the
    // schema's top-level section names.
    #[test]
    fn spec_inference_prompt_contains_signature() {
        let prompt =
            build_spec_inference_prompt("{ return x + 1; }", "function add(x: number): number");
        assert!(prompt.contains("function add(x: number): number"));
        assert!(prompt.contains("return x + 1"));
        assert!(prompt.contains("preconditions"));
        assert!(prompt.contains("postconditions"));
    }

    // The schema template's field names are present in the rendered prompt.
    #[test]
    fn spec_inference_prompt_includes_schema() {
        let prompt = build_spec_inference_prompt("{ return 1; }", "function f(): number");
        assert!(prompt.contains("parameter"));
        assert!(prompt.contains("condition"));
        assert!(prompt.contains("on_violation"));
        assert!(prompt.contains("error_type"));
        assert!(prompt.contains("side_effects"));
    }

    // Removed and added assertions from the test diff are rendered into the
    // test-context variant of the spec prompt.
    #[test]
    fn test_context_prompt_includes_assertions() {
        let test_diff = TestDiff {
            test_file: PathBuf::from("test.ts"),
            removed_assertions: vec!["expect(result).toBe(5)".into()],
            added_assertions: vec!["expect(result).toBe(10)".into()],
            has_assertion_changes: true,
            full_diff: String::new(),
        };

        let prompt = build_spec_inference_with_test_prompt(
            "{ return x + 1; }",
            "function add(x: number): number",
            &test_diff,
        );
        assert!(prompt.contains("expect(result).toBe(5)"));
        assert!(prompt.contains("expect(result).toBe(10)"));
        assert!(prompt.contains("test assertions"));
    }

    // Both the old and the new spec are serialized into the comparison prompt.
    #[test]
    fn comparison_prompt_includes_both_specs() {
        let old = FunctionSpec {
            preconditions: vec![],
            postconditions: vec![semver_analyzer_core::Postcondition {
                condition: "always".into(),
                returns: "5".into(),
            }],
            error_behavior: vec![],
            side_effects: vec![],
            notes: vec![],
        };
        let new = FunctionSpec {
            preconditions: vec![],
            postconditions: vec![semver_analyzer_core::Postcondition {
                condition: "always".into(),
                returns: "10".into(),
            }],
            error_behavior: vec![],
            side_effects: vec![],
            notes: vec![],
        };

        let prompt = build_spec_comparison_prompt(&old, &new);
        assert!(prompt.contains("\"returns\": \"5\""));
        assert!(prompt.contains("\"returns\": \"10\""));
        assert!(prompt.contains("breaking"));
    }

    // The file-level prompt contains the file path, the diff, the changed
    // function, and the category ids/labels in both list and enum form.
    #[test]
    fn file_behavioral_prompt_includes_diff_and_functions() {
        let funcs = vec![ChangedFunction {
            qualified_name: "src/Modal.tsx::Modal".into(),
            name: "Modal".into(),
            file: std::path::PathBuf::from("src/Modal.tsx"),
            line: 10,
            kind: semver_analyzer_core::SymbolKind::Function,
            visibility: semver_analyzer_core::Visibility::Exported,
            old_body: Some("{ return <div>old</div>; }".into()),
            new_body: Some("{ return <section>new</section>; }".into()),
            old_signature: Some("function Modal(props: ModalProps): JSX.Element".into()),
            new_signature: Some("function Modal(props: ModalProps): JSX.Element".into()),
        }];

        let categories = vec![
            LlmCategoryDefinition {
                id: "dom_structure".into(),
                label: "DOM/render changes".into(),
                description: "Changed element types".into(),
            },
            LlmCategoryDefinition {
                id: "css_class".into(),
                label: "CSS changes".into(),
                description: "Class name renames".into(),
            },
        ];
        let prompt = build_file_behavioral_prompt(
            "src/Modal.tsx",
            "- <div>old</div>\n+ <section>new</section>",
            &funcs,
            None,
            &categories,
        );
        assert!(prompt.contains("Modal"));
        assert!(prompt.contains("src/Modal.tsx"));
        assert!(prompt.contains("<div>old</div>"));
        assert!(prompt.contains("breaking_behavioral_changes"));
        assert!(prompt.contains("exported"));
        // Categories appear in the prompt
        assert!(prompt.contains("`dom_structure`"));
        assert!(prompt.contains("`css_class`"));
        assert!(prompt.contains("DOM/render changes"));
        assert!(prompt.contains("<dom_structure|css_class>"));
    }

    // The propagation prompt mentions the callee, embeds the caller body,
    // and includes the `propagates` output field.
    #[test]
    fn propagation_prompt_includes_callee_info() {
        let evidence_desc =
            "Test assertions changed:\n  - expect(x).toBe(5)\n  + expect(x).toBe(10)\n";

        let prompt = build_propagation_check_prompt(
            "{ return helper() + 1; }",
            "function main(): number",
            "helper",
            evidence_desc,
        );
        assert!(prompt.contains("helper"));
        assert!(prompt.contains("return helper() + 1"));
        assert!(prompt.contains("propagates"));
    }
}