semver_analyzer_core/traits.rs
1//! Trait definitions for language-pluggable analysis.
2//!
3//! Adding a new language means implementing these traits. The orchestrator,
4//! diff engine, and output format are language-agnostic and reused unchanged.
5//!
6//! ## Trait ownership
7//!
8//! | Trait | Used by | Per-language? |
9//! |---|---|---|
10//! | `Language` | TD + BU | Yes (unified analysis pipeline) |
11//! | `BehaviorAnalyzer` | BU | No (language-agnostic, LLM-based) |
12
13use crate::types::{
14 ApiSurface, BehavioralChangeKind, BodyAnalysisResult, BreakingVerdict, Caller, ChangedFunction,
15 EvidenceType, ExpectedChild, FunctionSpec, Reference, StructuralChange, Symbol, SymbolKind,
16 TestDiff, TestFile, Visibility,
17};
18use anyhow::Result;
19use serde::{de::DeserializeOwned, Serialize};
20use std::collections::{BTreeSet, HashMap};
21use std::fmt::Debug;
22use std::path::{Path, PathBuf};
23use std::sync::Arc;
24
25// ── BU Traits (language-agnostic, LLM-based) ───────────────────────────
26
27/// Analyze behavioral changes via LLM-based spec inference.
28///
29/// Language-agnostic: the function body and signature are passed as
30/// strings. The LLM generates template-constrained `FunctionSpec`
31/// objects, which are compared mechanically (Tier 1) or via LLM
32/// fallback (Tier 2).
33///
34/// Implementations may use:
35/// - Direct LLM API calls (OpenAI, Anthropic, etc.)
36/// - `goose run --no-session -q -t "..."`
37/// - `opencode run "..."`
38/// - Any other agent CLI via `--llm-command`
39pub trait BehaviorAnalyzer {
40 /// Infer a function's behavioral spec from its body alone.
41 ///
42 /// Lower confidence than `infer_spec_with_test_context` because
43 /// the LLM has no grounded examples of expected behavior.
44 fn infer_spec(&self, function_body: &str, signature: &str) -> Result<FunctionSpec>;
45
46 /// Infer a spec with additional context from the test file.
47 ///
48 /// The test assertions give the LLM concrete examples of expected
49 /// behavior — reducing hallucination compared to body-only inference.
50 fn infer_spec_with_test_context(
51 &self,
52 function_body: &str,
53 signature: &str,
54 test_context: &TestDiff,
55 ) -> Result<FunctionSpec>;
56
57 /// Compare two specs and determine if the change is breaking.
58 ///
59 /// Uses a two-tier approach:
60 /// - Tier 1: Structural comparison on `FunctionSpec` fields
61 /// - Tier 2: LLM fallback for `notes` diffs and ambiguous matches
62 fn specs_are_breaking(&self, old: &FunctionSpec, new: &FunctionSpec)
63 -> Result<BreakingVerdict>;
64
65 /// Check whether a caller propagates a behavioral break from a callee.
66 ///
67 /// Given a caller's body/signature and evidence of a behavioral
68 /// break in a callee it invokes, determine whether the caller's
69 /// observable behavior actually changes. The caller might absorb
70 /// the break by:
71 /// - Ignoring the callee's return value
72 /// - Catching and handling the callee's new error behavior
73 /// - Only invoking the callee on code paths that don't trigger
74 /// the behavioral change
75 /// - Applying its own validation that masks the change
76 ///
77 /// Returns true if the break propagates (caller IS affected),
78 /// false if the caller absorbs it (NOT affected).
79 fn check_propagation(
80 &self,
81 caller_body: &str,
82 caller_signature: &str,
83 callee_name: &str,
84 evidence_description: &str,
85 ) -> Result<bool>;
86}
87
88// ── Language abstraction traits (multi-language architecture) ────────────
89//
90// These traits define the integration point for multi-language support.
91// See `design/01-traits.md` for detailed documentation.
92
93/// Language-specific semantic rules consumed by the diff engine.
94///
95/// These encode the places where "is this breaking?" or "are these related?"
96/// differ fundamentally by language. The diff engine calls these methods
97/// instead of hardcoding language-specific rules.
98pub trait LanguageSemantics<M: Default + Clone + PartialEq = ()> {
99 /// Is adding this member to this container a breaking change?
100 ///
101 /// This is the single rule that differs most fundamentally by language:
102 /// - TypeScript: breaking only if the member is required (non-optional).
103 /// - Go: ALWAYS breaking for interfaces (all implementors must add it).
104 /// - Java: breaking for abstract methods, not for default methods.
105 /// - C#: breaking for abstract members on interfaces.
106 /// - Python: breaking for abstract methods on Protocol/ABC.
107 fn is_member_addition_breaking(&self, container: &Symbol<M>, member: &Symbol<M>) -> bool;
108
109 /// Are these two symbols part of the same logical family/group?
110 ///
111 /// Used to scope migration detection. When a symbol is removed, only
112 /// symbols in the same family are considered as potential absorption targets.
113 ///
114 /// - TypeScript/React: same component directory
115 /// - Go: same package
116 /// - Java: same package
117 /// - Python: same module
118 fn same_family(&self, a: &Symbol<M>, b: &Symbol<M>) -> bool;
119
120 /// Are these two symbols the same concept, possibly at different paths?
121 ///
122 /// When true, migration detection does a full member comparison (all members,
123 /// not just newly-added ones) because the candidate is assumed to be a direct
124 /// replacement for the removed symbol.
125 ///
126 /// Resolves companion types linked by naming convention:
127 /// - TypeScript: `Button` and `ButtonProps` (component + its props interface)
128 /// - Go: `Client` and `ClientOptions` (struct + its configuration)
129 /// - Java: `UserService` and `UserServiceImpl` (interface + implementation)
130 fn same_identity(&self, a: &Symbol<M>, b: &Symbol<M>) -> bool;
131
132 /// Numeric rank for a visibility level (higher = more visible).
133 ///
134 /// Used to determine if visibility was reduced (breaking) or increased.
135 /// The ordering differs by language:
136 /// - TypeScript: Private(0) < Internal(1) < Protected(1) < Public(2) < Exported(3)
137 /// - Java: Private(0) < PackagePrivate(1) < Protected(2) < Public(3)
138 /// - Go: Internal(0) < Exported(1)
139 fn visibility_rank(&self, v: Visibility) -> u8;
140
141 /// Parse union/constrained type values for fine-grained diffing.
142 ///
143 /// TypeScript: parse `'primary' | 'secondary' | 'danger'`.
144 /// Python: parse `Literal['a', 'b']`.
145 /// Most other languages return `None`.
146 fn parse_union_values(&self, _type_str: &str) -> Option<BTreeSet<String>> {
147 None
148 }
149
150 /// Whether a return type string represents an async wrapper.
151 ///
152 /// Used by the diff engine to detect sync→async and async→sync changes,
153 /// which are always breaking regardless of the inner type.
154 ///
155 /// TypeScript/JavaScript: `Promise<T>`
156 /// Python: `Coroutine[...]`, `Awaitable[...]`
157 /// Java: `CompletableFuture<T>`, `Future<T>`
158 /// Go: returns `false` (async handled via goroutines, not return types)
159 fn is_async_wrapper(&self, _type_str: &str) -> bool {
160 false
161 }
162
163 /// Format an import/use statement change hint for migration descriptions.
164 ///
165 /// When a symbol is renamed across packages, the diff engine includes
166 /// import guidance so consumers know to update their import paths.
167 ///
168 /// TypeScript: `"replace \`import { X } from 'old-pkg'\` with \`import { X } from 'new-pkg'\`"`
169 /// Go: `"replace \`\"old/pkg\"\` with \`\"new/pkg\"\`"`
170 /// Default: generic format without language-specific syntax.
171 fn format_import_change(&self, symbol: &str, old_path: &str, new_path: &str) -> String {
172 format!(
173 "replace import of `{}` from `{}` with `{}`",
174 symbol, old_path, new_path,
175 )
176 }
177
178 /// Should this symbol be excluded from diff analysis?
179 ///
180 /// Called by the diff engine to filter out symbols that should not be
181 /// compared. The most common case is TypeScript's `export * from '...'`
182 /// star re-export directives.
183 ///
184 /// TypeScript: `sym.name == "*"` (star re-exports)
185 /// Default: `false` (all symbols are analyzed)
186 fn should_skip_symbol(&self, _sym: &Symbol<M>) -> bool {
187 false
188 }
189
190 /// Human-readable label for members when building migration descriptions.
191 ///
192 /// TypeScript: `"props"` (component properties)
193 /// Go: `"fields"` (struct fields)
194 /// Default: `"members"`
195 fn member_label(&self) -> &'static str {
196 "members"
197 }
198
199 /// Extract a fallback key for rename matching from a symbol's metadata.
200 ///
201 /// When fingerprint-based rename detection fails, the diff engine uses
202 /// this method to extract an alternative matching key. For TypeScript
203 /// CSS tokens, this parses the resolved CSS value from the `.d.ts`
204 /// type annotation (e.g., the string `"#151515"` from a CSS variable).
205 ///
206 /// TypeScript: parses `["value"]: "..."` from the return type annotation
207 /// Default: `None` (no fallback key)
208 fn extract_rename_fallback_key(&self, _sym: &Symbol<M>) -> Option<String> {
209 None
210 }
211
212 /// Normalize a qualified name for relocation detection.
213 ///
214 /// Strips language-specific path segments that represent lifecycle
215 /// modifiers (e.g., TypeScript's `/deprecated/` and `/next/` directories).
216 /// Symbols with matching canonical names are detected as relocations
217 /// rather than separate removals and additions.
218 ///
219 /// TypeScript: strips `/deprecated/` and `/next/` segments
220 /// Default: returns the name unchanged
221 fn canonical_name_for_relocation(&self, qualified_name: &str) -> String {
222 qualified_name.to_string()
223 }
224
225 /// Classify a relocation based on old and new qualified names.
226 ///
227 /// Returns a human-readable label describing the relocation direction
228 /// (e.g., "moved to deprecated exports", "promoted from next to stable").
229 /// Returns `None` for generic relocations with no special classification.
230 ///
231 /// TypeScript: detects `/deprecated/` and `/next/` transitions
232 /// Default: `None` (no classification)
233 fn classify_relocation(&self, _old_qname: &str, _new_qname: &str) -> Option<&'static str> {
234 None
235 }
236
237 /// Derive the import subpath for a symbol, used in migration descriptions.
238 ///
239 /// When a symbol moves between submodules (e.g., from main exports to
240 /// `/deprecated/` exports), the import path changes. This method derives
241 /// the effective import path from the package name and qualified name.
242 ///
243 /// TypeScript: appends `/deprecated` or `/next` based on qualified name
244 /// Default: returns the package name unchanged
245 fn derive_import_subpath(&self, package: Option<&str>, _qualified_name: &str) -> String {
246 package.unwrap_or("unknown").to_string()
247 }
248
249 /// Produce additional structural changes by diffing language-specific
250 /// metadata on two matched symbols.
251 ///
252 /// Called by the diff engine for each pair of symbols that matched by
253 /// qualified name. The default implementation returns no changes.
254 ///
255 /// TypeScript: could diff `rendered_components` or `css` metadata.
256 /// Default: empty (no language-specific metadata diffing)
257 fn diff_language_data(&self, _old: &Symbol<M>, _new: &Symbol<M>) -> Vec<StructuralChange> {
258 vec![]
259 }
260
261 /// Post-process the change list before returning from diff_surfaces.
262 ///
263 /// TypeScript: dedup default export changes.
264 /// Most languages: no-op.
265 fn post_process(&self, _changes: &mut Vec<StructuralChange>) {}
266
267 /// If this language supports component hierarchy inference (e.g., React,
268 /// Vue, Django templates), return the hierarchy semantics implementation.
269 ///
270 /// The orchestrator uses this to prepare data for LLM hierarchy inference.
271 /// The trait is NOT responsible for LLM calls or prompt construction.
272 fn hierarchy(&self) -> Option<&dyn HierarchySemantics<M>> {
273 None
274 }
275
276 /// If this language supports LLM-based rename inference (e.g., CSS
277 /// physical→logical property renames, interface rename mappings),
278 /// return the rename semantics implementation.
279 ///
280 /// The orchestrator uses this to prepare data for LLM rename inference.
281 /// The trait is NOT responsible for LLM calls or prompt construction.
282 fn renames(&self) -> Option<&dyn RenameSemantics> {
283 None
284 }
285
286 /// If this language has deterministic body-level analysis (e.g., JSX diff,
287 /// CSS variable scanning for TypeScript), return the body analysis
288 /// implementation.
289 ///
290 /// The orchestrator calls this during BU Phase 1 to detect behavioral
291 /// breaks from function body changes without LLM assistance.
292 fn body_analyzer(&self) -> Option<&dyn BodyAnalysisSemantics> {
293 None
294 }
295
296 /// Primitive type names for this language, used by the diff engine's
297 /// structural similarity comparison.
298 ///
299 /// When two types are compared for structural similarity (e.g., during
300 /// rename detection), types matching these names are classified as
301 /// primitives. Two primitives of different names are structurally similar
302 /// (both are scalars), whereas a primitive vs a reference type is not.
303 ///
304 /// Default: common cross-language primitives (string, number, boolean,
305 /// void, null). Languages should override to add their own (e.g.,
306 /// TypeScript adds `undefined`, `never`, `any`, `unknown`; Java adds
307 /// `int`, `long`, `double`, `float`, `char`, `byte`, `short`).
308 fn primitive_type_names(&self) -> &[&str] {
309 &["string", "number", "boolean", "void", "null"]
310 }
311}
312
313// ── Optional capability traits ──────────────────────────────────────────
314//
315// These traits represent optional analysis capabilities that some languages
316// support. They are accessed via optional accessors on `LanguageSemantics`.
317// The orchestrator checks for their presence and conditionally runs the
318// corresponding analysis steps.
319
320/// Deterministic data preparation for component hierarchy inference.
321///
322/// Languages with component composition models (React, Vue, Django, etc.)
323/// implement this to tell the orchestrator what files belong to a component
324/// family and how families relate to each other.
325///
326/// The orchestrator uses `same_family` for symbol grouping, then these
327/// methods for data preparation. The LLM call itself stays in the orchestrator.
328///
329/// TODO: Reconsider — the methods that take repo/git_ref currently require
330/// language impls to know about git. A future refactor should have the
331/// orchestrator own all git plumbing and pass content to pure-logic methods.
332pub trait HierarchySemantics<M: Default + Clone + PartialEq = ()> {
333 /// Get file paths belonging to a component family directory.
334 ///
335 /// Given a family name (e.g., "Dropdown"), returns relative paths to
336 /// all source files in that family. Used to read content for the LLM prompt.
337 fn family_source_paths(&self, repo: &Path, git_ref: &str, family_name: &str) -> Vec<String>;
338
339 /// Get a human-readable family name from a group of symbols.
340 ///
341 /// TypeScript/React: extracts the component directory name
342 /// (e.g., "Dropdown" from "packages/react-core/src/components/Dropdown/...")
343 fn family_name_from_symbols(&self, symbols: &[&Symbol<M>]) -> Option<String>;
344
345 /// Detect cross-family relationships (e.g., React context imports).
346 ///
347 /// Returns pairs of (consumer_family, provider_family, relationship_name).
348 /// Used to include related component signatures in the LLM prompt.
349 fn cross_family_relationships(
350 &self,
351 repo: &Path,
352 git_ref: &str,
353 ) -> Vec<(String, String, String)>;
354
355 /// Read related component signatures for cross-family context.
356 ///
357 /// Given a provider family and the context/relationship names that
358 /// link it to a consumer, returns relevant source content to include
359 /// in the LLM prompt.
360 fn related_family_content(
361 &self,
362 repo: &Path,
363 git_ref: &str,
364 family_name: &str,
365 relationship_names: &[String],
366 ) -> Option<String>;
367
368 /// Whether a symbol is a candidate for hierarchy inference.
369 ///
370 /// The orchestrator calls this to filter symbols when grouping into
371 /// families. Only candidates are counted toward the minimum threshold.
372 ///
373 /// TypeScript/React: PascalCase Variable/Class/Function/Constant
374 /// (React components are PascalCase functions or classes).
375 fn is_hierarchy_candidate(&self, sym: &Symbol<M>) -> bool;
376
377 /// Minimum number of exported types for a family to qualify
378 /// for hierarchy inference. Default: 2.
379 fn min_components_for_hierarchy(&self) -> usize {
380 2
381 }
382
383 /// Compute component hierarchy deterministically.
384 ///
385 /// The default implementation returns an empty map. Language implementations
386 /// that support component hierarchy (e.g., TypeScript/React) override this
387 /// with the full algorithm using language-specific metadata.
388 ///
389 /// The method works on the NEW surface and structural changes. It returns
390 /// the expected hierarchy for the new version.
391 fn compute_deterministic_hierarchy(
392 &self,
393 new_surface: &ApiSurface<M>,
394 structural_changes: &[StructuralChange],
395 ) -> HashMap<String, HashMap<String, Vec<ExpectedChild>>> {
396 let _ = (new_surface, structural_changes);
397 HashMap::new()
398 }
399}
400
/// Deterministic data preparation for LLM-based rename inference.
///
/// Implemented by languages that profit from LLM-detected rename patterns
/// (CSS physical→logical property renames, interface rename mappings, ...).
///
/// The orchestrator calls into this trait to assemble the LLM inputs; the
/// LLM call and the prompt construction remain in the orchestrator/LLM
/// crate.
pub trait RenameSemantics {
    /// Pick the removed constants that are shown to the LLM.
    ///
    /// The default keeps the first 30 entries of `removed`, in order, and
    /// ignores `_added`. Language impls may instead prioritize particular
    /// suffixes/patterns to improve pattern discovery.
    fn sample_removed_constants<'a>(&self, removed: &[&'a str], _added: &[&'a str]) -> Vec<&'a str> {
        let limit = removed.len().min(30);
        removed[..limit].to_vec()
    }

    /// Pick the added constants that are shown to the LLM.
    ///
    /// The default keeps the first 30 entries of `added`, in order, and
    /// ignores `_removed`.
    fn sample_added_constants<'a>(&self, _removed: &[&'a str], added: &[&'a str]) -> Vec<&'a str> {
        let limit = added.len().min(30);
        added[..limit].to_vec()
    }

    /// Number of removed constants required before constant rename
    /// inference is triggered. Defaults to 50.
    fn min_removed_for_constant_inference(&self) -> usize {
        50
    }

    /// Number of removed interfaces required before interface rename
    /// inference is triggered. Defaults to 2.
    fn min_removed_for_interface_inference(&self) -> usize {
        2
    }
}
441
442/// Deterministic body-level analysis for behavioral change detection.
443///
444/// Languages with framework-specific body patterns (e.g., JSX diff and CSS
445/// variable scanning for TypeScript/React) implement this to detect
446/// behavioral breaks from function body changes without LLM assistance.
447///
448/// The orchestrator calls `analyze_changed_body` during BU Phase 1 for each
449/// changed function that passes visibility filtering.
450///
451/// The `category_label` field on results uses the serde serialization format
452/// of the language's `Category` type. At the call site, the orchestrator
453/// deserializes this into `L::Category` via serde.
454pub trait BodyAnalysisSemantics {
455 /// Run deterministic analysis on a changed function's body.
456 ///
457 /// Returns a list of (description, category_label) pairs representing
458 /// behavioral breaks detected. The category_label is the string form
459 /// of the language's Category enum (e.g., "dom_structure" for
460 /// `TsCategory::DomStructure`).
461 ///
462 /// TypeScript: runs JSX diff + CSS variable scanning.
463 /// Other languages: may check annotation changes, decorator changes, etc.
464 fn analyze_changed_body(
465 &self,
466 old_body: &str,
467 new_body: &str,
468 func_name: &str,
469 file_path: &str,
470 ) -> Vec<BodyAnalysisResult>;
471}
472
473/// Language-specific human-readable descriptions for changes.
474///
475/// Each language owns its messaging entirely -- there is no generic
476/// template in core. These descriptions are consumed by LLMs downstream,
477/// so language-appropriate terminology matters.
478pub trait MessageFormatter {
479 /// Produce a human-readable description for a structural change.
480 fn describe(&self, change: &StructuralChange) -> String;
481}
482
483// ── Worktree sharing ─────────────────────────────────────────────────────
484
/// Opaque handle to a checked-out worktree.
///
/// The worktree stays alive for as long as a handle exists and is cleaned up
/// once the last `Arc<dyn WorktreeAccess>` is dropped.
///
/// Language crates implement this trait on their worktree guard type. The
/// orchestrator holds `Arc<dyn WorktreeAccess>` so that the TD and SD
/// pipelines can share worktrees via `std::sync::mpsc::channel`.
pub trait WorktreeAccess
where
    Self: Send + Sync + 'static,
{
    /// Filesystem location of the worktree directory.
    fn path(&self) -> &Path;
}
497
498/// Result of `extract_keeping_worktree`: API surface + optional worktree handle.
499pub type ExtractionWithWorktree<M> = (ApiSurface<M>, Option<Arc<dyn WorktreeAccess>>);
500
501// ── Extended analysis parameters ─────────────────────────────────────────
502
/// Input bundle for `Language::run_extended_analysis`.
///
/// Combines the repo/ref context with data the orchestrator has already
/// computed, so a language implementation can attach that data to its own
/// extensions while the orchestrator stays unaware of the concrete
/// extension type.
///
/// `dep_dir`, `removed_dep_components` and `dep_repo_packages` exist for
/// languages that have a separate dependency repository (a CSS design
/// system repo, a proto definitions repo, ...). Languages without one
/// ignore these fields.
#[derive(Debug, Clone)]
pub struct ExtendedAnalysisParams {
    /// The primary repository under analysis.
    pub repo: PathBuf,
    /// Git ref of the old ("from") version.
    pub from_ref: String,
    /// Git ref of the new ("to") version.
    pub to_ref: String,
    /// Dependency resource repository, already checked out and built, if
    /// any. TypeScript/CSS: the PatternFly CSS repo. Other languages: proto
    /// definitions, IDL files, etc.
    pub dep_dir: Option<PathBuf>,
    /// Component/module directories of the dependency repository that were
    /// removed between the old and new versions, computed by the
    /// orchestrator via directory-level diffing. TypeScript/CSS: removed CSS
    /// component blocks (e.g. `["select", "chip"]`).
    pub removed_dep_components: Vec<String>,
    /// Packages of the dependency repo, mapped name → version at the new
    /// ref. Drives dep-update rules for packages that live outside the main
    /// analyzed monorepo.
    pub dep_repo_packages: HashMap<String, String>,

    /// Path of the from-ref worktree on disk, when TD shares it.
    ///
    /// If present, the SD pipeline reads the old version from the
    /// filesystem rather than through `read_git_file`. It also enables
    /// `oxc_resolver` for robust import resolution (barrel files, package
    /// imports, tsconfig path aliases).
    pub from_worktree_path: Option<PathBuf>,

    /// Path of the to-ref worktree on disk, when TD shares it.
    ///
    /// Counterpart of `from_worktree_path` for the new version.
    pub to_worktree_path: Option<PathBuf>,

    /// CSS classes for which a naive version prefix swap (e.g. `pf-v5-` →
    /// `pf-v6-`) yields a class name absent from the target CSS
    /// distribution.
    ///
    /// Entries are `(old_class, dead_swapped_class)` pairs. They feed rules
    /// that flag these dead classes and suppress the blind prefix swap fix.
    pub dead_css_classes_after_swap: Vec<(String, String)>,
}
555
556// ── LLM category definitions ────────────────────────────────────────────
557
/// Definition of one behavioral change category, as presented to the LLM.
///
/// Every language supplies a list of these to steer the LLM's output. The
/// `id` has to equal the serde name of the matching `Language::Category`
/// enum variant (e.g. `"dom_structure"` for `TsCategory::DomStructure`).
#[derive(Debug, Clone)]
pub struct LlmCategoryDefinition {
    /// Machine-readable identifier, such as `"dom_structure"` or
    /// `"annotation_change"`. Must match the serde serialization of the
    /// `Language::Category` variants.
    pub id: String,
    /// Short label for humans, such as `"DOM/render changes"` or
    /// `"Annotation changes"`.
    pub label: String,
    /// Longer explanation, placed in the LLM prompt, of what the category
    /// covers.
    pub description: String,
}
573
574/// The core language abstraction.
575///
576/// Composes `LanguageSemantics + MessageFormatter` and adds six associated
577/// types representing language-specific data flowing through the pipeline.
578///
579/// Code that only needs semantic rules can take `&dyn LanguageSemantics`
580/// (no generic parameter). Code that needs the associated types takes
581/// `L: Language`.
582///
583/// ## Konveyor rule generation
584///
585/// Konveyor rule generation is **not** part of this trait. Rule generation
586/// is language-specific and lives in each language crate (e.g.,
587/// `crates/ts/src/konveyor.rs` and `konveyor_v2.rs`). The shared rule
588/// types (`KonveyorRule`, `FixStrategy`) live in `crates/konveyor-core/`.
589/// If a second language needs full rule generation, consider introducing
590/// a `KonveyorGenerator<L>` trait.
591pub trait Language:
592 LanguageSemantics<Self::SymbolData> + MessageFormatter + Send + Sync + 'static
593{
594 /// Per-symbol metadata type carried in `Symbol<M>.language_data`.
595 ///
596 /// TypeScript: `TsSymbolData` (rendered components, CSS tokens).
597 /// Languages without per-symbol metadata: `()`.
598 type SymbolData: Debug
599 + Clone
600 + Default
601 + PartialEq
602 + Eq
603 + Serialize
604 + DeserializeOwned
605 + Send
606 + Sync;
607
608 /// Behavioral change categories for this language.
609 type Category: Debug + Clone + Serialize + DeserializeOwned + Eq + std::hash::Hash + Send + Sync;
610
611 /// Manifest change types for this language's package system.
612 type ManifestChangeType: Debug
613 + Clone
614 + Serialize
615 + DeserializeOwned
616 + Eq
617 + PartialEq
618 + Send
619 + Sync;
620
621 /// Evidence data carried on behavioral changes.
622 type Evidence: Debug + Clone + Serialize + DeserializeOwned + Send + Sync;
623
624 /// Language-specific report data.
625 type ReportData: Debug + Clone + Serialize + DeserializeOwned + Send + Sync;
626
627 /// Language-specific analysis extensions.
628 ///
629 /// Carries pipeline results that are specific to this language
630 /// (e.g., SD pipeline results, hierarchy deltas for TypeScript).
631 /// Replaces the concrete `sd_result` and `hierarchy_deltas` fields
632 /// that were previously on `AnalysisReport`/`AnalysisResult`.
633 ///
634 /// TypeScript: `TsAnalysisExtensions` (SD result, hierarchy deltas).
635 /// Languages without extended analysis: `EmptyExtensions`.
636 type AnalysisExtensions: Debug + Clone + Default + Serialize + DeserializeOwned + Send + Sync;
637
638 // ── Constants ────────────────────────────────────────────────────
639
640 /// Symbol kinds that represent type definitions eligible for rename inference.
641 /// TypeScript: `&[SymbolKind::Interface, SymbolKind::Class]`
642 /// Go: `&[SymbolKind::Struct, SymbolKind::Interface]`
643 const RENAMEABLE_SYMBOL_KINDS: &'static [SymbolKind];
644
645 /// Language identifier for serialization dispatch.
646 const NAME: &'static str;
647
648 /// Manifest file path(s) for this language's package system.
649 ///
650 /// TypeScript: `&["package.json"]`
651 /// Go: `&["go.mod"]`
652 /// Java: `&["pom.xml"]` or `&["build.gradle"]`
653 ///
654 /// TODO: Reconsider — the orchestrator currently reads these files via git
655 /// and passes content to `diff_manifest_content`. A future refactor should
656 /// unify all git plumbing in the orchestrator so language impls are pure
657 /// content processors.
658 const MANIFEST_FILES: &'static [&'static str];
659
660 /// Discover per-package manifest files in monorepos.
661 ///
662 /// Returns `(manifest_path, package_name)` pairs for sub-packages.
663 /// The orchestrator diffs each discovered manifest in addition to
664 /// the static `MANIFEST_FILES` and tags resulting changes with
665 /// `source_package`.
666 ///
667 /// Default: empty (non-monorepo projects or languages without workspaces).
668 fn discover_package_manifests(_repo: &Path, _git_ref: &str) -> Vec<(String, String)> {
669 vec![]
670 }
671
672 /// Source file glob patterns for `git diff --name-only` filtering.
673 ///
674 /// TypeScript: `&["*.ts", "*.tsx"]`
675 /// Go: `&["*.go"]`
676 /// Java: `&["*.java"]`
677 ///
678 /// TODO: Same reconsideration as MANIFEST_FILES.
679 const SOURCE_FILE_PATTERNS: &'static [&'static str];
680
681 // ── Analysis pipeline methods ───────────────────────────────────
682
683 /// Extract the public API surface from source code at a git ref.
684 ///
685 /// The implementation is responsible for checking out the ref,
686 /// running any required build steps, parsing the output, and
687 /// cleaning up temporary files.
688 ///
689 /// An optional `DegradationTracker` can be provided to record non-fatal
690 /// extraction issues (e.g., partial tsc success). These appear in the
691 /// end-of-run summary.
692 fn extract(
693 &self,
694 repo: &Path,
695 git_ref: &str,
696 degradation: Option<&crate::diagnostics::DegradationTracker>,
697 ) -> Result<ApiSurface<Self::SymbolData>>;
698
699 /// Extract the API surface and optionally keep the worktree alive.
700 ///
701 /// Like `extract()`, but returns an `Arc<dyn WorktreeAccess>` that
702 /// keeps the worktree alive after extraction. The orchestrator uses
703 /// this to share worktrees between TD and SD pipelines.
704 ///
705 /// Default implementation calls `extract()` and returns `None` for
706 /// the worktree handle (worktree is created and dropped internally).
707 /// Languages that support worktree sharing override this to wrap
708 /// the guard in `Arc` and return it.
709 fn extract_keeping_worktree(
710 &self,
711 repo: &Path,
712 git_ref: &str,
713 degradation: Option<&crate::diagnostics::DegradationTracker>,
714 ) -> Result<ExtractionWithWorktree<Self::SymbolData>> {
715 let surface = self.extract(repo, git_ref, degradation)?;
716 Ok((surface, None))
717 }
718
719 /// Parse the diff between two git refs and identify all functions
720 /// whose bodies changed (public AND private).
721 fn parse_changed_functions(
722 &self,
723 repo: &Path,
724 from_ref: &str,
725 to_ref: &str,
726 ) -> Result<Vec<ChangedFunction>>;
727
728 /// Given a function, find what calls it (callers, not callees).
729 fn find_callers(&self, file: &Path, symbol_name: &str) -> Result<Vec<Caller>>;
730
731 /// Given a public symbol, find all references to it across the project.
732 fn find_references(&self, file: &Path, symbol_name: &str) -> Result<Vec<Reference>>;
733
734 /// Given a source file, find its associated test file(s) by convention.
735 fn find_tests(&self, repo: &Path, source_file: &Path) -> Result<Vec<TestFile>>;
736
737 /// Diff the test file between two refs. Returns changed assertion lines.
738 fn diff_test_assertions(
739 &self,
740 repo: &Path,
741 test_file: &TestFile,
742 from_ref: &str,
743 to_ref: &str,
744 ) -> Result<TestDiff>;
745
746 // ── Methods ─────────────────────────────────────────────────────
747
748 /// Diff manifest content between two versions.
749 ///
750 /// The orchestrator reads the manifest file(s) at both refs and passes
751 /// the raw content here. The language interprets the format and determines
752 /// what changed and whether it's breaking.
753 ///
754 /// TODO: Reconsider — same as above re: git plumbing ownership.
755 fn diff_manifest_content(old: &str, new: &str) -> Vec<crate::types::ManifestChange<Self>>
756 where
757 Self: Sized;
758
759 /// Whether a file path should be excluded from BU analysis.
760 ///
761 /// Filters out test files, build artifacts, index/barrel files, etc.
762 /// TypeScript: excludes `index.ts`, `.d.ts`, `.test.`, `.spec.`,
763 /// `__tests__/`, `dist/`
764 ///
765 /// TODO: Same reconsideration as above.
766 fn should_exclude_from_analysis(path: &Path) -> bool;
767
768 /// Build the language-specific report from analysis results.
769 ///
770 /// This is the primary report-building entry point. The Language owns
771 /// the entire report construction — language-agnostic structure (grouping
772 /// changes by file, counting breaks) AND language-specific enrichment
773 /// (component detection, hierarchy, child components, etc.).
774 ///
775 /// The result is dropped into a `ReportEnvelope` by the caller.
776 fn build_report(
777 &self,
778 results: &crate::types::AnalysisResult<Self>,
779 repo: &Path,
780 from_ref: &str,
781 to_ref: &str,
782 ) -> crate::types::AnalysisReport<Self>
783 where
784 Self: Sized;
785
786 // ── Behavioral change methods ───────────────────────────────
787
788 /// Determine the behavioral change kind from the evidence type.
789 /// TypeScript: LLM/body analysis → Class (component-level), test delta → Function
790 /// Default: always Function
791 fn behavioral_change_kind(&self, _evidence_type: &EvidenceType) -> BehavioralChangeKind {
792 BehavioralChangeKind::Function
793 }
794
795 /// Extract symbol references from a behavioral change description.
796 /// TypeScript: extracts PascalCase component names (e.g., `<Modal>`, `` `Button` ``)
797 /// Default: empty vec
798 fn extract_referenced_symbols(&self, _description: &str) -> Vec<String> {
799 vec![]
800 }
801
802 /// Format a qualified name for display in reports.
803 /// TypeScript: `src/Modal.tsx::Modal` → `Modal`
804 /// Default: return the qualified name as-is
805 fn display_name(&self, qualified_name: &str) -> String {
806 qualified_name.to_string()
807 }
808
809 /// Return the behavioral change categories for LLM prompts.
810 ///
811 /// Each category has an `id` that must match the serde serialization of
812 /// the corresponding `Language::Category` variant. The LLM prompt is
813 /// built dynamically from these definitions, so adding a new language
814 /// automatically gets language-appropriate behavioral categories.
815 ///
816 /// Default: empty (no behavioral categories — LLM skips category assignment).
817 fn llm_categories(&self) -> Vec<LlmCategoryDefinition> {
818 vec![]
819 }
820
821 // ── v2 Extended Analysis pipeline ───────────────────────────────
822
823 /// Run language-specific extended analysis.
824 ///
825 /// For TypeScript, this runs the SD (Source-Level Diff) pipeline:
826 /// reads component source files at both refs, extracts structured
827 /// profiles, diffs them, and builds composition trees.
828 ///
829 /// The `params` struct carries both common fields (repo, refs, CSS dir)
830 /// and data computed by the orchestrator (removed CSS blocks, dep-repo
831 /// packages) that the language impl can attach to its extensions.
832 ///
833 /// Default implementation returns empty extensions (no extended analysis).
834 fn run_extended_analysis(
835 &self,
836 _params: &ExtendedAnalysisParams,
837 ) -> Result<Self::AnalysisExtensions> {
838 Ok(Self::AnalysisExtensions::default())
839 }
840
841 /// Post-process extensions after both TD and extended analysis complete.
842 ///
843 /// This is where language-specific cross-pipeline processing happens.
844 /// For TypeScript, this runs deprecated replacement detection (requires
845 /// both TD structural changes and SD source-level changes) and transforms
846 /// structural changes accordingly. The `repo`/`from_ref`/`to_ref` params
847 /// enable git-based analysis (e.g., commit co-change analysis for
848 /// deprecated replacement detection).
849 ///
850 /// Returns the (potentially modified) structural changes.
851 /// Default implementation is a no-op.
852 fn finalize_extensions(
853 &self,
854 _extensions: &mut Self::AnalysisExtensions,
855 structural_changes: Arc<Vec<StructuralChange>>,
856 _repo: &Path,
857 _from_ref: &str,
858 _to_ref: &str,
859 ) -> Arc<Vec<StructuralChange>> {
860 structural_changes
861 }
862
863 /// Return log-friendly summary lines for the extended analysis results.
864 ///
865 /// The orchestrator calls this for progress/logging output.
866 /// Default implementation returns empty (no summary).
867 fn extensions_log_summary(&self, _extensions: &Self::AnalysisExtensions) -> Vec<String> {
868 vec![]
869 }
870}
871
872// ── Convenience functions (TD) ──────────────────────────────────────────
873
874/// Compare two API surfaces using language-specific semantic rules.
875///
876/// This is the primary entry point for the TD (Top-Down) pipeline.
877/// The `semantics` parameter provides language-specific rules.
878pub fn diff_surfaces_with_semantics<M, S>(
879 old: &ApiSurface<M>,
880 new: &ApiSurface<M>,
881 semantics: &S,
882) -> Vec<StructuralChange>
883where
884 M: Default + Clone + PartialEq,
885 S: LanguageSemantics<M>,
886{
887 crate::diff::diff_surfaces_with_semantics(old, new, semantics)
888}
889
890/// Compare two API surfaces using minimal semantics (no language-specific rules).
891///
892/// This uses `MinimalSemantics` which is language-agnostic: no member additions
893/// are breaking, no union parsing, no post-processing. For language-aware
894/// diffing, use `diff_surfaces_with_semantics` with a `LanguageSemantics` impl.
895pub fn diff_surfaces<M: Default + Clone + PartialEq>(
896 old: &ApiSurface<M>,
897 new: &ApiSurface<M>,
898) -> Vec<StructuralChange> {
899 crate::diff::diff_surfaces(old, new)
900}
901
902// Hierarchy algorithm tests live in crates/ts/src/language.rs where
903// they can use TsSymbolData.rendered_components. The core default
904// implementation returns an empty map.