Skip to main content

reflectapi_schema/
normalize.rs

//! Normalization pipeline for transforming raw schemas into semantic IRs.
//!
//! This module provides the core normalization passes that transform
//! the raw `reflectapi_schema` types into validated, immutable semantic
//! representations with deterministic ordering and resolved dependencies.
6use crate::symbol::{STDLIB_TYPES, STDLIB_TYPE_PREFIXES};
7use crate::{
8    Enum, Field, FieldStyle, Fields, Function, Primitive, ResolvedTypeReference, Schema,
9    SemanticEnum, SemanticField, SemanticFunction, SemanticPrimitive, SemanticSchema,
10    SemanticStruct, SemanticType, SemanticTypeParameter, SemanticVariant, Struct, SymbolId,
11    SymbolInfo, SymbolKind, SymbolTable, Type, TypeReference, Variant,
12};
13use std::collections::{BTreeMap, BTreeSet, HashMap};
14
15/// Trait for individual normalization stages in the pipeline
16pub trait NormalizationStage {
17    fn name(&self) -> &'static str;
18    fn transform(&self, schema: &mut Schema) -> Result<(), Vec<NormalizationError>>;
19}
20
21/// Normalization pipeline that applies multiple stages in sequence
22#[derive(Default)]
23pub struct NormalizationPipeline {
24    stages: Vec<Box<dyn NormalizationStage>>,
25}
26
27impl NormalizationPipeline {
28    pub fn new() -> Self {
29        Self { stages: Vec::new() }
30    }
31
32    pub fn add_stage<S: NormalizationStage + 'static>(mut self, stage: S) -> Self {
33        self.stages.push(Box::new(stage));
34        self
35    }
36
37    pub fn run(&self, schema: &mut Schema) -> Result<(), Vec<NormalizationError>> {
38        for stage in &self.stages {
39            stage.transform(schema)?;
40        }
41        Ok(())
42    }
43
44    /// Create the standard normalization pipeline.
45    ///
46    /// Delegates to `PipelineBuilder` with all default settings.
47    pub fn standard() -> Self {
48        PipelineBuilder::new().build()
49    }
50
51    /// Create a codegen-oriented pipeline that only runs CircularDependencyResolution.
52    ///
53    /// This is designed for use when the caller has already run
54    /// `schema.consolidate_types()` and does not want NamingResolution
55    /// (which would rename types and create a name-domain mismatch
56    /// between the SemanticSchema and the raw Schema used for rendering).
57    ///
58    /// Delegates to `PipelineBuilder` with consolidation and naming skipped.
59    pub fn for_codegen() -> Self {
60        PipelineBuilder::new()
61            .consolidation(Consolidation::Skip)
62            .naming(Naming::Skip)
63            .build()
64    }
65}
66
67// ---------------------------------------------------------------------------
68// PipelineBuilder: configurable pipeline construction
69// ---------------------------------------------------------------------------
70
/// Selects whether the builder adds the type consolidation stage, which
/// merges input and output types.
#[derive(Debug, Clone, Default)]
pub enum Consolidation {
    /// Add the standard `TypeConsolidationStage` (the default).
    #[default]
    Standard,
    /// Do not add any consolidation stage.
    Skip,
}
80
81/// Controls how type names are resolved.
82#[derive(Default)]
83pub enum Naming {
84    /// Run the standard `NamingResolutionStage`.
85    #[default]
86    Standard,
87    /// Skip naming resolution entirely.
88    Skip,
89    /// Use a custom naming stage.
90    Custom(Box<dyn NormalizationStage>),
91}
92
93impl std::fmt::Debug for Naming {
94    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
95        match self {
96            Naming::Standard => write!(f, "Naming::Standard"),
97            Naming::Skip => write!(f, "Naming::Skip"),
98            Naming::Custom(_) => write!(f, "Naming::Custom(...)"),
99        }
100    }
101}
102
103/// Builder for configuring a normalization pipeline.
104///
105/// Provides fine-grained control over which normalization stages are included
106/// and in what order. The default configuration matches `NormalizationPipeline::standard()`.
107///
108/// # Examples
109///
110/// ```rust,ignore
111/// // Standard pipeline (equivalent to NormalizationPipeline::standard())
112/// let pipeline = PipelineBuilder::new().build();
113///
114/// // Codegen pipeline (equivalent to NormalizationPipeline::for_codegen())
115/// let pipeline = PipelineBuilder::new()
116///     .consolidation(Consolidation::Skip)
117///     .naming(Naming::Skip)
118///     .build();
119///
120/// // Custom pipeline with extra stages
121/// let pipeline = PipelineBuilder::new()
122///     .circular_dependency_strategy(ResolutionStrategy::Boxing)
123///     .add_stage(MyCustomStage)
124///     .build();
125/// ```
126pub struct PipelineBuilder {
127    consolidation: Consolidation,
128    naming: Naming,
129    circular_dependency_strategy: ResolutionStrategy,
130    extra_stages: Vec<Box<dyn NormalizationStage>>,
131}
132
133impl Default for PipelineBuilder {
134    fn default() -> Self {
135        Self::new()
136    }
137}
138
139impl PipelineBuilder {
140    /// Create a new builder with default settings (all stages enabled).
141    pub fn new() -> Self {
142        Self {
143            consolidation: Consolidation::default(),
144            naming: Naming::default(),
145            circular_dependency_strategy: ResolutionStrategy::default(),
146            extra_stages: Vec::new(),
147        }
148    }
149
150    /// Set the consolidation strategy.
151    pub fn consolidation(mut self, consolidation: Consolidation) -> Self {
152        self.consolidation = consolidation;
153        self
154    }
155
156    /// Set the naming resolution strategy.
157    pub fn naming(mut self, naming: Naming) -> Self {
158        self.naming = naming;
159        self
160    }
161
162    /// Set the circular dependency resolution strategy.
163    pub fn circular_dependency_strategy(mut self, strategy: ResolutionStrategy) -> Self {
164        self.circular_dependency_strategy = strategy;
165        self
166    }
167
168    /// Append a custom stage that will run after the built-in stages.
169    pub fn add_stage<S: NormalizationStage + 'static>(mut self, stage: S) -> Self {
170        self.extra_stages.push(Box::new(stage));
171        self
172    }
173
174    /// Build the configured `NormalizationPipeline`.
175    ///
176    /// Stages are added in order:
177    /// 1. Type consolidation (if not skipped)
178    /// 2. Naming resolution (if not skipped, or custom stage)
179    /// 3. Circular dependency resolution (always included)
180    /// 4. Any extra stages added via `add_stage()`
181    pub fn build(self) -> NormalizationPipeline {
182        let mut pipeline = NormalizationPipeline::new();
183
184        match self.consolidation {
185            Consolidation::Standard => {
186                pipeline = pipeline.add_stage(TypeConsolidationStage);
187            }
188            Consolidation::Skip => {}
189        }
190
191        match self.naming {
192            Naming::Standard => {
193                pipeline = pipeline.add_stage(NamingResolutionStage);
194            }
195            Naming::Skip => {}
196            Naming::Custom(stage) => {
197                pipeline.stages.push(stage);
198            }
199        }
200
201        pipeline = pipeline.add_stage(CircularDependencyResolutionStage::with_strategy(
202            self.circular_dependency_strategy,
203        ));
204
205        for stage in self.extra_stages {
206            pipeline.stages.push(stage);
207        }
208
209        pipeline
210    }
211}
212
213// ---------------------------------------------------------------------------
214// Stage 1: Type Consolidation
215// ---------------------------------------------------------------------------
216
217/// Merges input_types and output_types into a single unified types collection.
218/// Handles naming conflicts by renaming types with prefixes.
219pub struct TypeConsolidationStage;
220
221impl NormalizationStage for TypeConsolidationStage {
222    fn name(&self) -> &'static str {
223        "TypeConsolidation"
224    }
225
226    fn transform(&self, schema: &mut Schema) -> Result<(), Vec<NormalizationError>> {
227        use crate::Typespace;
228
229        let mut consolidated = Typespace::new();
230        let mut name_conflicts = HashMap::new();
231        // Tracks old_name -> new_name for type reference rewriting
232        let mut rename_map: HashMap<String, String> = HashMap::new();
233
234        let mut input_type_names = HashMap::new();
235        let mut output_type_names = HashMap::new();
236
237        for ty in schema.input_types.types() {
238            let simple_name = extract_simple_name(ty.name());
239            input_type_names.insert(simple_name.clone(), ty.clone());
240
241            if output_type_names.contains_key(&simple_name) {
242                name_conflicts.insert(simple_name, true);
243            }
244        }
245
246        for ty in schema.output_types.types() {
247            let simple_name = extract_simple_name(ty.name());
248            output_type_names.insert(simple_name.clone(), ty.clone());
249
250            if input_type_names.contains_key(&simple_name) {
251                name_conflicts.insert(simple_name, true);
252            }
253        }
254
255        for ty in schema.input_types.types() {
256            let simple_name = extract_simple_name(ty.name());
257            let mut new_type = ty.clone();
258
259            if name_conflicts.contains_key(&simple_name) {
260                let old_name = ty.name().to_string();
261                let new_name = format!("input.{}", ty.name().replace("::", "."));
262                rename_type(&mut new_type, &new_name);
263                rename_map.insert(old_name, new_name);
264            }
265
266            consolidated.insert_type(new_type);
267        }
268
269        for ty in schema.output_types.types() {
270            let simple_name = extract_simple_name(ty.name());
271            let mut new_type = ty.clone();
272
273            if name_conflicts.contains_key(&simple_name) {
274                let old_name = ty.name().to_string();
275                let new_name = format!("output.{}", ty.name().replace("::", "."));
276                rename_type(&mut new_type, &new_name);
277                rename_map.insert(old_name, new_name);
278                consolidated.insert_type(new_type);
279            } else if !input_type_names.contains_key(&simple_name) {
280                consolidated.insert_type(new_type);
281            }
282        }
283
284        schema.input_types = consolidated;
285        schema.output_types = Typespace::new();
286
287        // Rewrite type references that still point to old names
288        if !rename_map.is_empty() {
289            for function in &mut schema.functions {
290                update_type_reference_in_option(&mut function.input_type, &rename_map);
291                update_type_reference_in_option(&mut function.input_headers, &rename_map);
292                update_type_reference_in_option(&mut function.output_type, &rename_map);
293                update_type_reference_in_option(&mut function.error_type, &rename_map);
294            }
295
296            let types_to_update: Vec<_> = schema.input_types.types().cloned().collect();
297            schema.input_types = Typespace::new();
298            for mut ty in types_to_update {
299                update_type_references_in_type(&mut ty, &rename_map);
300                schema.input_types.insert_type(ty);
301            }
302        }
303
304        Ok(())
305    }
306}
307
/// Returns the last `::`-separated segment of `qualified_name`.
///
/// A name without `::` is returned unchanged; a name that ends in `::`
/// yields an empty string.
fn extract_simple_name(qualified_name: &str) -> String {
    let mut tail = qualified_name;
    for segment in qualified_name.split("::") {
        tail = segment;
    }
    tail.to_owned()
}
315
316fn rename_type(ty: &mut Type, new_name: &str) {
317    let new_path: Vec<String> = new_name.split("::").map(|s| s.to_string()).collect();
318    match ty {
319        Type::Struct(s) => {
320            s.name = new_name.to_string();
321            s.id.path = new_path;
322        }
323        Type::Enum(e) => {
324            e.name = new_name.to_string();
325            e.id.path = new_path;
326        }
327        Type::Primitive(p) => {
328            p.name = new_name.to_string();
329            p.id.path = new_path;
330        }
331    }
332}
333
334// ---------------------------------------------------------------------------
335// Stage 2: Naming Resolution
336// ---------------------------------------------------------------------------
337
338/// Sanitizes type names by stripping module paths and handling naming conflicts.
339pub struct NamingResolutionStage;
340
341impl NormalizationStage for NamingResolutionStage {
342    fn name(&self) -> &'static str {
343        "NamingResolution"
344    }
345
346    fn transform(&self, schema: &mut Schema) -> Result<(), Vec<NormalizationError>> {
347        let mut name_usage: HashMap<String, Vec<String>> = HashMap::new();
348        let mut name_conflicts = HashMap::new();
349
350        for ty in schema.input_types.types() {
351            let qualified_name = ty.name().to_string();
352            let simple_name = extract_simple_name(&qualified_name);
353
354            let entries = name_usage.entry(simple_name.clone()).or_default();
355            if !entries.contains(&qualified_name) {
356                if !entries.is_empty() {
357                    name_conflicts.insert(simple_name.clone(), true);
358                }
359                entries.push(qualified_name);
360            }
361        }
362
363        let types_to_update: Vec<_> = schema.input_types.types().cloned().collect();
364        schema.input_types = crate::Typespace::new();
365
366        for mut ty in types_to_update {
367            let qualified_name = ty.name().to_string();
368            let simple_name = extract_simple_name(&qualified_name);
369
370            let resolved_name = if name_conflicts.contains_key(&simple_name) {
371                generate_unique_name(&qualified_name)
372            } else {
373                simple_name
374            };
375
376            rename_type(&mut ty, &resolved_name);
377            schema.input_types.insert_type(ty);
378        }
379
380        update_type_references_in_schema(schema, &name_usage, &name_conflicts);
381
382        Ok(())
383    }
384}
385
386fn generate_unique_name(qualified_name: &str) -> String {
387    let parts: Vec<&str> = qualified_name.split("::").collect();
388    if parts.len() < 2 {
389        return qualified_name.to_string();
390    }
391
392    let type_name = parts.last().unwrap();
393    let module_parts: Vec<&str> = parts[..parts.len() - 1].to_vec();
394
395    let non_excluded: Vec<&str> = module_parts
396        .iter()
397        .filter(|&&part| part != "model" && part != "proto" && !part.is_empty())
398        .copied()
399        .collect();
400
401    let prefix = if non_excluded.is_empty() {
402        module_parts.join("_")
403    } else {
404        non_excluded
405            .iter()
406            .map(|s| capitalize_first_letter(s))
407            .collect::<Vec<_>>()
408            .join("")
409    };
410    format!("{prefix}{type_name}")
411}
412
/// Returns `s` with its first character converted to uppercase.
///
/// The remaining characters are copied unchanged; an empty input yields an
/// empty string.
fn capitalize_first_letter(s: &str) -> String {
    let mut rest = s.chars();
    let Some(head) = rest.next() else {
        return String::new();
    };

    let mut capitalized: String = head.to_uppercase().collect();
    capitalized.push_str(rest.as_str());
    capitalized
}
420
421fn update_type_references_in_schema(
422    schema: &mut Schema,
423    name_usage: &HashMap<String, Vec<String>>,
424    name_conflicts: &HashMap<String, bool>,
425) {
426    let mut name_mapping = HashMap::new();
427
428    for (simple_name, qualified_names) in name_usage {
429        if name_conflicts.contains_key(simple_name) {
430            for qualified_name in qualified_names {
431                let resolved_name = generate_unique_name(qualified_name);
432                name_mapping.insert(qualified_name.clone(), resolved_name);
433            }
434        } else {
435            for qualified_name in qualified_names {
436                name_mapping.insert(qualified_name.clone(), simple_name.clone());
437            }
438        }
439    }
440
441    for function in &mut schema.functions {
442        update_type_reference_in_option(&mut function.input_type, &name_mapping);
443        update_type_reference_in_option(&mut function.input_headers, &name_mapping);
444        update_type_reference_in_option(&mut function.output_type, &name_mapping);
445        update_type_reference_in_option(&mut function.error_type, &name_mapping);
446    }
447
448    let types_to_update: Vec<_> = schema.input_types.types().cloned().collect();
449    schema.input_types = crate::Typespace::new();
450
451    for mut ty in types_to_update {
452        update_type_references_in_type(&mut ty, &name_mapping);
453        schema.input_types.insert_type(ty);
454    }
455}
456
457fn update_type_reference(
458    type_ref: &mut crate::TypeReference,
459    name_mapping: &HashMap<String, String>,
460) {
461    if let Some(new_name) = name_mapping.get(&type_ref.name) {
462        type_ref.name.clone_from(new_name);
463    }
464
465    for arg in &mut type_ref.arguments {
466        update_type_reference(arg, name_mapping);
467    }
468}
469
470fn update_type_reference_in_option(
471    type_ref_opt: &mut Option<crate::TypeReference>,
472    name_mapping: &HashMap<String, String>,
473) {
474    if let Some(type_ref) = type_ref_opt {
475        update_type_reference(type_ref, name_mapping);
476    }
477}
478
479fn update_type_references_in_type(ty: &mut crate::Type, name_mapping: &HashMap<String, String>) {
480    match ty {
481        crate::Type::Struct(s) => match &mut s.fields {
482            crate::Fields::Named(fields) | crate::Fields::Unnamed(fields) => {
483                for field in fields {
484                    update_type_reference(&mut field.type_ref, name_mapping);
485                }
486            }
487            crate::Fields::None => {}
488        },
489        crate::Type::Enum(e) => {
490            for variant in &mut e.variants {
491                match &mut variant.fields {
492                    crate::Fields::Named(fields) | crate::Fields::Unnamed(fields) => {
493                        for field in fields {
494                            update_type_reference(&mut field.type_ref, name_mapping);
495                        }
496                    }
497                    crate::Fields::None => {}
498                }
499            }
500        }
501        crate::Type::Primitive(p) => {
502            if let Some(fallback) = &mut p.fallback {
503                update_type_reference(fallback, name_mapping);
504            }
505        }
506    }
507}
508
509// ---------------------------------------------------------------------------
510// Stage 3: Circular Dependency Resolution
511// ---------------------------------------------------------------------------
512
513/// Detects and resolves circular dependencies using Tarjan's SCC algorithm
514/// and configurable resolution strategies.
515pub struct CircularDependencyResolutionStage {
516    strategy: ResolutionStrategy,
517}
518
/// Strategy for resolving a detected dependency cycle.
#[derive(Debug, Clone, Default)]
pub enum ResolutionStrategy {
    /// Boxing for a cycle of a single type, forward declarations for larger
    /// cycles (the default).
    #[default]
    Intelligent,
    /// Always use `Box<T>` for self-references.
    Boxing,
    /// Always use forward declarations.
    ForwardDeclarations,
    /// Make circular references optional.
    OptionalBreaking,
    /// Use reference counting for complex cycles.
    ReferenceCounted,
}
533
534impl CircularDependencyResolutionStage {
535    pub fn new() -> Self {
536        Self {
537            strategy: ResolutionStrategy::default(),
538        }
539    }
540
541    pub fn with_strategy(strategy: ResolutionStrategy) -> Self {
542        Self { strategy }
543    }
544}
545
546impl Default for CircularDependencyResolutionStage {
547    fn default() -> Self {
548        Self::new()
549    }
550}
551
552impl NormalizationStage for CircularDependencyResolutionStage {
553    fn name(&self) -> &'static str {
554        "CircularDependencyResolution"
555    }
556
557    fn transform(&self, schema: &mut Schema) -> Result<(), Vec<NormalizationError>> {
558        let cycles = self.detect_circular_dependencies(schema)?;
559
560        if cycles.is_empty() {
561            return Ok(());
562        }
563
564        for cycle in cycles {
565            self.resolve_cycle(schema, &cycle)?;
566        }
567
568        Ok(())
569    }
570}
571
572impl CircularDependencyResolutionStage {
573    fn detect_circular_dependencies(
574        &self,
575        schema: &Schema,
576    ) -> Result<Vec<Vec<String>>, Vec<NormalizationError>> {
577        let mut dependencies: HashMap<String, BTreeSet<String>> = HashMap::new();
578
579        for ty in schema
580            .input_types
581            .types()
582            .chain(schema.output_types.types())
583        {
584            let type_name = ty.name().to_string();
585            let mut deps = BTreeSet::new();
586            self.collect_type_dependencies(ty, &mut deps);
587            dependencies.insert(type_name, deps);
588        }
589
590        let scc_cycles = self.find_strongly_connected_components(&dependencies);
591
592        let mut cycles = Vec::new();
593        for component in scc_cycles {
594            if component.len() > 1
595                || (component.len() == 1
596                    && dependencies
597                        .get(&component[0])
598                        .is_some_and(|deps| deps.contains(&component[0])))
599            {
600                cycles.push(component);
601            }
602        }
603
604        Ok(cycles)
605    }
606
607    fn collect_type_dependencies(&self, ty: &Type, deps: &mut BTreeSet<String>) {
608        match ty {
609            Type::Struct(s) => {
610                for field in s.fields() {
611                    self.collect_type_ref_dependencies(&field.type_ref, deps);
612                }
613            }
614            Type::Enum(e) => {
615                for variant in e.variants() {
616                    for field in variant.fields() {
617                        self.collect_type_ref_dependencies(&field.type_ref, deps);
618                    }
619                }
620            }
621            Type::Primitive(p) => {
622                if let Some(fallback) = &p.fallback {
623                    self.collect_type_ref_dependencies(fallback, deps);
624                }
625            }
626        }
627    }
628
629    fn collect_type_ref_dependencies(&self, type_ref: &TypeReference, deps: &mut BTreeSet<String>) {
630        if !self.is_stdlib_type(&type_ref.name) && !self.is_generic_parameter(&type_ref.name) {
631            deps.insert(type_ref.name.clone());
632        }
633
634        for arg in &type_ref.arguments {
635            self.collect_type_ref_dependencies(arg, deps);
636        }
637    }
638
639    fn is_stdlib_type(&self, name: &str) -> bool {
640        // Check exact matches from the canonical list
641        if STDLIB_TYPES.iter().any(|&(n, _)| n == name) {
642            return true;
643        }
644        // Fall back to prefix matching for types not explicitly listed
645        STDLIB_TYPE_PREFIXES
646            .iter()
647            .any(|prefix| name.starts_with(prefix))
648    }
649
650    fn is_generic_parameter(&self, name: &str) -> bool {
651        name.len() <= 2 && name.chars().all(|c| c.is_ascii_uppercase())
652    }
653
654    fn find_strongly_connected_components(
655        &self,
656        dependencies: &HashMap<String, BTreeSet<String>>,
657    ) -> Vec<Vec<String>> {
658        let mut index = 0;
659        let mut stack = Vec::new();
660        let mut indices: HashMap<String, usize> = HashMap::new();
661        let mut lowlinks: HashMap<String, usize> = HashMap::new();
662        let mut on_stack: HashMap<String, bool> = HashMap::new();
663        let mut components = Vec::new();
664
665        for node in dependencies.keys() {
666            if !indices.contains_key(node) {
667                self.strongconnect(
668                    node,
669                    dependencies,
670                    &mut index,
671                    &mut stack,
672                    &mut indices,
673                    &mut lowlinks,
674                    &mut on_stack,
675                    &mut components,
676                );
677            }
678        }
679
680        components
681    }
682
683    #[allow(clippy::too_many_arguments, clippy::only_used_in_recursion)]
684    fn strongconnect(
685        &self,
686        node: &str,
687        dependencies: &HashMap<String, BTreeSet<String>>,
688        index: &mut usize,
689        stack: &mut Vec<String>,
690        indices: &mut HashMap<String, usize>,
691        lowlinks: &mut HashMap<String, usize>,
692        on_stack: &mut HashMap<String, bool>,
693        components: &mut Vec<Vec<String>>,
694    ) {
695        indices.insert(node.to_string(), *index);
696        lowlinks.insert(node.to_string(), *index);
697        *index += 1;
698        stack.push(node.to_string());
699        on_stack.insert(node.to_string(), true);
700
701        if let Some(deps) = dependencies.get(node) {
702            for neighbor in deps {
703                if !indices.contains_key(neighbor) {
704                    self.strongconnect(
705                        neighbor,
706                        dependencies,
707                        index,
708                        stack,
709                        indices,
710                        lowlinks,
711                        on_stack,
712                        components,
713                    );
714                    lowlinks.insert(node.to_string(), lowlinks[node].min(lowlinks[neighbor]));
715                } else if *on_stack.get(neighbor).unwrap_or(&false) {
716                    lowlinks.insert(node.to_string(), lowlinks[node].min(indices[neighbor]));
717                }
718            }
719        }
720
721        if lowlinks[node] == indices[node] {
722            let mut component = Vec::new();
723            loop {
724                let w = stack.pop().unwrap();
725                on_stack.insert(w.clone(), false);
726                component.push(w.clone());
727                if w == node {
728                    break;
729                }
730            }
731            if !component.is_empty() {
732                components.push(component);
733            }
734        }
735    }
736
737    fn resolve_cycle(
738        &self,
739        schema: &mut Schema,
740        cycle: &[String],
741    ) -> Result<(), Vec<NormalizationError>> {
742        match self.strategy {
743            ResolutionStrategy::Intelligent => {
744                if cycle.len() == 1 {
745                    self.apply_boxing_strategy(schema, cycle)
746                } else {
747                    self.apply_forward_declaration_strategy(schema, cycle)
748                }
749            }
750            ResolutionStrategy::Boxing => self.apply_boxing_strategy(schema, cycle),
751            ResolutionStrategy::ForwardDeclarations => {
752                self.apply_forward_declaration_strategy(schema, cycle)
753            }
754            ResolutionStrategy::OptionalBreaking => {
755                self.apply_optional_breaking_strategy(schema, cycle)
756            }
757            ResolutionStrategy::ReferenceCounted => {
758                self.apply_reference_counting_strategy(schema, cycle)
759            }
760        }
761    }
762
763    /// No-op: Rust schemas already encode `Box<T>` in the type references, so
764    /// self-referential types (cycle length 1) and multi-type cycles (A → B → A)
765    /// are already representable.  The cycle detection performed by the
766    /// `CircularDependencyResolutionStage` is still valuable — downstream codegen
767    /// backends (e.g. Python, TypeScript) can query the detected cycles to emit
768    /// forward-reference annotations or similar language-specific constructs.
769    fn apply_boxing_strategy(
770        &self,
771        _schema: &mut Schema,
772        _cycle: &[String],
773    ) -> Result<(), Vec<NormalizationError>> {
774        Ok(())
775    }
776
777    fn apply_forward_declaration_strategy(
778        &self,
779        _schema: &mut Schema,
780        _cycle: &[String],
781    ) -> Result<(), Vec<NormalizationError>> {
782        // TODO: Implement forward declarations by creating type aliases
783        Ok(())
784    }
785
786    fn apply_optional_breaking_strategy(
787        &self,
788        _schema: &mut Schema,
789        _cycle: &[String],
790    ) -> Result<(), Vec<NormalizationError>> {
791        // TODO: Make certain fields optional to break cycles
792        Ok(())
793    }
794
795    fn apply_reference_counting_strategy(
796        &self,
797        _schema: &mut Schema,
798        _cycle: &[String],
799    ) -> Result<(), Vec<NormalizationError>> {
800        // TODO: Wrap cycle references in Rc<RefCell<T>>
801        Ok(())
802    }
803}
804
805// ---------------------------------------------------------------------------
806// Error types
807// ---------------------------------------------------------------------------
808
809#[derive(Debug, Clone, PartialEq, Eq)]
810pub enum NormalizationError {
811    UnresolvedReference {
812        name: String,
813        referrer: SymbolId,
814    },
815    CircularDependency {
816        cycle: Vec<SymbolId>,
817    },
818    ConflictingDefinition {
819        symbol: SymbolId,
820        existing: String,
821        new: String,
822    },
823    InvalidGenericParameter {
824        type_name: String,
825        parameter: String,
826        reason: String,
827    },
828    ValidationError {
829        symbol: SymbolId,
830        message: String,
831    },
832}
833
834impl std::fmt::Display for NormalizationError {
835    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
836        match self {
837            NormalizationError::UnresolvedReference { name, referrer } => {
838                write!(
839                    f,
840                    "Unresolved type reference '{name}' in symbol {referrer:?}"
841                )
842            }
843            NormalizationError::CircularDependency { cycle } => {
844                write!(f, "Circular dependency detected: {cycle:?}")
845            }
846            NormalizationError::ConflictingDefinition {
847                symbol,
848                existing,
849                new,
850            } => {
851                write!(
852                    f,
853                    "Conflicting definition for symbol {symbol:?}: existing '{existing}', new '{new}'"
854                )
855            }
856            NormalizationError::InvalidGenericParameter {
857                type_name,
858                parameter,
859                reason,
860            } => {
861                write!(
862                    f,
863                    "Invalid generic parameter '{parameter}' in type '{type_name}': {reason}"
864                )
865            }
866            NormalizationError::ValidationError { symbol, message } => {
867                write!(f, "Validation error for symbol {symbol:?}: {message}")
868            }
869        }
870    }
871}
872
873impl std::error::Error for NormalizationError {}
874
875// ---------------------------------------------------------------------------
876// Normalizer: main pipeline converting Schema -> SemanticSchema
877// ---------------------------------------------------------------------------
878
879#[derive(Debug)]
880struct NormalizationContext {
881    symbol_table: SymbolTable,
882    raw_types: HashMap<SymbolId, Type>,
883    raw_functions: HashMap<SymbolId, Function>,
884    resolution_cache: HashMap<String, SymbolId>,
885    generic_scope: BTreeSet<String>,
886    errors: Vec<NormalizationError>,
887}
888
889impl Default for NormalizationContext {
890    fn default() -> Self {
891        Self::new()
892    }
893}
894
895impl NormalizationContext {
896    fn new() -> Self {
897        Self {
898            symbol_table: SymbolTable::new(),
899            raw_types: HashMap::new(),
900            raw_functions: HashMap::new(),
901            resolution_cache: HashMap::new(),
902            generic_scope: BTreeSet::new(),
903            errors: Vec::new(),
904        }
905    }
906
907    fn has_errors(&self) -> bool {
908        !self.errors.is_empty()
909    }
910
911    fn take_errors(&mut self) -> Vec<NormalizationError> {
912        std::mem::take(&mut self.errors)
913    }
914}
915
916/// Main normalizer that converts a raw Schema into a SemanticSchema
917pub struct Normalizer {
918    context: NormalizationContext,
919}
920
921impl Normalizer {
922    pub fn new() -> Self {
923        Self {
924            context: NormalizationContext::new(),
925        }
926    }
927
928    /// Normalize a raw schema into a semantic schema using the standard pipeline.
929    pub fn normalize(self, schema: &Schema) -> Result<SemanticSchema, Vec<NormalizationError>> {
930        self.normalize_with_pipeline(schema, NormalizationPipeline::standard())
931    }
932
933    /// Normalize a raw schema into a semantic schema using a custom pipeline.
934    ///
935    /// Use `PipelineBuilder` to configure which stages run, or the convenience
936    /// methods `NormalizationPipeline::standard()` / `NormalizationPipeline::for_codegen()`.
937    pub fn normalize_with_pipeline(
938        mut self,
939        schema: &Schema,
940        pipeline: NormalizationPipeline,
941    ) -> Result<SemanticSchema, Vec<NormalizationError>> {
942        // Clone so that pipeline stages can mutate without affecting the caller
943        let mut schema = schema.clone();
944
945        // Phase 0: Ensure all symbols have unique, stable IDs
946        crate::ids::ensure_symbol_ids(&mut schema);
947
948        // Capture original type names BEFORE the pipeline transforms them.
949        // NamingResolution (if present in the pipeline) will strip module
950        // paths, so we need to map short names back to qualified names.
951        let pre_norm_names: Vec<String> = schema
952            .input_types
953            .types()
954            .chain(schema.output_types.types())
955            .map(|t| t.name().to_string())
956            .collect();
957
958        // Run the caller-provided pipeline
959        pipeline.run(&mut schema)?;
960
961        // Build the original_names reverse mapping.
962        // When NamingResolution runs, it strips module paths (e.g.
963        // "my_module::MyType" -> "MyType"). We map the short name back
964        // to the pre-pipeline qualified name.
965        // When NamingResolution is NOT in the pipeline, names are unchanged
966        // and the mapping is identity — the unwrap_or fallback handles this.
967        let mut original_names: HashMap<String, String> = HashMap::new();
968        for pre_name in &pre_norm_names {
969            let short = pre_name.split("::").last().unwrap_or(pre_name);
970            original_names
971                .entry(short.to_string())
972                .or_insert_with(|| pre_name.clone());
973        }
974
975        // Phase 1: Symbol Discovery
976        self.discover_symbols(&schema)?;
977
978        // Phase 2: Type Resolution
979        self.resolve_types()?;
980
981        // Phase 3: Dependency Analysis
982        self.analyze_dependencies()?;
983
984        // Phase 4: Semantic Validation
985        self.validate_semantics()?;
986
987        // Phase 5: IR Construction
988        self.build_semantic_ir(&schema, &original_names)
989    }
990
991    fn discover_symbols(&mut self, schema: &Schema) -> Result<(), Vec<NormalizationError>> {
992        let schema_info = SymbolInfo {
993            id: schema.id.clone(),
994            name: schema.name.clone(),
995            path: schema.id.path.clone(),
996            kind: SymbolKind::Struct,
997            resolved: false,
998            dependencies: BTreeSet::new(),
999        };
1000        self.context.symbol_table.register(schema_info);
1001
1002        for function in &schema.functions {
1003            let function_info = SymbolInfo {
1004                id: function.id.clone(),
1005                name: function.name.clone(),
1006                path: function.id.path.clone(),
1007                kind: SymbolKind::Endpoint,
1008                resolved: false,
1009                dependencies: BTreeSet::new(),
1010            };
1011            self.context.symbol_table.register(function_info);
1012            self.context
1013                .raw_functions
1014                .insert(function.id.clone(), function.clone());
1015        }
1016
1017        self.discover_types_from_typespace(&schema.input_types);
1018        self.discover_types_from_typespace(&schema.output_types);
1019
1020        if self.context.has_errors() {
1021            return Err(self.context.take_errors());
1022        }
1023
1024        Ok(())
1025    }
1026
1027    fn discover_types_from_typespace(&mut self, typespace: &crate::Typespace) {
1028        for ty in typespace.types() {
1029            self.discover_type_symbols(ty);
1030        }
1031    }
1032
1033    fn discover_type_symbols(&mut self, ty: &Type) {
1034        let (id, name, kind) = match ty {
1035            Type::Primitive(p) => (p.id.clone(), p.name.clone(), SymbolKind::Primitive),
1036            Type::Struct(s) => (s.id.clone(), s.name.clone(), SymbolKind::Struct),
1037            Type::Enum(e) => (e.id.clone(), e.name.clone(), SymbolKind::Enum),
1038        };
1039
1040        let path = id.path.clone();
1041
1042        let symbol_info = SymbolInfo {
1043            id: id.clone(),
1044            name,
1045            path,
1046            kind,
1047            resolved: false,
1048            dependencies: BTreeSet::new(),
1049        };
1050
1051        self.context.symbol_table.register(symbol_info);
1052        self.context.raw_types.insert(id, ty.clone());
1053
1054        match ty {
1055            Type::Struct(s) => self.discover_struct_symbols(s),
1056            Type::Enum(e) => self.discover_enum_symbols(e),
1057            Type::Primitive(_) => {}
1058        }
1059    }
1060
1061    fn discover_struct_symbols(&mut self, strukt: &Struct) {
1062        for field in strukt.fields() {
1063            let field_info = SymbolInfo {
1064                id: field.id.clone(),
1065                name: field.name.clone(),
1066                path: field.id.path.clone(),
1067                kind: SymbolKind::Field,
1068                resolved: false,
1069                dependencies: BTreeSet::new(),
1070            };
1071            self.context.symbol_table.register(field_info);
1072        }
1073    }
1074
1075    fn discover_enum_symbols(&mut self, enm: &Enum) {
1076        for variant in enm.variants() {
1077            let variant_info = SymbolInfo {
1078                id: variant.id.clone(),
1079                name: variant.name.clone(),
1080                path: variant.id.path.clone(),
1081                kind: SymbolKind::Variant,
1082                resolved: false,
1083                dependencies: BTreeSet::new(),
1084            };
1085            self.context.symbol_table.register(variant_info);
1086
1087            for field in variant.fields() {
1088                let field_info = SymbolInfo {
1089                    id: field.id.clone(),
1090                    name: field.name.clone(),
1091                    path: field.id.path.clone(),
1092                    kind: SymbolKind::Field,
1093                    resolved: false,
1094                    dependencies: BTreeSet::new(),
1095                };
1096                self.context.symbol_table.register(field_info);
1097            }
1098        }
1099    }
1100
1101    fn resolve_types(&mut self) -> Result<(), Vec<NormalizationError>> {
1102        for symbol_info in self.context.symbol_table.symbols.values() {
1103            if !matches!(
1104                symbol_info.kind,
1105                SymbolKind::Struct
1106                    | SymbolKind::Enum
1107                    | SymbolKind::Primitive
1108                    | SymbolKind::TypeAlias
1109            ) {
1110                continue;
1111            }
1112            self.context
1113                .resolution_cache
1114                .insert(symbol_info.name.clone(), symbol_info.id.clone());
1115
1116            let qualified_name = symbol_info.id.qualified_name();
1117            if qualified_name != symbol_info.name {
1118                self.context
1119                    .resolution_cache
1120                    .insert(qualified_name, symbol_info.id.clone());
1121            }
1122        }
1123
1124        self.add_stdlib_types_to_cache();
1125
1126        for (function_id, function) in &self.context.raw_functions.clone() {
1127            self.resolve_function_references(function_id, function);
1128        }
1129
1130        for (type_id, ty) in &self.context.raw_types.clone() {
1131            self.resolve_type_references(type_id, ty);
1132        }
1133
1134        if self.context.has_errors() {
1135            return Err(self.context.take_errors());
1136        }
1137
1138        Ok(())
1139    }
1140
1141    fn resolve_function_references(&mut self, function_id: &SymbolId, function: &Function) {
1142        if let Some(input_type) = &function.input_type {
1143            self.resolve_single_reference(function_id, input_type);
1144        }
1145        if let Some(input_headers) = &function.input_headers {
1146            self.resolve_single_reference(function_id, input_headers);
1147        }
1148        if let Some(output_type) = &function.output_type {
1149            self.resolve_single_reference(function_id, output_type);
1150        }
1151        if let Some(error_type) = &function.error_type {
1152            self.resolve_single_reference(function_id, error_type);
1153        }
1154    }
1155
1156    fn resolve_type_references(&mut self, type_id: &SymbolId, ty: &Type) {
1157        let generic_params: BTreeSet<String> = ty.parameters().map(|p| p.name.clone()).collect();
1158        self.context.generic_scope.extend(generic_params.clone());
1159
1160        match ty {
1161            Type::Struct(s) => {
1162                for field in s.fields() {
1163                    self.resolve_field_references(type_id, field);
1164                }
1165            }
1166            Type::Enum(e) => {
1167                for variant in e.variants() {
1168                    for field in variant.fields() {
1169                        self.resolve_field_references(type_id, field);
1170                    }
1171                }
1172            }
1173            Type::Primitive(p) => {
1174                if let Some(fallback) = &p.fallback {
1175                    self.resolve_single_reference(type_id, fallback);
1176                }
1177            }
1178        }
1179
1180        for param in generic_params {
1181            self.context.generic_scope.remove(&param);
1182        }
1183    }
1184
1185    fn resolve_field_references(&mut self, owner_id: &SymbolId, field: &Field) {
1186        self.resolve_single_reference(owner_id, &field.type_ref);
1187    }
1188
1189    fn add_stdlib_types_to_cache(&mut self) {
1190        for &(name, kind) in STDLIB_TYPES {
1191            let path = name.split("::").map(|s| s.to_string()).collect();
1192            let symbol_id = SymbolId::new(kind, path);
1193            self.context
1194                .resolution_cache
1195                .insert(name.to_string(), symbol_id);
1196        }
1197    }
1198
1199    fn resolve_single_reference(&mut self, referrer: &SymbolId, type_ref: &TypeReference) {
1200        if self.context.generic_scope.contains(&type_ref.name) {
1201            for arg in &type_ref.arguments {
1202                self.resolve_single_reference(referrer, arg);
1203            }
1204            return;
1205        }
1206
1207        if let Some(target_id) = self.resolve_global_type_reference(&type_ref.name) {
1208            self.context
1209                .symbol_table
1210                .add_dependency(referrer.clone(), target_id);
1211        }
1212        // Unresolved references are silently ignored for now -
1213        // they'll be handled as placeholders in IR building
1214
1215        for arg in &type_ref.arguments {
1216            self.resolve_single_reference(referrer, arg);
1217        }
1218    }
1219
1220    fn resolve_global_type_reference(&self, name: &str) -> Option<SymbolId> {
1221        self.context.resolution_cache.get(name).cloned()
1222    }
1223
1224    fn analyze_dependencies(&mut self) -> Result<(), Vec<NormalizationError>> {
1225        match self.context.symbol_table.topological_sort() {
1226            Ok(_) => Ok(()),
1227            Err(_cycle) => {
1228                // Cycles may be expected after CircularDependencyResolutionStage
1229                Ok(())
1230            }
1231        }
1232    }
1233
1234    fn validate_semantics(&mut self) -> Result<(), Vec<NormalizationError>> {
1235        // TODO: Add semantic validation passes
1236        if self.context.has_errors() {
1237            return Err(self.context.take_errors());
1238        }
1239        Ok(())
1240    }
1241
1242    fn build_semantic_ir(
1243        self,
1244        schema: &Schema,
1245        original_names: &HashMap<String, String>,
1246    ) -> Result<SemanticSchema, Vec<NormalizationError>> {
1247        let mut semantic_types = BTreeMap::new();
1248        let mut semantic_functions = BTreeMap::new();
1249
1250        let sorted_symbols = match self.context.symbol_table.topological_sort() {
1251            Ok(sorted) => sorted,
1252            Err(_cycle) => self.context.symbol_table.symbols.keys().cloned().collect(),
1253        };
1254
1255        for symbol_id in sorted_symbols {
1256            if let Some(raw_type) = self.context.raw_types.get(&symbol_id) {
1257                let semantic_type = self.build_semantic_type(raw_type, original_names)?;
1258                semantic_types.insert(symbol_id, semantic_type);
1259            }
1260        }
1261
1262        for (function_id, raw_function) in &self.context.raw_functions {
1263            let semantic_function = self.build_semantic_function(raw_function)?;
1264            semantic_functions.insert(function_id.clone(), semantic_function);
1265        }
1266
1267        Ok(SemanticSchema {
1268            id: schema.id.clone(),
1269            name: schema.name.clone(),
1270            description: schema.description.clone(),
1271            functions: semantic_functions,
1272            types: semantic_types,
1273            symbol_table: self.context.symbol_table,
1274        })
1275    }
1276
1277    fn build_semantic_type(
1278        &self,
1279        raw_type: &Type,
1280        original_names: &HashMap<String, String>,
1281    ) -> Result<SemanticType, Vec<NormalizationError>> {
1282        match raw_type {
1283            Type::Primitive(p) => Ok(SemanticType::Primitive(
1284                self.build_semantic_primitive(p, original_names)?,
1285            )),
1286            Type::Struct(s) => Ok(SemanticType::Struct(
1287                self.build_semantic_struct(s, original_names)?,
1288            )),
1289            Type::Enum(e) => Ok(SemanticType::Enum(
1290                self.build_semantic_enum(e, original_names)?,
1291            )),
1292        }
1293    }
1294
1295    fn build_semantic_primitive(
1296        &self,
1297        primitive: &Primitive,
1298        original_names: &HashMap<String, String>,
1299    ) -> Result<SemanticPrimitive, Vec<NormalizationError>> {
1300        let fallback = primitive
1301            .fallback
1302            .as_ref()
1303            .and_then(|tr| self.resolve_global_type_reference(&tr.name));
1304
1305        let original_name = original_names
1306            .get(&primitive.name)
1307            .cloned()
1308            .unwrap_or_else(|| primitive.name.clone());
1309
1310        Ok(SemanticPrimitive {
1311            id: primitive.id.clone(),
1312            name: primitive.name.clone(),
1313            original_name,
1314            description: primitive.description.clone(),
1315            parameters: primitive
1316                .parameters
1317                .iter()
1318                .map(|p| SemanticTypeParameter {
1319                    name: p.name.clone(),
1320                    description: p.description.clone(),
1321                    bounds: vec![],
1322                    default: None,
1323                })
1324                .collect(),
1325            fallback,
1326        })
1327    }
1328
1329    fn build_semantic_struct(
1330        &self,
1331        strukt: &Struct,
1332        original_names: &HashMap<String, String>,
1333    ) -> Result<SemanticStruct, Vec<NormalizationError>> {
1334        let mut fields = BTreeMap::new();
1335
1336        for field in strukt.fields() {
1337            let semantic_field = self.build_semantic_field(field)?;
1338            fields.insert(field.id.clone(), semantic_field);
1339        }
1340
1341        let original_name = original_names
1342            .get(&strukt.name)
1343            .cloned()
1344            .unwrap_or_else(|| strukt.name.clone());
1345
1346        Ok(SemanticStruct {
1347            id: strukt.id.clone(),
1348            name: strukt.name.clone(),
1349            original_name,
1350            serde_name: strukt.serde_name.clone(),
1351            description: strukt.description.clone(),
1352            parameters: strukt
1353                .parameters
1354                .iter()
1355                .map(|p| SemanticTypeParameter {
1356                    name: p.name.clone(),
1357                    description: p.description.clone(),
1358                    bounds: vec![],
1359                    default: None,
1360                })
1361                .collect(),
1362            fields,
1363            transparent: strukt.transparent,
1364            is_tuple: strukt.is_tuple(),
1365            is_unit: strukt.is_unit(),
1366            codegen_config: strukt.codegen_config.clone(),
1367        })
1368    }
1369
1370    fn build_semantic_enum(
1371        &self,
1372        enm: &Enum,
1373        original_names: &HashMap<String, String>,
1374    ) -> Result<SemanticEnum, Vec<NormalizationError>> {
1375        let mut variants = BTreeMap::new();
1376
1377        for variant in enm.variants() {
1378            let semantic_variant = self.build_semantic_variant(variant)?;
1379            variants.insert(variant.id.clone(), semantic_variant);
1380        }
1381
1382        let original_name = original_names
1383            .get(&enm.name)
1384            .cloned()
1385            .unwrap_or_else(|| enm.name.clone());
1386
1387        Ok(SemanticEnum {
1388            id: enm.id.clone(),
1389            name: enm.name.clone(),
1390            original_name,
1391            serde_name: enm.serde_name.clone(),
1392            description: enm.description.clone(),
1393            parameters: enm
1394                .parameters
1395                .iter()
1396                .map(|p| SemanticTypeParameter {
1397                    name: p.name.clone(),
1398                    description: p.description.clone(),
1399                    bounds: vec![],
1400                    default: None,
1401                })
1402                .collect(),
1403            variants,
1404            representation: enm.representation.clone(),
1405            codegen_config: enm.codegen_config.clone(),
1406        })
1407    }
1408
1409    fn build_semantic_field(
1410        &self,
1411        field: &Field,
1412    ) -> Result<SemanticField, Vec<NormalizationError>> {
1413        let resolved_type_ref = self.build_resolved_type_reference(&field.type_ref)?;
1414
1415        Ok(SemanticField {
1416            id: field.id.clone(),
1417            name: field.name.clone(),
1418            serde_name: field.serde_name.clone(),
1419            description: field.description.clone(),
1420            deprecation_note: field.deprecation_note.clone(),
1421            type_ref: resolved_type_ref,
1422            required: field.required,
1423            flattened: field.flattened,
1424            transform_callback: field.transform_callback.clone(),
1425        })
1426    }
1427
1428    fn build_semantic_variant(
1429        &self,
1430        variant: &Variant,
1431    ) -> Result<SemanticVariant, Vec<NormalizationError>> {
1432        let mut fields = BTreeMap::new();
1433
1434        for field in variant.fields() {
1435            let semantic_field = self.build_semantic_field(field)?;
1436            fields.insert(field.id.clone(), semantic_field);
1437        }
1438
1439        let field_style = match &variant.fields {
1440            Fields::Named(_) => FieldStyle::Named,
1441            Fields::Unnamed(_) => FieldStyle::Unnamed,
1442            Fields::None => FieldStyle::Unit,
1443        };
1444
1445        Ok(SemanticVariant {
1446            id: variant.id.clone(),
1447            name: variant.name.clone(),
1448            serde_name: variant.serde_name.clone(),
1449            description: variant.description.clone(),
1450            fields,
1451            discriminant: variant.discriminant,
1452            untagged: variant.untagged,
1453            field_style,
1454        })
1455    }
1456
1457    fn build_semantic_function(
1458        &self,
1459        function: &Function,
1460    ) -> Result<SemanticFunction, Vec<NormalizationError>> {
1461        let input_type = function
1462            .input_type
1463            .as_ref()
1464            .and_then(|tr| self.resolve_global_type_reference(&tr.name));
1465        let input_headers = function
1466            .input_headers
1467            .as_ref()
1468            .and_then(|tr| self.resolve_global_type_reference(&tr.name));
1469        let output_type = function
1470            .output_type
1471            .as_ref()
1472            .and_then(|tr| self.resolve_global_type_reference(&tr.name));
1473        let error_type = function
1474            .error_type
1475            .as_ref()
1476            .and_then(|tr| self.resolve_global_type_reference(&tr.name));
1477
1478        Ok(SemanticFunction {
1479            id: function.id.clone(),
1480            name: function.name.clone(),
1481            path: function.path.clone(),
1482            description: function.description.clone(),
1483            deprecation_note: function.deprecation_note.clone(),
1484            input_type,
1485            input_headers,
1486            output_type,
1487            error_type,
1488            serialization: function.serialization.clone(),
1489            readonly: function.readonly,
1490            tags: function.tags.clone(),
1491        })
1492    }
1493
1494    fn build_resolved_type_reference(
1495        &self,
1496        type_ref: &TypeReference,
1497    ) -> Result<ResolvedTypeReference, Vec<NormalizationError>> {
1498        let is_likely_generic = !type_ref.name.contains("::");
1499
1500        let target = if let Some(target) = self.resolve_global_type_reference(&type_ref.name) {
1501            target
1502        } else if is_likely_generic {
1503            SymbolId::new(SymbolKind::TypeAlias, vec![type_ref.name.clone()])
1504        } else {
1505            SymbolId::new(SymbolKind::Struct, vec![type_ref.name.replace("::", "_")])
1506        };
1507
1508        let mut resolved_args = Vec::new();
1509        for arg in &type_ref.arguments {
1510            resolved_args.push(self.build_resolved_type_reference(arg)?);
1511        }
1512
1513        Ok(ResolvedTypeReference::new(
1514            target,
1515            resolved_args,
1516            type_ref.name.clone(),
1517        ))
1518    }
1519}
1520
1521impl Default for Normalizer {
1522    fn default() -> Self {
1523        Self::new()
1524    }
1525}
1526
1527// ---------------------------------------------------------------------------
1528// Tests
1529// ---------------------------------------------------------------------------
1530
1531#[cfg(test)]
1532mod tests {
1533    use super::*;
1534    use crate::{Fields, Function, Representation, Schema, Struct, TypeReference, Typespace};
1535
1536    #[test]
1537    fn test_basic_normalization() {
1538        let mut schema = Schema::new();
1539        schema.name = "TestSchema".to_string();
1540
1541        let user_struct = Struct::new("User");
1542        let user_type = Type::Struct(user_struct);
1543
1544        let mut input_types = Typespace::new();
1545        input_types.insert_type(user_type);
1546        schema.input_types = input_types;
1547
1548        let normalizer = Normalizer::new();
1549        let result = normalizer.normalize(&schema);
1550
1551        assert!(
1552            result.is_ok(),
1553            "Normalization should succeed for simple schema"
1554        );
1555
1556        let semantic_schema = result.unwrap();
1557        assert_eq!(semantic_schema.name, "TestSchema");
1558        assert_eq!(semantic_schema.types.len(), 1);
1559    }
1560
1561    #[test]
1562    fn test_unresolved_reference_handled_gracefully() {
1563        let mut schema = Schema::new();
1564        schema.name = "TestSchema".to_string();
1565
1566        let mut function = Function::new("test_function".to_string());
1567        function.input_type = Some(TypeReference::new("NonExistentType", vec![]));
1568        schema.functions.push(function);
1569
1570        let normalizer = Normalizer::new();
1571        let result = normalizer.normalize(&schema);
1572
1573        assert!(
1574            result.is_ok(),
1575            "Normalization should handle unresolved references gracefully"
1576        );
1577
1578        let semantic_schema = result.unwrap();
1579        assert!(!semantic_schema.functions.is_empty());
1580    }
1581
1582    #[test]
1583    fn test_normalize_with_functions_and_types() {
1584        let mut schema = Schema::new();
1585        schema.name = "API".to_string();
1586
1587        // Add types
1588        let mut user_struct = Struct::new("api::User");
1589        user_struct.fields = Fields::Named(vec![
1590            Field::new("name".into(), "std::string::String".into()),
1591            Field::new("age".into(), "u32".into()),
1592        ]);
1593        schema.input_types.insert_type(user_struct.into());
1594
1595        let mut error_enum = Enum::new("api::Error".into());
1596        error_enum.representation = Representation::Internal { tag: "type".into() };
1597        error_enum.variants = vec![
1598            Variant::new("NotFound".into()),
1599            Variant::new("Forbidden".into()),
1600        ];
1601        schema.output_types.insert_type(error_enum.into());
1602
1603        // Add a function referencing both types
1604        let mut function = Function::new("get_user".into());
1605        function.input_type = Some(TypeReference::new("api::User", vec![]));
1606        function.error_type = Some(TypeReference::new("api::Error", vec![]));
1607        schema.functions.push(function);
1608
1609        let normalizer = Normalizer::new();
1610        let result = normalizer.normalize(&schema);
1611        assert!(result.is_ok(), "Normalization failed: {:?}", result.err());
1612
1613        let semantic = result.unwrap();
1614        assert_eq!(semantic.types.len(), 2);
1615        assert_eq!(semantic.functions.len(), 1);
1616
1617        // Verify the function has resolved type references
1618        let func = semantic.functions.values().next().unwrap();
1619        assert!(func.input_type.is_some());
1620        assert!(func.error_type.is_some());
1621    }
1622
1623    #[test]
1624    fn test_normalize_function_with_input_headers() {
1625        let mut schema = Schema::new();
1626        schema.name = "API".to_string();
1627
1628        let headers_struct = Struct::new("Headers");
1629        schema.input_types.insert_type(headers_struct.into());
1630
1631        let body_struct = Struct::new("Body");
1632        schema.input_types.insert_type(body_struct.into());
1633
1634        let mut function = Function::new("do_thing".into());
1635        function.input_type = Some(TypeReference::new("Body", vec![]));
1636        function.input_headers = Some(TypeReference::new("Headers", vec![]));
1637        schema.functions.push(function);
1638
1639        let normalizer = Normalizer::new();
1640        let semantic = normalizer.normalize(&schema).unwrap();
1641
1642        let func = semantic.functions.values().next().unwrap();
1643        assert!(func.input_type.is_some());
1644        assert!(func.input_headers.is_some());
1645    }
1646
1647    #[test]
1648    fn test_type_consolidation_shared_name() {
1649        let mut schema = Schema::new();
1650        schema.name = "Test".to_string();
1651
1652        // Same simple name in both typespaces triggers conflict renaming
1653        let input_struct = Struct::new("Shared");
1654        let output_struct = Struct::new("Shared");
1655        schema.input_types.insert_type(input_struct.into());
1656        schema.output_types.insert_type(output_struct.into());
1657
1658        let stage = TypeConsolidationStage;
1659        stage.transform(&mut schema).unwrap();
1660
1661        // Both get prefixed since they share a simple name
1662        let type_names: Vec<_> = schema
1663            .input_types
1664            .types()
1665            .map(|t| t.name().to_string())
1666            .collect();
1667        assert!(
1668            type_names.contains(&"input.Shared".to_string()),
1669            "Expected input.Shared, got: {type_names:?}"
1670        );
1671        assert!(
1672            type_names.contains(&"output.Shared".to_string()),
1673            "Expected output.Shared, got: {type_names:?}"
1674        );
1675        assert!(schema.output_types.is_empty());
1676    }
1677
1678    #[test]
1679    fn test_type_consolidation_conflict_renaming() {
1680        let mut schema = Schema::new();
1681        schema.name = "Test".to_string();
1682
1683        // Different types sharing simple name get renamed
1684        let mut input_struct = Struct::new("Foo");
1685        input_struct.description = "input version".into();
1686        let mut output_struct = Struct::new("Foo");
1687        output_struct.description = "output version".into();
1688        // Make them different so they're not deduplicated
1689        output_struct.fields = Fields::Named(vec![Field::new("x".into(), "u32".into())]);
1690
1691        schema.input_types.insert_type(input_struct.into());
1692        schema.output_types.insert_type(output_struct.into());
1693
1694        let stage = TypeConsolidationStage;
1695        stage.transform(&mut schema).unwrap();
1696
1697        let type_names: Vec<_> = schema
1698            .input_types
1699            .types()
1700            .map(|t| t.name().to_string())
1701            .collect();
1702        assert!(
1703            type_names.contains(&"input.Foo".to_string())
1704                || type_names.contains(&"output.Foo".to_string()),
1705            "Expected conflict renaming, got: {type_names:?}"
1706        );
1707    }
1708
1709    #[test]
1710    fn test_ensure_symbol_ids_idempotent() {
1711        let mut schema = Schema::new();
1712        schema.name = "Test".to_string();
1713
1714        let mut user_struct = Struct::new("User");
1715        user_struct.fields = Fields::Named(vec![Field::new("id".into(), "u64".into())]);
1716        schema.input_types.insert_type(user_struct.into());
1717
1718        // Run twice
1719        crate::ensure_symbol_ids(&mut schema);
1720        let ids_first: Vec<_> = schema
1721            .input_types
1722            .types()
1723            .map(|t| match t {
1724                Type::Struct(s) => s.id.clone(),
1725                _ => unreachable!(),
1726            })
1727            .collect();
1728
1729        crate::ensure_symbol_ids(&mut schema);
1730        let ids_second: Vec<_> = schema
1731            .input_types
1732            .types()
1733            .map(|t| match t {
1734                Type::Struct(s) => s.id.clone(),
1735                _ => unreachable!(),
1736            })
1737            .collect();
1738
1739        assert_eq!(
1740            ids_first, ids_second,
1741            "ensure_symbol_ids should be idempotent"
1742        );
1743    }
1744
1745    #[test]
1746    fn test_ensure_symbol_ids_enum_variants_and_fields() {
1747        let mut schema = Schema::new();
1748        schema.name = "Test".to_string();
1749
1750        let mut enm = Enum::new("Status".into());
1751        let mut variant = Variant::new("Active".into());
1752        variant.fields = Fields::Named(vec![Field::new(
1753            "since".into(),
1754            "std::string::String".into(),
1755        )]);
1756        enm.variants = vec![variant, Variant::new("Inactive".into())];
1757        schema.input_types.insert_type(enm.into());
1758
1759        crate::ensure_symbol_ids(&mut schema);
1760
1761        let enm = schema
1762            .input_types
1763            .get_type("Status")
1764            .unwrap()
1765            .as_enum()
1766            .unwrap();
1767        assert!(!enm.id.is_unknown(), "Enum should have a non-unknown id");
1768
1769        for variant in &enm.variants {
1770            assert!(
1771                !variant.id.is_unknown(),
1772                "Variant '{}' should have a non-unknown id",
1773                variant.name
1774            );
1775            for field in variant.fields() {
1776                assert!(
1777                    !field.id.is_unknown(),
1778                    "Field '{}' in variant '{}' should have a non-unknown id",
1779                    field.name,
1780                    variant.name
1781                );
1782            }
1783        }
1784
1785        // Check paths are structured correctly
1786        let active = &enm.variants[0];
1787        assert_eq!(active.id.path.last().unwrap(), "Active");
1788        let since_field = active.fields().next().unwrap();
1789        assert!(
1790            since_field.id.path.contains(&"Active".to_string()),
1791            "Field path should include parent variant: {:?}",
1792            since_field.id.path
1793        );
1794    }
1795
1796    #[test]
1797    fn test_circular_dependency_detection() {
1798        let mut schema = Schema::new();
1799        schema.name = "Test".to_string();
1800
1801        // Node { children: Vec<Node> } - self-referential
1802        let mut node_struct = Struct::new("Node");
1803        node_struct.fields = Fields::Named(vec![Field::new(
1804            "children".into(),
1805            TypeReference::new("std::vec::Vec", vec![TypeReference::new("Node", vec![])]),
1806        )]);
1807        schema.input_types.insert_type(node_struct.into());
1808
1809        let stage = CircularDependencyResolutionStage::new();
1810        // Should detect the cycle but not fail (strategies are stubs)
1811        let result = stage.transform(&mut schema);
1812        assert!(result.is_ok());
1813    }
1814
1815    #[test]
1816    fn test_empty_schema_normalization() {
1817        let schema = Schema::new();
1818        let normalizer = Normalizer::new();
1819        let result = normalizer.normalize(&schema);
1820        assert!(result.is_ok());
1821
1822        let semantic = result.unwrap();
1823        assert!(semantic.types.is_empty());
1824        assert!(semantic.functions.is_empty());
1825    }
1826
1827    #[test]
1828    fn test_naming_resolution_all_conflicting_types_have_references_rewritten() {
1829        // Regression: NamingResolutionStage only tracked the first qualified name
1830        // per simple name in name_usage, leaving references to the second conflicting
1831        // type dangling after rename.
1832        let mut schema = Schema::new();
1833        schema.name = "Test".to_string();
1834
1835        // Two types sharing simple name "Foo" in different modules
1836        let a_foo = Struct::new("a::Foo");
1837        let b_foo = Struct::new("b::Foo");
1838        schema.input_types.insert_type(a_foo.into());
1839        schema.input_types.insert_type(b_foo.into());
1840
1841        // Function referencing BOTH types
1842        let mut func1 = Function::new("use_a_foo".into());
1843        func1.input_type = Some(TypeReference::new("a::Foo", vec![]));
1844        schema.functions.push(func1);
1845
1846        let mut func2 = Function::new("use_b_foo".into());
1847        func2.input_type = Some(TypeReference::new("b::Foo", vec![]));
1848        schema.functions.push(func2);
1849
1850        let stage = NamingResolutionStage;
1851        stage.transform(&mut schema).unwrap();
1852
1853        // Collect all type names defined in the schema
1854        let type_names: std::collections::HashSet<String> = schema
1855            .input_types
1856            .types()
1857            .map(|t| t.name().to_string())
1858            .collect();
1859
1860        // Both function references must point to names that exist in the schema
1861        for func in &schema.functions {
1862            if let Some(ref input_type) = func.input_type {
1863                assert!(
1864                    type_names.contains(&input_type.name),
1865                    "Function '{}' references type '{}' which doesn't exist in schema. Available: {:?}",
1866                    func.name, input_type.name, type_names
1867                );
1868            }
1869        }
1870    }
1871
1872    #[test]
1873    fn test_generate_unique_name_excluded_modules_no_collision() {
1874        // Regression: when all module parts are in the exclusion list ("model", "proto"),
1875        // the fallback was module_parts[0], causing "model::Foo" and "model::proto::Foo"
1876        // to both become "ModelFoo". Now uses joined fallback to avoid collisions.
1877        let name1 = generate_unique_name("model::Foo");
1878        let name2 = generate_unique_name("model::proto::Foo");
1879
1880        assert_ne!(
1881            name1, name2,
1882            "model::Foo and model::proto::Foo must produce different names, got '{name1}' and '{name2}'"
1883        );
1884    }
1885
1886    #[test]
1887    fn test_generate_unique_name_with_non_excluded_module() {
1888        // Normal case: module part not in exclusion list is used as prefix
1889        let name = generate_unique_name("billing::Invoice");
1890        assert_eq!(name, "BillingInvoice");
1891    }
1892
1893    #[test]
1894    fn test_self_referential_type_normalizes_successfully() {
1895        // A self-referential type (cycle of length 1) should pass through the
1896        // full Normalizer pipeline without error.  In Rust the schema already
1897        // records Box<T> wrappers, so the boxing strategy is intentionally a
1898        // no-op — the cycle is detected but does not block normalization.
1899        let mut schema = Schema::new();
1900        schema.name = "TreeSchema".to_string();
1901
1902        // TreeNode has a field `children` of type Vec<TreeNode> (indirect
1903        // self-reference via a container — already broken by Vec) and a field
1904        // `parent` that directly references TreeNode (direct self-reference,
1905        // which in real Rust code would be Box<TreeNode>).
1906        let mut tree_node = Struct::new("TreeNode");
1907        tree_node.fields = Fields::Named(vec![
1908            Field::new("label".into(), "std::string::String".into()),
1909            Field::new(
1910                "children".into(),
1911                TypeReference::new(
1912                    "std::vec::Vec",
1913                    vec![TypeReference::new("TreeNode", vec![])],
1914                ),
1915            ),
1916            Field::new(
1917                "parent".into(),
1918                TypeReference::new(
1919                    "std::boxed::Box",
1920                    vec![TypeReference::new("TreeNode", vec![])],
1921                ),
1922            ),
1923        ]);
1924        schema.input_types.insert_type(tree_node.into());
1925
1926        let normalizer = Normalizer::new();
1927        let result = normalizer.normalize(&schema);
1928
1929        assert!(
1930            result.is_ok(),
1931            "Self-referential type should not prevent normalization: {:?}",
1932            result.err()
1933        );
1934
1935        let semantic = result.unwrap();
1936        assert_eq!(semantic.types.len(), 1, "TreeNode type should be present");
1937
1938        // Verify the type round-tripped with the expected name
1939        let tree_node_type = semantic.types.values().next().unwrap();
1940        match tree_node_type {
1941            SemanticType::Struct(s) => {
1942                assert_eq!(s.name, "TreeNode");
1943                assert_eq!(s.fields.len(), 3, "All three fields should survive");
1944            }
1945            other => panic!("Expected Struct, got {:?}", std::mem::discriminant(other)),
1946        }
1947    }
1948
1949    #[test]
1950    fn test_multi_type_cycle_normalizes_successfully() {
1951        // A → B → A cycle (length 2) should also pass through normalization
1952        // without error.  The forward-declaration strategy is likewise a no-op
1953        // for Rust schemas.
1954        let mut schema = Schema::new();
1955        schema.name = "CycleSchema".to_string();
1956
1957        // Department references Employee, Employee references Department
1958        let mut department = Struct::new("Department");
1959        department.fields = Fields::Named(vec![
1960            Field::new("name".into(), "std::string::String".into()),
1961            Field::new("manager".into(), TypeReference::new("Employee", vec![])),
1962        ]);
1963
1964        let mut employee = Struct::new("Employee");
1965        employee.fields = Fields::Named(vec![
1966            Field::new("name".into(), "std::string::String".into()),
1967            Field::new(
1968                "department".into(),
1969                TypeReference::new("Department", vec![]),
1970            ),
1971        ]);
1972
1973        schema.input_types.insert_type(department.into());
1974        schema.input_types.insert_type(employee.into());
1975
1976        let normalizer = Normalizer::new();
1977        let result = normalizer.normalize(&schema);
1978
1979        assert!(
1980            result.is_ok(),
1981            "Multi-type cycle should not prevent normalization: {:?}",
1982            result.err()
1983        );
1984
1985        let semantic = result.unwrap();
1986        assert_eq!(
1987            semantic.types.len(),
1988            2,
1989            "Both Department and Employee types should be present"
1990        );
1991    }
1992
1993    #[test]
1994    fn test_type_consolidation_qualified_name_uniqueness() {
1995        // Regression: when input types `a::Foo` and `b::Foo` both conflict with
1996        // an output type `c::Foo`, all three must receive distinct names after
1997        // consolidation — no silent drops.
1998        let mut schema = Schema::new();
1999        schema.name = "Test".to_string();
2000
2001        let a_foo = Struct::new("a::Foo");
2002        let b_foo = Struct::new("b::Foo");
2003        let c_foo = Struct::new("c::Foo");
2004
2005        schema.input_types.insert_type(a_foo.into());
2006        schema.input_types.insert_type(b_foo.into());
2007        schema.output_types.insert_type(c_foo.into());
2008
2009        let stage = TypeConsolidationStage;
2010        stage.transform(&mut schema).unwrap();
2011
2012        let type_names: Vec<String> = schema
2013            .input_types
2014            .types()
2015            .map(|t| t.name().to_string())
2016            .collect();
2017
2018        // All three should be present with distinct names
2019        assert_eq!(
2020            type_names.len(),
2021            3,
2022            "All three Foo types should survive consolidation, got: {type_names:?}"
2023        );
2024
2025        // Verify uniqueness — no two names are the same
2026        let unique_names: std::collections::HashSet<&String> = type_names.iter().collect();
2027        assert_eq!(
2028            unique_names.len(),
2029            3,
2030            "All three names should be distinct, got: {type_names:?}"
2031        );
2032
2033        // Verify the naming convention: input types get "input." prefix,
2034        // output types get "output." prefix
2035        let has_input_a = type_names
2036            .iter()
2037            .any(|n| n.contains("input") && n.contains("a"));
2038        let has_input_b = type_names
2039            .iter()
2040            .any(|n| n.contains("input") && n.contains("b"));
2041        let has_output_c = type_names
2042            .iter()
2043            .any(|n| n.contains("output") && n.contains("c"));
2044        assert!(
2045            has_input_a,
2046            "Expected an input.a.Foo variant, got: {type_names:?}"
2047        );
2048        assert!(
2049            has_input_b,
2050            "Expected an input.b.Foo variant, got: {type_names:?}"
2051        );
2052        assert!(
2053            has_output_c,
2054            "Expected an output.c.Foo variant, got: {type_names:?}"
2055        );
2056    }
2057
2058    #[test]
2059    fn test_resolve_types_does_not_confuse_variant_with_type() {
2060        // Regression: the resolve_types phase should resolve a function's type
2061        // reference "Status" to the Struct named "Status", not to an enum variant
2062        // that happens to also be named "Status".
2063        let mut schema = Schema::new();
2064        schema.name = "Test".to_string();
2065
2066        // A struct named "Status"
2067        let status_struct = Struct::new("Status");
2068        schema.input_types.insert_type(status_struct.into());
2069
2070        // An enum with a variant named "Status"
2071        let mut state_enum = Enum::new("State".into());
2072        state_enum.variants = vec![Variant::new("Status".into()), Variant::new("Error".into())];
2073        schema.input_types.insert_type(state_enum.into());
2074
2075        // A function that references "Status" — should resolve to the Struct
2076        let mut function = Function::new("get_status".into());
2077        function.input_type = Some(TypeReference::new("Status", vec![]));
2078        schema.functions.push(function);
2079
2080        let normalizer = Normalizer::new();
2081        let result = normalizer.normalize(&schema);
2082        assert!(
2083            result.is_ok(),
2084            "Normalization should succeed: {:?}",
2085            result.err()
2086        );
2087
2088        let semantic = result.unwrap();
2089        let func = semantic.functions.values().next().unwrap();
2090
2091        // The function's input_type should resolve to the Status struct's ID
2092        let resolved_id = func
2093            .input_type
2094            .as_ref()
2095            .expect("input_type should be resolved");
2096
2097        // It should be a Struct kind, not a Variant kind
2098        assert_eq!(
2099            resolved_id.kind,
2100            crate::SymbolKind::Struct,
2101            "Function's input_type should resolve to a Struct, not a Variant. Got: {resolved_id:?}"
2102        );
2103    }
2104
2105    #[test]
2106    fn test_generate_unique_name_same_inner_module() {
2107        // Regression: two types with the same inner module and type name but
2108        // different outer modules must produce different unique names.
2109        let name_a = generate_unique_name("services::user::Profile");
2110        let name_b = generate_unique_name("auth::user::Profile");
2111
2112        assert_ne!(
2113            name_a, name_b,
2114            "services::user::Profile and auth::user::Profile must produce different names, \
2115             got '{name_a}' and '{name_b}'"
2116        );
2117
2118        // Verify they follow the expected PascalCase convention
2119        assert!(
2120            name_a.contains("Services") || name_a.contains("services"),
2121            "Expected 'services' component in name, got '{name_a}'"
2122        );
2123        assert!(
2124            name_b.contains("Auth") || name_b.contains("auth"),
2125            "Expected 'auth' component in name, got '{name_b}'"
2126        );
2127    }
2128
2129    #[test]
2130    fn test_function_symbol_path_matches_id() {
2131        // Regression: after normalization, a function's SymbolId should be
2132        // retrievable from the symbol table via its path.
2133        let mut schema = Schema::new();
2134        schema.name = "API".to_string();
2135
2136        let mut function = Function::new("get_user".into());
2137        function.input_type = None;
2138        function.output_type = None;
2139        schema.functions.push(function);
2140
2141        let normalizer = Normalizer::new();
2142        let semantic = normalizer
2143            .normalize(&schema)
2144            .expect("Normalization should succeed");
2145
2146        // Get the function's ID
2147        let (function_id, _) = semantic.functions.iter().next().unwrap();
2148
2149        // Verify the symbol table can find it by path
2150        let found = semantic.symbol_table.get_by_path(&function_id.path);
2151        assert!(
2152            found.is_some(),
2153            "symbol_table.get_by_path({:?}) should return Some, but got None. \
2154             Function ID: {function_id:?}",
2155            function_id.path
2156        );
2157
2158        let symbol_info = found.unwrap();
2159        assert_eq!(
2160            symbol_info.kind,
2161            crate::SymbolKind::Endpoint,
2162            "Symbol should be an Endpoint, got {:?}",
2163            symbol_info.kind
2164        );
2165    }
2166}