hemmer_provider_generator_common/
lib.rs

1//! Common types and utilities for the Hemmer Provider Generator
2//!
3//! This crate contains shared data structures, error types, and utilities
4//! used across the parser, generator, and CLI components.
5//!
6//! ## Architecture
7//!
8//! The generator follows this data flow:
9//! 1. **Parser**: SDK crates → ServiceDefinition (intermediate representation)
10//! 2. **Generator**: ServiceDefinition → Generated code (provider.k + Rust)
11//! 3. **Output**: Generated provider implementing ProviderExecutor trait
12
13pub mod sdk_metadata;
14
15use serde::{Deserialize, Serialize};
16use std::collections::HashMap;
17use thiserror::Error;
18
/// Errors that can occur during provider generation
///
/// This is the error half of the crate-wide [`Result`] alias. The `Io` and `Json`
/// variants are marked `#[from]`, so `std::io::Error` and `serde_json::Error`
/// values convert into this type automatically (for example through `?`).
#[derive(Error, Debug)]
pub enum GeneratorError {
    /// Input could not be interpreted; the payload is a human-readable message.
    /// Also returned by `Provider::from_name` for an unknown provider name.
    #[error("Parse error: {0}")]
    Parse(String),

    /// Code generation failed; the payload is a human-readable message
    #[error("Generation error: {0}")]
    Generation(String),

    /// Wrapper around an underlying `std::io::Error`
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),

    /// Wrapper around an underlying `serde_json::Error`
    #[error("JSON error: {0}")]
    Json(#[from] serde_json::Error),
}
34
/// Result type for generator operations
///
/// Shorthand for `std::result::Result<T, GeneratorError>`, so fallible functions
/// only need to name their success type.
pub type Result<T> = std::result::Result<T, GeneratorError>;
37
/// Represents a cloud provider type
///
/// The four unit variants are the built-in providers; their string identifiers
/// (as returned by `Provider::name`) are noted on each variant.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum Provider {
    /// Built-in provider, identified as "aws"
    Aws,
    /// Built-in provider, identified as "gcp"
    Gcp,
    /// Built-in provider, identified as "azure"
    Azure,
    /// Built-in provider, identified as "kubernetes"
    Kubernetes,
    /// Custom provider loaded from metadata file; the payload is the provider name
    Custom(String),
}
48
/// Provider-specific SDK configuration for code generation
///
/// This struct contains all the provider-specific patterns needed to generate
/// SDK client code without hardcoding provider checks in templates.
/// It is the value returned by `Provider::sdk_config`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProviderSdkConfig {
    /// SDK crate naming pattern (e.g., "aws-sdk-{service}" where {service} is replaced).
    /// A pattern without `{service}` marks a custom provider as using a shared client
    /// (see `Provider::uses_shared_client`).
    pub sdk_crate_pattern: String,
    /// Client type pattern (e.g., "aws_sdk_{service}::Client"); `{service}` is replaced
    /// by `Provider::client_type_for_service`
    pub client_type_pattern: String,
    /// Config crate name (e.g., "aws-config"); `None` when the provider has none
    pub config_crate: Option<String>,
    /// Whether the SDK uses async clients
    pub async_client: bool,
    /// Region attribute name (e.g., "region" for AWS, "location" for GCP)
    pub region_attr: Option<String>,
    /// Additional config attributes specific to this provider
    pub config_attrs: Vec<ProviderConfigAttr>,
    /// Configuration code generation patterns
    pub config_codegen: ConfigCodegen,
    /// Additional SDK dependencies required (beyond the service SDK crate).
    /// Each entry is a complete dependency line.
    /// Example: ["aws-config = \"1\"", "aws-smithy-types = \"1\""]
    pub additional_dependencies: Vec<String>,
    /// Error metadata trait import path (if provider has one)
    /// Example: Some("aws_smithy_types::error::metadata::ProvideErrorMetadata")
    pub error_metadata_import: Option<String>,
    /// Error categorization function (Rust code as string)
    /// This function converts SDK errors to ProviderError enum variants
    pub error_categorization_fn: Option<String>,
}
79
/// A provider-specific configuration attribute with code generation metadata
///
/// The two snippet fields are marked `#[serde(default)]`, so they may be omitted
/// from serialized input and then deserialize as `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProviderConfigAttr {
    /// Attribute name (e.g., "profile", "project_id")
    pub name: String,
    /// Human-readable description
    pub description: String,
    /// Whether this attribute is required
    pub required: bool,
    /// Code snippet for setting this config value
    /// Uses {value} placeholder for the extracted JSON value
    /// Example: "config_loader.region(aws_config::Region::new({value}.to_string()))"
    #[serde(default)]
    pub setter_snippet: Option<String>,
    /// Type conversion expression for JSON value extraction
    /// (a method-call chain applied to the JSON value)
    /// Example: "as_str().map(|s| s.to_string())"
    #[serde(default)]
    pub value_extractor: Option<String>,
}
99
/// Configuration code generation patterns for a provider
///
/// Every field is a fragment of Rust source (or a variable name) stored as text.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConfigCodegen {
    /// Code to initialize the config loader/builder
    /// Example (AWS): "aws_config::from_env()"
    /// Example (GCP): "ClientConfig::default()"
    pub init_snippet: String,

    /// Code to finalize and load the config
    /// Example (AWS): "config_loader.load().await"
    /// Example (K8s): "Config::from_kubeconfig(&kubeconfig_data).await"
    pub load_snippet: String,

    /// Code to create client from config
    /// Uses {config} placeholder for the loaded config variable
    /// Example (AWS): "{client_type}::new(&{config})"
    /// Example (K8s): "Client::try_from({config})"
    /// NOTE(review): the AWS example also contains a `{client_type}` placeholder,
    /// presumably filled with the service client type; confirm against the templates.
    pub client_from_config: String,

    /// Variable name for the config loader/builder
    /// Example: "config_loader" or "config_builder"
    pub config_var_name: String,

    /// Variable name for the loaded config
    /// Example: "sdk_config" or "config"
    pub loaded_config_var_name: String,
}
127
128impl Provider {
129    /// Create a Provider from a name string
130    ///
131    /// For built-in providers (aws, gcp, azure, kubernetes), returns the corresponding enum variant.
132    /// For other names, attempts to load from metadata file at `providers/{name}.sdk-metadata.yaml`.
133    ///
134    /// # Arguments
135    /// * `name` - Provider name (e.g., "aws", "gcp", "my-custom-provider")
136    ///
137    /// # Returns
138    /// * `Ok(Provider)` if the provider is built-in or has a valid metadata file
139    /// * `Err(GeneratorError)` if the provider is unknown and has no metadata file
140    pub fn from_name(name: &str) -> Result<Self> {
141        match name {
142            "aws" => Ok(Provider::Aws),
143            "gcp" => Ok(Provider::Gcp),
144            "azure" => Ok(Provider::Azure),
145            "kubernetes" => Ok(Provider::Kubernetes),
146            // Dynamic loading for custom providers with metadata files
147            other => {
148                let metadata_path = format!("providers/{}.sdk-metadata.yaml", other);
149                if std::path::Path::new(&metadata_path).exists() {
150                    Ok(Provider::Custom(other.to_string()))
151                } else {
152                    Err(GeneratorError::Parse(format!(
153                        "Unknown provider '{}'. No metadata file found at {}",
154                        other, metadata_path
155                    )))
156                }
157            },
158        }
159    }
160
161    /// Get the provider name as a string
162    ///
163    /// # Returns
164    /// The provider identifier (e.g., "aws", "gcp", "my-custom-provider")
165    pub fn name(&self) -> &str {
166        match self {
167            Provider::Aws => "aws",
168            Provider::Gcp => "gcp",
169            Provider::Azure => "azure",
170            Provider::Kubernetes => "kubernetes",
171            Provider::Custom(name) => name,
172        }
173    }
174
175    /// Get the SDK configuration for this provider
176    ///
177    /// This method loads provider configuration from YAML metadata files in the `providers/` directory.
178    /// For built-in providers, it loads from `providers/{provider}.sdk-metadata.yaml`.
179    /// For custom providers, it loads from the path specified during creation.
180    ///
181    /// # Panics
182    /// Panics if the metadata file cannot be loaded or parsed. This is intentional as provider
183    /// configuration is required for code generation and should be validated at startup.
184    pub fn sdk_config(&self) -> ProviderSdkConfig {
185        let provider_name = self.name();
186        let metadata_filename = format!("{}.sdk-metadata.yaml", provider_name);
187
188        // Try multiple paths to find the metadata file:
189        // 1. Workspace root (when running from workspace)
190        // 2. Two levels up from CARGO_MANIFEST_DIR (when running tests from crate)
191        let mut paths_to_try = vec![std::path::PathBuf::from("providers").join(&metadata_filename)];
192
193        // Add path relative to crate directory (for tests)
194        if let Ok(manifest_dir) = std::env::var("CARGO_MANIFEST_DIR") {
195            paths_to_try.push(
196                std::path::PathBuf::from(manifest_dir)
197                    .parent()
198                    .and_then(|p| p.parent())
199                    .map(|p| p.join("providers").join(&metadata_filename))
200                    .expect("Failed to construct path from CARGO_MANIFEST_DIR"),
201            );
202        }
203
204        // Try each path until we find one that exists
205        for path in &paths_to_try {
206            if path.exists() {
207                return sdk_metadata::ProviderSdkMetadata::load(path)
208                    .unwrap_or_else(|e| {
209                        panic!(
210                            "Failed to load provider metadata for '{}' from {}: {}",
211                            provider_name,
212                            path.display(),
213                            e
214                        )
215                    })
216                    .to_provider_config();
217            }
218        }
219
220        panic!(
221            "Could not find provider metadata file {} in any of these locations: {}",
222            metadata_filename,
223            paths_to_try
224                .iter()
225                .map(|p| p.display().to_string())
226                .collect::<Vec<_>>()
227                .join(", ")
228        );
229    }
230
231    /// Get the SDK crate name for a specific service
232    pub fn sdk_crate_for_service(&self, service: &str) -> String {
233        self.sdk_config()
234            .sdk_crate_pattern
235            .replace("{service}", service)
236    }
237
238    /// Get the client type for a specific service
239    pub fn client_type_for_service(&self, service: &str) -> String {
240        self.sdk_config()
241            .client_type_pattern
242            .replace("{service}", service)
243    }
244
245    /// Check if this provider uses a shared client (like Kubernetes)
246    /// vs per-service clients (like AWS)
247    pub fn uses_shared_client(&self) -> bool {
248        match self {
249            Provider::Kubernetes => true,
250            Provider::Custom(_) => {
251                // Check if the SDK crate pattern contains {service}
252                // If it doesn't, it's a shared client like Kubernetes
253                !self.sdk_config().sdk_crate_pattern.contains("{service}")
254            },
255            _ => false,
256        }
257    }
258}
259
/// Intermediate representation of a unified cloud provider with multiple services
///
/// This represents a complete provider (e.g., AWS) with all its services.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProviderDefinition {
    /// Cloud provider type
    pub provider: Provider,
    /// Provider name for code generation (e.g., "aws", "gcp")
    /// NOTE(review): stored separately from `provider`; presumably expected to agree with
    /// `provider.name()`, but nothing in this file enforces that.
    pub provider_name: String,
    /// SDK version
    pub sdk_version: String,
    /// All services in this provider
    pub services: Vec<ServiceDefinition>,
}
274
/// Intermediate representation of a cloud service (e.g., aws-sdk-s3)
///
/// This is the output of the parser phase and input to the generator phase;
/// it is the value an `SdkParser::parse` implementation returns.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ServiceDefinition {
    /// Provider this service belongs to
    pub provider: Provider,
    /// Service name (e.g., "s3", "ec2")
    pub name: String,
    /// SDK version this was parsed from
    pub sdk_version: String,
    /// Resources discovered in this service (may be empty)
    pub resources: Vec<ResourceDefinition>,
    /// Data sources for read-only lookups (may be empty)
    pub data_sources: Vec<DataSourceDefinition>,
}
291
/// Nesting mode for block types
///
/// Describes how repeated occurrences of a `BlockDefinition` are grouped;
/// stored in `BlockDefinition::nesting_mode`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum NestingMode {
    /// Exactly one block (e.g., `logging { ... }`)
    Single,
    /// Ordered list of blocks (e.g., `rule { ... } rule { ... }`)
    List,
    /// Unordered set of blocks
    Set,
    /// Map of blocks keyed by an attribute
    Map,
}
304
/// Definition of a nested block type within a resource
///
/// The type is recursive: a block may contain further blocks through `blocks`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BlockDefinition {
    /// Block type name (e.g., "lifecycle_rule", "ingress")
    pub name: String,
    /// Human-readable description
    pub description: Option<String>,
    /// Nested attributes within this block
    pub attributes: Vec<FieldDefinition>,
    /// Further nested blocks (recursive)
    pub blocks: Vec<BlockDefinition>,
    /// Nesting mode: single, list, set, map
    pub nesting_mode: NestingMode,
    /// Minimum number of occurrences (0 for optional)
    pub min_items: u32,
    /// Maximum number of occurrences (0 for unlimited, i.e. 0 is a sentinel, not a limit)
    pub max_items: u32,

    // SDK-specific metadata for code generation.
    // Both fields are `#[serde(default)]`, so input that omits them deserializes to `None`.
    /// SDK type name for this block (e.g., "LifecycleRule", "Container")
    /// Used to generate builder code: `LifecycleRule::builder()`
    #[serde(default)]
    pub sdk_type_name: Option<String>,

    /// SDK accessor/setter method name (e.g., "set_lifecycle_rules", "containers")
    /// Used in request builders: `request.set_lifecycle_rules(rules)`
    #[serde(default)]
    pub sdk_accessor_method: Option<String>,
}
334
/// Definition of a single resource type (e.g., S3 Bucket, EC2 Instance)
///
/// Resources are collected in `ServiceDefinition::resources`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ResourceDefinition {
    /// Resource type name (e.g., "bucket", "instance")
    pub name: String,
    /// Human-readable description
    pub description: Option<String>,
    /// Input fields for creating/updating the resource
    pub fields: Vec<FieldDefinition>,
    /// Output fields returned after operations
    pub outputs: Vec<FieldDefinition>,
    /// Nested block types (e.g., lifecycle_rule, ingress)
    pub blocks: Vec<BlockDefinition>,
    /// CRUD operations available for this resource; each one is individually optional
    pub operations: Operations,
    /// Primary identifier field name (e.g., "bucket_name", "id"); `None` when not known
    pub id_field: Option<String>,
}
353
/// Definition of a data source (read-only resource lookup)
///
/// Unlike `ResourceDefinition`, a data source has exactly one operation, and it is
/// mandatory: `read_operation` is not wrapped in `Option`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DataSourceDefinition {
    /// Data source name (e.g., "vpc", "ami")
    pub name: String,
    /// Human-readable description
    pub description: Option<String>,
    /// Input arguments/filters for the lookup
    pub arguments: Vec<FieldDefinition>,
    /// Output attributes returned from the data source
    pub attributes: Vec<FieldDefinition>,
    /// SDK operation to fetch the data
    pub read_operation: OperationMapping,
}
368
/// CRUD operations mapped from SDK operations
///
/// Every slot is optional; `None` means no SDK operation was mapped for that action.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Operations {
    /// Create operation (e.g., CreateBucket)
    pub create: Option<OperationMapping>,
    /// Read operation (e.g., HeadBucket, GetBucket)
    pub read: Option<OperationMapping>,
    /// Update operation (e.g., PutBucketAcl)
    pub update: Option<OperationMapping>,
    /// Delete operation (e.g., DeleteBucket)
    pub delete: Option<OperationMapping>,
    /// Import operation (often same as read, e.g., HeadBucket)
    pub import: Option<OperationMapping>,
}
383
/// Mapping of a CRUD operation to SDK operation(s)
///
/// Used by both `Operations` (per resource) and `DataSourceDefinition::read_operation`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OperationMapping {
    /// SDK operation name (e.g., "create_bucket")
    pub sdk_operation: String,
    /// Additional operations that might be needed (e.g., for composite updates);
    /// empty when `sdk_operation` alone is sufficient
    pub additional_operations: Vec<String>,
}
392
/// Definition of a field in a resource
///
/// The same type describes resource inputs and outputs, block attributes, and
/// data source arguments and attributes.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FieldDefinition {
    /// Field name (snake_case)
    pub name: String,
    /// Field type
    pub field_type: FieldType,
    /// Whether this field is required
    pub required: bool,
    /// Whether this field is sensitive (passwords, keys)
    pub sensitive: bool,
    /// Whether this field is immutable (requires replacement if changed)
    pub immutable: bool,
    /// Human-readable description
    pub description: Option<String>,
    /// For output fields: the SDK response accessor method name (snake_case)
    /// e.g., "bucket_arn" for response.bucket_arn()
    /// If None, defaults to the field name
    /// (`#[serde(default)]`: may be omitted from serialized input)
    #[serde(default)]
    pub response_accessor: Option<String>,
}
414
/// Represents a field type in the intermediate representation
///
/// Maps SDK types → IR types → KCL types → Generated Rust types
///
/// The KCL and Rust spellings of each variant are produced by
/// `FieldType::to_kcl_type` and `FieldType::to_rust_type`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum FieldType {
    /// String type
    String,
    /// Integer type (i64)
    Integer,
    /// Boolean type
    Boolean,
    /// Float type (f64)
    Float,
    /// List/Array of items; the payload is the element type
    List(Box<FieldType>),
    /// Map/Dictionary; the payloads are the key type and the value type, in that order
    Map(Box<FieldType>, Box<FieldType>),
    /// Custom enum (represented as string variants); emitted as a plain string type
    Enum(Vec<String>),
    /// DateTime (ISO 8601 string); emitted as a plain string type
    DateTime,
    /// Nested object (represented as map) from member name to member type.
    /// The member types are not reflected in the emitted KCL/Rust type strings.
    Object(HashMap<String, Box<FieldType>>),
}
439
440impl FieldType {
441    /// Convert to KCL type string for manifest generation
442    pub fn to_kcl_type(&self) -> String {
443        match self {
444            FieldType::String => "String".to_string(),
445            FieldType::Integer => "Integer".to_string(),
446            FieldType::Boolean => "Boolean".to_string(),
447            FieldType::Float => "Float".to_string(),
448            FieldType::List(inner) => format!("List<{}>", inner.to_kcl_type()),
449            FieldType::Map(k, v) => format!("Map<{},{}>", k.to_kcl_type(), v.to_kcl_type()),
450            FieldType::Enum(_) => "String".to_string(), // Enums become strings
451            FieldType::DateTime => "String".to_string(), // ISO 8601
452            FieldType::Object(_) => "Map<String,Any>".to_string(),
453        }
454    }
455
456    /// Convert to Rust type string for code generation
457    pub fn to_rust_type(&self) -> String {
458        match self {
459            FieldType::String => "String".to_string(),
460            FieldType::Integer => "i64".to_string(),
461            FieldType::Boolean => "bool".to_string(),
462            FieldType::Float => "f64".to_string(),
463            FieldType::List(inner) => format!("Vec<{}>", inner.to_rust_type()),
464            FieldType::Map(k, v) => {
465                format!("HashMap<{}, {}>", k.to_rust_type(), v.to_rust_type())
466            },
467            FieldType::Enum(_) => "String".to_string(),
468            FieldType::DateTime => "String".to_string(),
469            FieldType::Object(_) => "HashMap<String, serde_json::Value>".to_string(),
470        }
471    }
472}
473
/// Metadata about an SDK
///
/// Returned by `SdkParser::metadata` to describe the SDK a parser reads.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SdkMetadata {
    /// Cloud provider
    pub provider: Provider,
    /// SDK version
    pub sdk_version: String,
    /// SDK name (e.g., "aws-sdk-rust", "google-cloud-rust")
    pub sdk_name: String,
}
484
/// Trait for parsing SDK crates into ServiceDefinition IR
///
/// This trait enables a plugin-like architecture where:
/// - Built-in parsers are provided for AWS, GCP, Azure
/// - Custom parsers can be implemented for any SDK
///
/// Implementors must be `Send + Sync` (a supertrait bound). Parsers are stored as
/// `Box<dyn SdkParser>` inside `ParserRegistry`.
///
/// # Example
///
/// ```rust
/// use hemmer_provider_generator_common::{SdkParser, ServiceDefinition, SdkMetadata, Provider, Result};
///
/// struct MyCustomParser {
///     service_name: String,
///     sdk_version: String,
/// }
///
/// impl SdkParser for MyCustomParser {
///     fn parse(&self) -> Result<ServiceDefinition> {
///         // Parse your SDK and return ServiceDefinition
///         todo!("Implement custom parsing logic")
///     }
///
///     fn supported_services(&self) -> Vec<String> {
///         vec!["my-service".to_string()]
///     }
///
///     fn metadata(&self) -> SdkMetadata {
///         SdkMetadata {
///             provider: Provider::Aws, // or your custom provider
///             sdk_version: self.sdk_version.clone(),
///             sdk_name: "my-custom-sdk".to_string(),
///         }
///     }
/// }
/// ```
pub trait SdkParser: Send + Sync {
    /// Parse the SDK and return service definition
    ///
    /// This method should:
    /// 1. Load SDK metadata (rustdoc JSON, OpenAPI spec, etc.)
    /// 2. Extract operations and types
    /// 3. Build ResourceDefinition instances
    /// 4. Return complete ServiceDefinition
    ///
    /// # Errors
    /// Implementations report failures as a `GeneratorError` through the crate's
    /// `Result` alias.
    fn parse(&self) -> Result<ServiceDefinition>;

    /// List all services exposed by this SDK
    ///
    /// For AWS: ["s3", "ec2", "dynamodb", ...]
    /// For GCP: ["storage", "compute", ...]
    fn supported_services(&self) -> Vec<String>;

    /// Get metadata about the SDK
    ///
    /// Returns information about the SDK provider, version, and name
    fn metadata(&self) -> SdkMetadata;
}
541
/// Registry for managing SDK parsers
///
/// This registry allows:
/// - Registering built-in parsers (AWS, GCP, Azure)
/// - Registering custom user-provided parsers
/// - Retrieving parsers by provider name
///
/// Parsers are owned by the registry and keyed by the exact name string passed to
/// `register`; lookups compare names case-sensitively.
///
/// # Example
///
/// ```rust
/// use hemmer_provider_generator_common::{ParserRegistry, Provider};
/// # use hemmer_provider_generator_common::{SdkParser, ServiceDefinition, SdkMetadata, Result};
/// #
/// # struct MyParser;
/// # impl SdkParser for MyParser {
/// #     fn parse(&self) -> Result<ServiceDefinition> { todo!() }
/// #     fn supported_services(&self) -> Vec<String> { vec![] }
/// #     fn metadata(&self) -> SdkMetadata {
/// #         SdkMetadata {
/// #             provider: Provider::Aws,
/// #             sdk_version: "1.0.0".to_string(),
/// #             sdk_name: "test".to_string(),
/// #         }
/// #     }
/// # }
///
/// let mut registry = ParserRegistry::new();
/// registry.register("aws", Box::new(MyParser));
///
/// let parser = registry.get("aws");
/// assert!(parser.is_some());
/// ```
pub struct ParserRegistry {
    // Registered parsers, keyed by the name given at registration time
    parsers: HashMap<String, Box<dyn SdkParser>>,
}
577
578impl ParserRegistry {
579    /// Create a new empty parser registry
580    pub fn new() -> Self {
581        Self {
582            parsers: HashMap::new(),
583        }
584    }
585
586    /// Register a parser with a given name
587    ///
588    /// # Arguments
589    /// * `name` - Provider name (e.g., "aws", "gcp", "azure")
590    /// * `parser` - Boxed parser implementing SdkParser trait
591    ///
592    /// # Example
593    /// ```rust
594    /// # use hemmer_provider_generator_common::{ParserRegistry, SdkParser, ServiceDefinition, SdkMetadata, Provider, Result};
595    /// # struct MyParser;
596    /// # impl SdkParser for MyParser {
597    /// #     fn parse(&self) -> Result<ServiceDefinition> { todo!() }
598    /// #     fn supported_services(&self) -> Vec<String> { vec![] }
599    /// #     fn metadata(&self) -> SdkMetadata {
600    /// #         SdkMetadata {
601    /// #             provider: Provider::Aws,
602    /// #             sdk_version: "1.0.0".to_string(),
603    /// #             sdk_name: "test".to_string(),
604    /// #         }
605    /// #     }
606    /// # }
607    /// let mut registry = ParserRegistry::new();
608    /// registry.register("my-provider", Box::new(MyParser));
609    /// ```
610    pub fn register(&mut self, name: &str, parser: Box<dyn SdkParser>) {
611        self.parsers.insert(name.to_string(), parser);
612    }
613
614    /// Get a parser by provider name
615    ///
616    /// Returns `None` if no parser is registered with the given name.
617    pub fn get(&self, name: &str) -> Option<&dyn SdkParser> {
618        self.parsers.get(name).map(|p| p.as_ref())
619    }
620
621    /// List all registered provider names
622    pub fn list_providers(&self) -> Vec<String> {
623        self.parsers.keys().cloned().collect()
624    }
625
626    /// Check if a provider is registered
627    pub fn has_provider(&self, name: &str) -> bool {
628        self.parsers.contains_key(name)
629    }
630}
631
632impl Default for ParserRegistry {
633    fn default() -> Self {
634        Self::new()
635    }
636}
637
/// Sanitize a string to be a valid Rust identifier
///
/// This function ensures the result can be safely used as:
/// - Function names
/// - Variable names
/// - Module names
/// - Struct/enum names
///
/// ## Transformations
///
/// 1. Replaces special characters (`.`, `-`, `/`, etc.) with underscores
/// 2. Collapses consecutive underscores to a single underscore
/// 3. Removes leading/trailing underscores
/// 4. Prefixes with `_` if starts with a digit
/// 5. Escapes Rust keywords with `r#` prefix, except `crate`, `self`, `Self` and
///    `super`: the language does not accept those as raw identifiers, so they get
///    a `_` suffix instead
///
/// An input with no alphanumeric characters (including the empty string) yields an
/// empty string, which is not a usable identifier; callers must handle that case.
///
/// ## Examples
///
/// ```
/// use hemmer_provider_generator_common::sanitize_rust_identifier;
///
/// assert_eq!(sanitize_rust_identifier("rbac.authorization"), "rbac_authorization");
/// assert_eq!(sanitize_rust_identifier("type"), "r#type");
/// assert_eq!(sanitize_rust_identifier("self"), "self_");
/// assert_eq!(sanitize_rust_identifier("acm-pca"), "acm_pca");
/// assert_eq!(sanitize_rust_identifier("123invalid"), "_123invalid");
/// assert_eq!(sanitize_rust_identifier("normal_name"), "normal_name");
/// ```
pub fn sanitize_rust_identifier(name: &str) -> String {
    // Replace special characters with underscores
    let mut sanitized: String = name
        .chars()
        .map(|c| {
            if c.is_alphanumeric() || c == '_' {
                c
            } else {
                '_'
            }
        })
        .collect();

    // Clean up consecutive underscores
    while sanitized.contains("__") {
        sanitized = sanitized.replace("__", "_");
    }

    // Remove leading/trailing underscores
    let sanitized = sanitized.trim_matches('_');

    // Ensure doesn't start with digit
    let sanitized = if sanitized.chars().next().is_some_and(|c| c.is_ascii_digit()) {
        format!("_{}", sanitized)
    } else {
        sanitized.to_string()
    };

    // Keywords that cannot be written as raw identifiers: `r#self`, `r#Self`,
    // `r#super` and `r#crate` are rejected by the compiler.
    const NON_RAW_KEYWORDS: &[&str] = &["crate", "self", "Self", "super"];

    // Keywords that are valid once prefixed with r#
    const RUST_KEYWORDS: &[&str] = &[
        // Strict keywords (always reserved)
        "as", "break", "const", "continue", "else", "enum", "extern", "false", "fn", "for", "if",
        "impl", "in", "let", "loop", "match", "mod", "move", "mut", "pub", "ref", "return",
        "static", "struct", "trait", "true", "type", "unsafe", "use", "where", "while",
        // Reserved keywords (reserved for future use)
        "abstract", "become", "box", "do", "final", "macro", "override", "priv", "typeof",
        "unsized", "virtual", "yield",
        // Reserved starting with the 2024 edition; `r#gen` is accepted by earlier editions too
        "gen",
        // Weak keywords (context-dependent, but safer to escape)
        "async", "await", "dyn", "try", "union",
    ];

    if NON_RAW_KEYWORDS.contains(&sanitized.as_str()) {
        format!("{}_", sanitized)
    } else if RUST_KEYWORDS.contains(&sanitized.as_str()) {
        format!("r#{}", sanitized)
    } else {
        sanitized
    }
}
711
/// Sanitize a string so it can be embedded in a larger Rust identifier
///
/// Behaves like `sanitize_rust_identifier` except for keywords: a fragment of a
/// composite name (such as a function name built from several parts) cannot carry
/// the `r#` prefix, because `r#` only applies to a complete identifier. Keywords
/// therefore receive a trailing underscore instead.
///
/// # Examples
///
/// ```
/// use hemmer_provider_generator_common::sanitize_identifier_part;
///
/// // Dots become underscores
/// assert_eq!(sanitize_identifier_part("rbac.authorization"), "rbac_authorization");
///
/// // Keywords get underscore suffix (not r# prefix)
/// assert_eq!(sanitize_identifier_part("type"), "type_");
///
/// // Valid names are unchanged
/// assert_eq!(sanitize_identifier_part("bucket"), "bucket");
/// ```
///
/// # Transformations
///
/// 1. Every run of non-alphanumeric characters (underscores included) between two
///    alphanumeric characters becomes a single underscore
/// 2. Such runs at the start or end of the input are dropped
/// 3. A leading ASCII digit is prefixed with an underscore
/// 4. Rust keywords get an underscore suffix
///
/// Use for: function name parts, composite identifiers
pub fn sanitize_identifier_part(name: &str) -> String {
    const RUST_KEYWORDS: &[&str] = &[
        // Strict keywords (always reserved)
        "as", "break", "const", "continue", "crate", "else", "enum", "extern", "false", "fn", "for",
        "if", "impl", "in", "let", "loop", "match", "mod", "move", "mut", "pub", "ref", "return",
        "self", "Self", "static", "struct", "super", "trait", "true", "type", "unsafe", "use",
        "where", "while", // Reserved keywords (reserved for future use)
        "abstract", "become", "box", "do", "final", "macro", "override", "priv", "typeof",
        "unsized", "virtual", "yield",
        // Weak keywords (context-dependent, but safer to escape)
        "async", "await", "dyn", "try", "union",
    ];

    // One pass over the input: alphanumeric characters are copied through, and any
    // run of other characters is remembered as a single pending separator.
    let mut part = String::with_capacity(name.len() + 1);
    let mut gap_pending = false;
    for ch in name.chars() {
        if ch.is_alphanumeric() {
            // Emit the separator only between two words, never at the very start
            if gap_pending && !part.is_empty() {
                part.push('_');
            }
            part.push(ch);
            gap_pending = false;
        } else {
            gap_pending = true;
        }
    }
    // A separator still pending here was trailing, so it is discarded.

    // Identifiers may not begin with a digit
    if matches!(part.chars().next(), Some(first) if first.is_ascii_digit()) {
        part.insert(0, '_');
    }

    // Composite names cannot use r#, so keywords are disambiguated with a suffix
    if RUST_KEYWORDS.contains(&part.as_str()) {
        part.push('_');
    }

    part
}
792
#[cfg(test)]
mod tests {
    use super::*;

    /// Minimal [`SdkParser`] implementation used to exercise [`ParserRegistry`].
    ///
    /// It always reports `Provider::Aws` and echoes back the service name and
    /// SDK version it was built with.
    struct MockParser {
        service_name: String,
        sdk_version: String,
    }

    impl SdkParser for MockParser {
        fn parse(&self) -> Result<ServiceDefinition> {
            let definition = ServiceDefinition {
                provider: Provider::Aws,
                name: self.service_name.clone(),
                sdk_version: self.sdk_version.clone(),
                resources: Vec::new(),
                // Data source detection is planned for later; nothing to list yet.
                data_sources: Vec::new(),
            };
            Ok(definition)
        }

        fn supported_services(&self) -> Vec<String> {
            Vec::from([self.service_name.clone()])
        }

        fn metadata(&self) -> SdkMetadata {
            let sdk_version = self.sdk_version.clone();
            SdkMetadata {
                provider: Provider::Aws,
                sdk_version,
                sdk_name: String::from("mock-sdk"),
            }
        }
    }

    /// Builds a [`MockParser`] from borrowed service name and SDK version strings.
    fn mock_parser(service_name: &str, sdk_version: &str) -> MockParser {
        MockParser {
            service_name: service_name.to_owned(),
            sdk_version: sdk_version.to_owned(),
        }
    }

    // ---- FieldType ----------------------------------------------------------

    /// A freshly created `FieldType::String` compares equal to the same variant.
    #[test]
    fn test_field_type_creation() {
        let created = FieldType::String;
        assert_eq!(created, FieldType::String);
    }

    /// Scalar, list and map field types render to the expected KCL type names.
    #[test]
    fn test_field_type_to_kcl() {
        let cases = [
            (FieldType::String, "String"),
            (FieldType::Integer, "Integer"),
            (FieldType::List(Box::new(FieldType::String)), "List<String>"),
            (
                FieldType::Map(Box::new(FieldType::String), Box::new(FieldType::Integer)),
                "Map<String,Integer>",
            ),
        ];

        for (field_type, expected) in cases {
            assert_eq!(field_type.to_kcl_type(), expected);
        }
    }

    /// Scalar, list and map field types render to the expected Rust type names.
    #[test]
    fn test_field_type_to_rust() {
        let cases = [
            (FieldType::String, "String"),
            (FieldType::Integer, "i64"),
            (FieldType::List(Box::new(FieldType::String)), "Vec<String>"),
            (
                FieldType::Map(Box::new(FieldType::String), Box::new(FieldType::Integer)),
                "HashMap<String, i64>",
            ),
        ];

        for (field_type, expected) in cases {
            assert_eq!(field_type.to_rust_type(), expected);
        }
    }

    // ---- ServiceDefinition --------------------------------------------------

    /// A hand-built `ServiceDefinition` exposes the provider and name it was given.
    #[test]
    fn test_service_definition() {
        let definition = ServiceDefinition {
            provider: Provider::Aws,
            name: String::from("s3"),
            sdk_version: String::from("1.0.0"),
            resources: Vec::new(),
            // Data source detection is planned for later; nothing to list yet.
            data_sources: Vec::new(),
        };

        assert_eq!(definition.provider, Provider::Aws);
        assert_eq!(definition.name, "s3");
    }

    // ---- ParserRegistry -----------------------------------------------------

    /// `ParserRegistry::new` starts with no registered providers.
    #[test]
    fn test_parser_registry_new() {
        let registry = ParserRegistry::new();
        assert!(registry.list_providers().is_empty());
    }

    /// Registering a parser makes exactly that provider name discoverable.
    #[test]
    fn test_parser_registry_register() {
        let mut registry = ParserRegistry::new();
        registry.register("aws", Box::new(mock_parser("s3", "1.0.0")));

        let aws_known = registry.has_provider("aws");
        let gcp_known = registry.has_provider("gcp");
        assert!(aws_known);
        assert!(!gcp_known);
    }

    /// A registered parser can be fetched by name and reports the mock's metadata.
    #[test]
    fn test_parser_registry_get() {
        let mut registry = ParserRegistry::new();
        registry.register("aws", Box::new(mock_parser("s3", "1.0.0")));

        let lookup = registry.get("aws");
        assert!(lookup.is_some());

        let meta = lookup.unwrap().metadata();
        assert_eq!(meta.provider, Provider::Aws);
        assert_eq!(meta.sdk_name, "mock-sdk");
    }

    /// `list_providers` returns every name that was registered.
    #[test]
    fn test_parser_registry_list_providers() {
        let registrations = [("aws", "s3", "1.0.0"), ("gcp", "storage", "2.0.0")];

        let mut registry = ParserRegistry::new();
        for (provider, service, version) in registrations {
            registry.register(provider, Box::new(mock_parser(service, version)));
        }

        let providers = registry.list_providers();
        assert_eq!(providers.len(), 2);
        for expected in ["aws", "gcp"] {
            assert!(
                providers.contains(&expected.to_string()),
                "missing provider {}",
                expected
            );
        }
    }

    /// `ParserRegistry::default` also starts with no registered providers.
    #[test]
    fn test_parser_registry_default() {
        let registry = ParserRegistry::default();
        assert!(registry.list_providers().is_empty());
    }

    // ---- sanitize_rust_identifier -------------------------------------------

    /// Dots are turned into underscores.
    #[test]
    fn test_sanitize_rust_identifier_dots() {
        let cases = [
            ("rbac.authorization", "rbac_authorization"),
            ("apis.internal.k8s.io", "apis_internal_k8s_io"),
        ];
        for (input, expected) in cases {
            assert_eq!(sanitize_rust_identifier(input), expected);
        }
    }

    /// Hyphens are turned into underscores.
    #[test]
    fn test_sanitize_rust_identifier_hyphens() {
        let cases = [("acm-pca", "acm_pca"), ("eks-fargate", "eks_fargate")];
        for (input, expected) in cases {
            assert_eq!(sanitize_rust_identifier(input), expected);
        }
    }

    /// Keywords come back with the `r#` raw-identifier prefix.
    #[test]
    fn test_sanitize_rust_identifier_keywords() {
        // NOTE(review): rustc rejects `r#self` and `r#Self` (along with `r#crate`
        // and `r#super`) as raw identifiers. The last two cases pin the current
        // output only; confirm whether the sanitizer should treat them specially.
        let cases = [
            ("type", "r#type"),
            ("async", "r#async"),
            ("await", "r#await"),
            ("match", "r#match"),
            ("self", "r#self"),
            ("Self", "r#Self"),
        ];
        for (input, expected) in cases {
            assert_eq!(sanitize_rust_identifier(input), expected);
        }
    }

    /// Names beginning with a digit gain a leading underscore.
    #[test]
    fn test_sanitize_rust_identifier_starts_with_digit() {
        let cases = [("123invalid", "_123invalid"), ("2fa", "_2fa")];
        for (input, expected) in cases {
            assert_eq!(sanitize_rust_identifier(input), expected);
        }
    }

    /// Slashes, `@` and spaces are each replaced by an underscore.
    #[test]
    fn test_sanitize_rust_identifier_special_characters() {
        let cases = [
            ("foo/bar", "foo_bar"),
            ("foo@bar", "foo_bar"),
            ("foo bar", "foo_bar"),
        ];
        for (input, expected) in cases {
            assert_eq!(sanitize_rust_identifier(input), expected);
        }
    }

    /// Runs of underscores (or of replaced characters) collapse to a single one.
    #[test]
    fn test_sanitize_rust_identifier_consecutive_underscores() {
        let cases = [("foo__bar", "foo_bar"), ("a...b", "a_b")];
        for (input, expected) in cases {
            assert_eq!(sanitize_rust_identifier(input), expected);
        }
    }

    /// Already-valid identifiers pass through untouched.
    #[test]
    fn test_sanitize_rust_identifier_unchanged() {
        for name in ["normal_name", "ValidRustName", "name123"] {
            assert_eq!(sanitize_rust_identifier(name), name);
        }
    }

    /// Empty input, all-special input and surrounding underscores.
    #[test]
    fn test_sanitize_rust_identifier_edge_cases() {
        let cases = [
            // Empty string stays empty.
            ("", ""),
            // Input made only of special characters collapses to nothing.
            ("...", ""),
            // Leading and trailing underscores are stripped.
            ("_test_", "test"),
        ];
        for (input, expected) in cases {
            assert_eq!(sanitize_rust_identifier(input), expected);
        }
    }

    // ---- sanitize_identifier_part -------------------------------------------

    /// Dotted Kubernetes-style resource names become underscore-separated.
    #[test]
    fn test_sanitize_identifier_part_dots() {
        let cases = [
            ("rbac.authorization", "rbac_authorization"),
            ("apps.v1", "apps_v1"),
        ];
        for (input, expected) in cases {
            assert_eq!(sanitize_identifier_part(input), expected);
        }
    }

    /// Keywords receive an underscore suffix instead of the `r#` prefix.
    #[test]
    fn test_sanitize_identifier_part_keywords() {
        // Parts are embedded in composite names such as `plan_type_()`; an `r#`
        // prefix in the middle of such a name would be invalid, hence the suffix.
        let cases = [("type", "type_"), ("mod", "mod_"), ("async", "async_")];
        for (input, expected) in cases {
            assert_eq!(sanitize_identifier_part(input), expected);
        }
    }

    /// Names that are already valid are returned as-is.
    #[test]
    fn test_sanitize_identifier_part_valid_names() {
        for name in ["bucket", "deployment", "managedkafka"] {
            assert_eq!(sanitize_identifier_part(name), name);
        }
    }

    /// Hyphens, dots and `::` all end up as a single underscore.
    #[test]
    fn test_sanitize_identifier_part_special_characters() {
        for input in ["test-name", "test.name", "test::name"] {
            assert_eq!(sanitize_identifier_part(input), "test_name");
        }
    }

    /// Empty input, all-special input, surrounding underscores and leading digits.
    #[test]
    fn test_sanitize_identifier_part_edge_cases() {
        let cases = [
            // Empty string stays empty.
            ("", ""),
            // Input made only of special characters collapses to nothing.
            ("...", ""),
            // Leading and trailing underscores are stripped.
            ("_test_", "test"),
            // A leading digit gets an underscore in front of it.
            ("2fa", "_2fa"),
        ];
        for (input, expected) in cases {
            assert_eq!(sanitize_identifier_part(input), expected);
        }
    }

    // ---- Provider -----------------------------------------------------------

    /// Each built-in provider name resolves to its enum variant.
    #[test]
    fn test_provider_from_name_builtin() {
        let cases = [
            ("aws", Provider::Aws),
            ("gcp", Provider::Gcp),
            ("azure", Provider::Azure),
            ("kubernetes", Provider::Kubernetes),
        ];
        for (name, expected) in cases {
            assert_eq!(Provider::from_name(name).unwrap(), expected);
        }
    }

    /// An unrecognised name yields an error mentioning "Unknown provider".
    #[test]
    fn test_provider_from_name_unknown() {
        let outcome = Provider::from_name("nonexistent");
        assert!(outcome.is_err());

        let message = outcome.unwrap_err().to_string();
        assert!(message.contains("Unknown provider"));
    }

    /// `name()` returns the lowercase provider name, or the custom string as given.
    #[test]
    fn test_provider_name() {
        let cases = [
            (Provider::Aws, "aws"),
            (Provider::Gcp, "gcp"),
            (Provider::Azure, "azure"),
            (Provider::Kubernetes, "kubernetes"),
            (Provider::Custom("my-provider".to_string()), "my-provider"),
        ];
        for (provider, expected) in cases {
            assert_eq!(provider.name(), expected);
        }
    }

    /// Checks the SDK configuration reported for each built-in provider.
    #[test]
    fn test_provider_sdk_config_loads_from_yaml() {
        let aws = Provider::Aws.sdk_config();
        assert_eq!(aws.sdk_crate_pattern, "aws-sdk-{service}");
        assert_eq!(aws.client_type_pattern, "aws_sdk_{service}::Client");
        assert_eq!(aws.config_crate.as_deref(), Some("aws-config"));
        assert!(aws.async_client);
        assert_eq!(aws.region_attr.as_deref(), Some("region"));
        assert_eq!(aws.additional_dependencies.len(), 3);
        assert!(aws.error_metadata_import.is_some());
        assert!(aws.error_categorization_fn.is_some());

        let gcp = Provider::Gcp.sdk_config();
        assert_eq!(gcp.sdk_crate_pattern, "google-cloud-{service}");
        assert_eq!(gcp.client_type_pattern, "google_cloud_{service}::Client");
        assert!(gcp.config_crate.is_none());
        assert!(gcp.async_client);
        assert_eq!(gcp.region_attr.as_deref(), Some("location"));

        let azure = Provider::Azure.sdk_config();
        assert_eq!(azure.sdk_crate_pattern, "azure_sdk_{service}");
        assert_eq!(azure.config_crate.as_deref(), Some("azure_identity"));

        let kubernetes = Provider::Kubernetes.sdk_config();
        assert_eq!(kubernetes.sdk_crate_pattern, "kube");
        assert_eq!(kubernetes.client_type_pattern, "kube::Client");
        assert!(kubernetes.region_attr.is_none());
    }

    /// Only Kubernetes reports a shared client; the other built-ins do not.
    #[test]
    fn test_provider_uses_shared_client() {
        for provider in [Provider::Aws, Provider::Gcp, Provider::Azure] {
            assert!(!provider.uses_shared_client());
        }
        assert!(Provider::Kubernetes.uses_shared_client());
    }

    /// The AWS error-categorization source contains the expected names and matchers.
    #[test]
    fn test_aws_error_categorization_function_generated() {
        let generated = Provider::Aws
            .sdk_config()
            .error_categorization_fn
            .expect("AWS should have error categorization");

        let expected_fragments = [
            // Function and category names that must appear in the output.
            "categorize_error_code",
            "NotFound",
            "ProviderError::NotFound",
            "AlreadyExists",
            "ProviderError::AlreadyExists",
            "PermissionDenied",
            "sdk_error_to_provider_error",
            // Wildcard patterns translate to prefix/suffix checks:
            // "NoSuch*" -> starts_with("NoSuch")
            "starts_with(\"NoSuch\")",
            // "*InUse" -> ends_with("InUse")
            "ends_with(\"InUse\")",
            // "*LimitExceeded" -> ends_with("LimitExceeded")
            "ends_with(\"LimitExceeded\")",
            // "Invalid*" -> starts_with("Invalid")
            "starts_with(\"Invalid\")",
        ];

        for fragment in expected_fragments {
            assert!(
                generated.contains(fragment),
                "missing fragment: {}",
                fragment
            );
        }
    }
}