// rag-module 0.6.7
//
// Enterprise RAG module with chat context storage, vector search, session management, and model downloading. Rust implementation with Node.js compatibility.
//! IAM Service Parser
//! 
//! Parses AWS IAM service data and generates searchable documents for users, roles, policies, and groups

use crate::services::aws_estate_service::AwsServiceParser;
use crate::types::Document;
use anyhow::{Result, anyhow};
use serde_json::{Value, json};
use std::collections::HashMap;
use tracing::{debug, warn};
use indexmap::IndexMap;
use uuid::Uuid;
use async_trait::async_trait;

/// Parser for AWS IAM service data (users, roles, policies and groups).
///
/// The type is a stateless unit struct, so it can be created with
/// `IamParser::new()` or `IamParser::default()` interchangeably.
#[derive(Debug, Default, Clone)]
pub struct IamParser;

impl IamParser {
    pub fn new() -> Self {
        Self
    }

    /// Create base metadata for AWS resources
    fn create_base_metadata(
        account_id: &str, 
        service: &str, 
        resource_type: &str,
        region: Option<&str>
    ) -> IndexMap<String, Value> {
        let mut metadata = IndexMap::new();
        metadata.insert("account_id".to_string(), json!(account_id));
        metadata.insert("service".to_string(), json!(service));
        metadata.insert("resource_type".to_string(), json!(resource_type));
        metadata.insert("cloud_provider".to_string(), json!("aws"));
        metadata.insert("document_type".to_string(), json!("aws_estate"));
        metadata.insert("last_synced".to_string(), json!(chrono::Utc::now().timestamp()));
        
        if let Some(region_val) = region {
            metadata.insert("region".to_string(), json!(region_val));
        }
        
        metadata
    }

    /// Builds an ARN string for a resource.
    ///
    /// IAM is a global service, so IAM ARNs leave the region segment empty and
    /// join the resource type and name with `/`, for example
    /// `arn:aws:iam::123456789012:user/alice`. `region` is ignored when
    /// `service` is `"iam"`. No resource path is inserted between the type and
    /// the name.
    ///
    /// Any other service gets the colon-separated layout
    /// `arn:aws:{service}:{region}:{account_id}:{resource_type}:{resource_id}`.
    fn generate_arn(
        service: &str,
        region: &str,
        account_id: &str,
        resource_type: &str,
        resource_id: &str,
    ) -> String {
        if service == "iam" {
            // IAM resource ARNs use "type/name", not "type:name".
            format!("arn:aws:iam::{}:{}/{}", account_id, resource_type, resource_id)
        } else {
            format!("arn:aws:{}:{}:{}:{}:{}", service, region, account_id, resource_type, resource_id)
        }
    }

    /// Dynamic parser for any IAM resource type (users, roles, policies, groups, etc.)
    fn parse_iam_resources(&self, account_id: &str, resource_type: &str, resources: &[Value]) -> Result<Vec<Document>> {
        let mut documents = Vec::new();

        // Define possible field name variations for common attributes
        let name_fields = ["UserName", "user_name", "RoleName", "role_name", "PolicyName", "policy_name", "GroupName", "group_name"];
        let id_fields = ["UserId", "user_id", "RoleId", "role_id", "PolicyId", "policy_id", "GroupId", "group_id"];
        let arn_fields = ["Arn", "arn"];
        let create_date_fields = ["CreateDate", "create_date", "CreatedDate", "created_date"];

        for resource in resources {
            // Dynamically extract the resource name
            let resource_name = name_fields.iter()
                .find_map(|field| resource.get(field).and_then(|v| v.as_str()))
                .unwrap_or("unknown");

            // Dynamically extract the resource ID
            let resource_id = id_fields.iter()
                .find_map(|field| resource.get(field).and_then(|v| v.as_str()))
                .unwrap_or("unknown");

            // Generate ARN if not present
            let generated_arn = Self::generate_arn("iam", "", account_id, resource_type, resource_name);
            let arn = arn_fields.iter()
                .find_map(|field| resource.get(field).and_then(|v| v.as_str()))
                .unwrap_or(&generated_arn);

            // Dynamically extract create date
            let create_date = create_date_fields.iter()
                .find_map(|field| resource.get(field).and_then(|v| v.as_str()))
                .unwrap_or("unknown");

            // Create searchable content with resource type
            let content = format!(
                "IAM {} {} (ID: {}) - Created: {} - ARN: {}",
                resource_type.replace("iam-", "").to_uppercase(),
                resource_name,
                resource_id,
                create_date,
                arn
            );

            // Create base metadata
            let mut metadata = Self::create_base_metadata(
                account_id,
                "iam",
                &format!("iam-{}", resource_type),
                None // IAM is global
            );

            // Dynamically add ALL fields from the resource to metadata
            if let Some(resource_obj) = resource.as_object() {
                for (key, value) in resource_obj {
                    // Convert field names to snake_case for consistency
                    let snake_case_key = Self::to_snake_case(key);

                    // Skip embedding/vector fields
                    if !["embedding", "embeddings", "vector", "_vectors"].contains(&snake_case_key.as_str()) {
                        metadata.insert(snake_case_key, value.clone());
                    }
                }
            }

            let document = Document::new(
                arn.to_string(),
                content,
            ).with_metadata(metadata);

            documents.push(document);
            debug!("✅ Created IAM {} document for: {}", resource_type, resource_name);
        }

        Ok(documents)
    }

    /// Converts a PascalCase/camelCase identifier to snake_case.
    ///
    /// An underscore is inserted before an uppercase character unless it is the
    /// first character, directly follows an underscore, or sits inside a run of
    /// uppercase letters. A run of uppercase letters is treated as one acronym
    /// word, and the last capital of the run starts a new word when a lowercase
    /// letter follows it:
    ///
    /// * `UserName` -> `user_name`
    /// * `UserID` -> `user_id`
    /// * `MFADevices` -> `mfa_devices`
    /// * `user_Name` -> `user_name`
    ///
    /// Input that is already snake_case is returned unchanged.
    fn to_snake_case(s: &str) -> String {
        let mut result = String::with_capacity(s.len() + 4);
        let mut prev: Option<char> = None;
        let mut chars = s.chars().peekable();

        while let Some(ch) = chars.next() {
            if ch.is_uppercase() {
                let next_is_lower = chars.peek().map_or(false, |next| next.is_lowercase());
                // Word boundary: not at the start, not right after '_', and
                // either leaving a lowercase/digit run or ending an acronym.
                let starts_word = prev.map_or(false, |p| {
                    p != '_' && (!p.is_uppercase() || next_is_lower)
                });
                if starts_word {
                    result.push('_');
                }
            }
            // Keep the full lowercase expansion; some characters lowercase to
            // more than one `char`.
            result.extend(ch.to_lowercase());
            prev = Some(ch);
        }

        result
    }
}

#[async_trait]
impl AwsServiceParser for IamParser {
    /// Returns the identifier of the AWS service this parser handles (`"iam"`).
    fn service_name(&self) -> &str {
        const SERVICE_NAME: &str = "iam";
        SERVICE_NAME
    }
    
    /// Reports whether `service_data` looks like IAM data.
    ///
    /// Returns `true` when the value is a JSON object that contains at least
    /// one known IAM marker key: a resource collection (`Users`, `Roles`,
    /// `Policies`, `Groups`, in PascalCase or lowercase), a user total
    /// (`TotalUsers` / `total_users`) or `account_summary`.
    fn can_parse(&self, service_data: &Value) -> bool {
        const IAM_MARKERS: [&str; 11] = [
            "Users",
            "users",
            "Roles",
            "roles",
            "Policies",
            "policies",
            "Groups",
            "groups",
            "TotalUsers",
            "total_users",
            "account_summary",
        ];

        if !service_data.is_object() {
            return false;
        }

        IAM_MARKERS
            .iter()
            .any(|marker| service_data.get(*marker).is_some())
    }
    
    async fn parse(&self, account_id: &str, service_data: &Value) -> Result<Vec<Document>> {
        debug!("🔍 IAM parser processing data for account: {}", account_id);

        let mut documents = Vec::new();

        // Define IAM resource types to look for (with both PascalCase and snake_case variations)
        let resource_mappings = vec![
            (vec!["Users", "users"], "user"),
            (vec!["Roles", "roles"], "role"),
            (vec!["Policies", "policies"], "policy"),
            (vec!["Groups", "groups"], "group"),
        ];

        // Dynamically parse each resource type
        for (field_variations, resource_type) in resource_mappings {
            // Try each field variation
            for field_name in &field_variations {
                if let Some(resources) = service_data.get(field_name).and_then(|v| v.as_array()) {
                    if !resources.is_empty() {
                        match self.parse_iam_resources(account_id, resource_type, resources) {
                            Ok(mut resource_docs) => {
                                let count = resource_docs.len();
                                documents.append(&mut resource_docs);
                                debug!("✅ Parsed {} IAM {}s", count, resource_type);
                            }
                            Err(e) => {
                                warn!("⚠️ Failed to parse IAM {}s: {}", resource_type, e);
                            }
                        }
                    }
                    break; // Found the field, no need to try other variations
                }
            }
        }

        if documents.is_empty() {
            warn!("🟡 IAM parser found no parseable data");
        } else {
            debug!("🎉 IAM parser generated {} documents", documents.len());
        }

        Ok(documents)
    }
    
    /// Returns a JSON-Schema-style description of the input this parser
    /// accepts: four arrays of objects (`Users`, `Roles`, `Policies`,
    /// `Groups`) and four numeric totals. Always returns `Some`.
    fn get_data_schema(&self) -> Option<Value> {
        // Schema fragment for a resource collection: an array of free-form objects.
        let object_array = |description: &str| {
            json!({
                "type": "array",
                "description": description,
                "items": { "type": "object" }
            })
        };
        // Schema fragment for the `Total*` counters.
        let number = || json!({ "type": "number" });

        Some(json!({
            "type": "object",
            "description": "Dynamic IAM parser that supports Users, Roles, Policies, and Groups",
            "properties": {
                "Users": object_array("IAM users with all AWS IAM user fields"),
                "Roles": object_array("IAM roles with all AWS IAM role fields"),
                "Policies": object_array("IAM policies with all AWS IAM policy fields"),
                "Groups": object_array("IAM groups with all AWS IAM group fields"),
                "TotalUsers": number(),
                "TotalRoles": number(),
                "TotalPolicies": number(),
                "TotalGroups": number()
            },
            "note": "This parser dynamically extracts ALL fields from IAM resources"
        }))
    }
}