use crate::services::aws_estate_service::AwsServiceParser;
use crate::types::Document;
use anyhow::{Result, anyhow};
use serde_json::{Value, json};
use std::collections::HashMap;
use tracing::{debug, warn};
use indexmap::IndexMap;
use uuid::Uuid;
use async_trait::async_trait;
/// Stateless parser for AWS IAM estate data (users, roles, policies, groups).
///
/// The type carries no fields, so it is free to copy and can be built with
/// either `IamParser::new()` or `IamParser::default()`.
#[derive(Debug, Default, Clone, Copy)]
pub struct IamParser;
impl IamParser {
/// Creates a new parser instance.
///
/// `IamParser` is a unit struct, so construction performs no work.
pub fn new() -> Self {
Self
}
fn create_base_metadata(
account_id: &str,
service: &str,
resource_type: &str,
region: Option<&str>
) -> IndexMap<String, Value> {
let mut metadata = IndexMap::new();
metadata.insert("account_id".to_string(), json!(account_id));
metadata.insert("service".to_string(), json!(service));
metadata.insert("resource_type".to_string(), json!(resource_type));
metadata.insert("cloud_provider".to_string(), json!("aws"));
metadata.insert("document_type".to_string(), json!("aws_estate"));
metadata.insert("last_synced".to_string(), json!(chrono::Utc::now().timestamp()));
if let Some(region_val) = region {
metadata.insert("region".to_string(), json!(region_val));
}
metadata
}
/// Builds a fallback ARN for a resource that did not report one.
///
/// * `service` - AWS service namespace, e.g. `"iam"`.
/// * `region` - region segment; ignored for IAM, whose ARNs leave it empty.
/// * `account_id` - owning AWS account id.
/// * `resource_type` - resource kind, e.g. `"user"` or `"role"`.
/// * `resource_id` - resource name or id placed in the final segment.
///
/// IAM ARNs have the form `arn:aws:iam::<account>:<type>/<name>`: the resource
/// type and name are separated by `/`, not `:`. Other services keep the
/// generic colon-separated layout.
fn generate_arn(
    service: &str,
    region: &str,
    account_id: &str,
    resource_type: &str,
    resource_id: &str,
) -> String {
    if service == "iam" {
        // IAM is global: empty region segment, `type/name` resource part.
        format!("arn:aws:iam::{}:{}/{}", account_id, resource_type, resource_id)
    } else {
        format!(
            "arn:aws:{}:{}:{}:{}:{}",
            service, region, account_id, resource_type, resource_id
        )
    }
}
/// Converts a list of raw IAM resources of one kind into documents.
///
/// For each resource the name, id, ARN and creation date are looked up under
/// both PascalCase and snake_case keys; missing name, id or date become
/// `"unknown"`, and a missing ARN is replaced by one generated from the
/// account id, resource type and name. The ARN is used as the document id.
///
/// Metadata starts from the base entries (with resource type
/// `iam-<resource_type>`) and then receives every field of the resource under
/// its snake_case key, except embedding/vector fields.
///
/// This function currently always returns `Ok`.
fn parse_iam_resources(&self, account_id: &str, resource_type: &str, resources: &[Value]) -> Result<Vec<Document>> {
    const NAME_KEYS: [&str; 8] = [
        "UserName", "user_name", "RoleName", "role_name",
        "PolicyName", "policy_name", "GroupName", "group_name",
    ];
    const ID_KEYS: [&str; 8] = [
        "UserId", "user_id", "RoleId", "role_id",
        "PolicyId", "policy_id", "GroupId", "group_id",
    ];
    const ARN_KEYS: [&str; 2] = ["Arn", "arn"];
    const CREATED_KEYS: [&str; 4] = ["CreateDate", "create_date", "CreatedDate", "created_date"];
    // Fields that must never be copied into the metadata.
    const SKIPPED_KEYS: [&str; 4] = ["embedding", "embeddings", "vector", "_vectors"];

    /// Returns the first string value stored under any of `keys`, in order.
    fn first_str<'a>(resource: &'a Value, keys: &[&str]) -> Option<&'a str> {
        keys.iter()
            .find_map(|key| resource.get(key).and_then(|value| value.as_str()))
    }

    // Same for every resource in this batch, so compute it once.
    let kind_label = resource_type.replace("iam-", "").to_uppercase();

    let mut documents = Vec::new();
    for resource in resources {
        let name = first_str(resource, &NAME_KEYS).unwrap_or("unknown");
        let id = first_str(resource, &ID_KEYS).unwrap_or("unknown");
        let arn = match first_str(resource, &ARN_KEYS) {
            Some(reported) => reported.to_string(),
            None => Self::generate_arn("iam", "", account_id, resource_type, name),
        };
        let created = first_str(resource, &CREATED_KEYS).unwrap_or("unknown");

        let content = format!(
            "IAM {} {} (ID: {}) - Created: {} - ARN: {}",
            kind_label, name, id, created, arn
        );

        let metadata_type = format!("iam-{}", resource_type);
        let mut metadata = Self::create_base_metadata(account_id, "iam", &metadata_type, None);

        if let Some(fields) = resource.as_object() {
            for (raw_key, value) in fields {
                let key = Self::to_snake_case(raw_key);
                if SKIPPED_KEYS.contains(&key.as_str()) {
                    continue;
                }
                metadata.insert(key, value.clone());
            }
        }

        documents.push(Document::new(arn, content).with_metadata(metadata));
        debug!("✅ Created IAM {} document for: {}", resource_type, name);
    }

    Ok(documents)
}
/// Converts a PascalCase / camelCase identifier to snake_case.
///
/// An underscore is inserted before an uppercase character only where a new
/// word starts:
/// * after a lowercase letter or digit (`UserName` -> `user_name`), or
/// * at the last capital of an acronym run that is followed by a lowercase
///   letter (`MFADevices` -> `mfa_devices`).
///
/// Acronym runs are therefore kept together (`PolicyARN` -> `policy_arn`),
/// and no underscore is added after an existing separator
/// (`User_Name` -> `user_name`). Input that is already snake_case is returned
/// unchanged.
fn to_snake_case(s: &str) -> String {
    let chars: Vec<char> = s.chars().collect();
    let mut result = String::with_capacity(s.len() + 4);

    for (i, &ch) in chars.iter().enumerate() {
        if ch.is_uppercase() && i > 0 {
            let prev = chars[i - 1];
            let next_is_lower = chars.get(i + 1).map_or(false, |next| next.is_lowercase());
            let starts_word = if prev.is_uppercase() {
                // Inside an acronym run: break only before its trailing word.
                next_is_lower
            } else {
                // After a letter/digit; never after `_` or other separators.
                prev.is_alphanumeric()
            };
            if starts_word {
                result.push('_');
            }
        }
        // A lowercase mapping may expand to several chars; keep all of them.
        result.extend(ch.to_lowercase());
    }

    result
}
}
#[async_trait]
impl AwsServiceParser for IamParser {
/// Returns the name of the service this parser handles: the literal `"iam"`.
fn service_name(&self) -> &str {
    const SERVICE: &str = "iam";
    SERVICE
}
/// Reports whether `service_data` looks like IAM data.
///
/// The value must be a JSON object containing at least one of the known IAM
/// keys (resource lists in PascalCase or lowercase, `TotalUsers` /
/// `total_users`, or `account_summary`). Only key presence is checked, not
/// the type of the value stored under it.
fn can_parse(&self, service_data: &Value) -> bool {
    const MARKER_KEYS: [&str; 11] = [
        "Users",
        "users",
        "Roles",
        "roles",
        "Policies",
        "policies",
        "Groups",
        "groups",
        "TotalUsers",
        "total_users",
        "account_summary",
    ];

    if !service_data.is_object() {
        return false;
    }
    MARKER_KEYS
        .iter()
        .any(|key| service_data.get(key).is_some())
}
/// Parses IAM users, roles, policies and groups found in `service_data`.
///
/// For each resource kind, the first alias (PascalCase, then lowercase) whose
/// value is an array is selected; later aliases are not consulted, even if
/// the selected array is empty. Non-empty arrays are converted to documents.
/// A failure while parsing one kind is logged as a warning and does not stop
/// the remaining kinds, so the result is always `Ok`.
async fn parse(&self, account_id: &str, service_data: &Value) -> Result<Vec<Document>> {
    // (accepted field names, resource kind) in processing order.
    const RESOURCE_KINDS: [(&[&str], &str); 4] = [
        (&["Users", "users"], "user"),
        (&["Roles", "roles"], "role"),
        (&["Policies", "policies"], "policy"),
        (&["Groups", "groups"], "group"),
    ];

    debug!("🔍 IAM parser processing data for account: {}", account_id);
    let mut documents = Vec::new();

    for &(aliases, resource_type) in RESOURCE_KINDS.iter() {
        let selected = aliases
            .iter()
            .find_map(|alias| service_data.get(alias).and_then(|value| value.as_array()));

        let resources = match selected {
            Some(list) if !list.is_empty() => list,
            _ => continue,
        };

        match self.parse_iam_resources(account_id, resource_type, resources) {
            Ok(parsed) => {
                let count = parsed.len();
                documents.extend(parsed);
                debug!("✅ Parsed {} IAM {}s", count, resource_type);
            }
            Err(e) => {
                warn!("⚠️ Failed to parse IAM {}s: {}", resource_type, e);
            }
        }
    }

    if documents.is_empty() {
        warn!("🟡 IAM parser found no parseable data");
    } else {
        debug!("🎉 IAM parser generated {} documents", documents.len());
    }

    Ok(documents)
}
/// Returns a JSON-schema-like description of the input this parser accepts.
///
/// The schema lists the four resource arrays (`Users`, `Roles`, `Policies`,
/// `Groups`), each an array of objects, and the four numeric `Total*`
/// counters. The result is always `Some`.
fn get_data_schema(&self) -> Option<Value> {
    // Schema fragment for an array of free-form objects.
    let object_array = |description: &str| {
        json!({
            "type": "array",
            "description": description,
            "items": { "type": "object" }
        })
    };
    // Schema fragment for a numeric counter.
    let number = || json!({ "type": "number" });

    let schema = json!({
        "type": "object",
        "description": "Dynamic IAM parser that supports Users, Roles, Policies, and Groups",
        "properties": {
            "Users": object_array("IAM users with all AWS IAM user fields"),
            "Roles": object_array("IAM roles with all AWS IAM role fields"),
            "Policies": object_array("IAM policies with all AWS IAM policy fields"),
            "Groups": object_array("IAM groups with all AWS IAM group fields"),
            "TotalUsers": number(),
            "TotalRoles": number(),
            "TotalPolicies": number(),
            "TotalGroups": number()
        },
        "note": "This parser dynamically extracts ALL fields from IAM resources"
    });

    Some(schema)
}
}