use anyhow::Result;
use async_trait::async_trait;
use serde::{Serialize, Deserialize};
use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::Arc;
use tokio::sync::RwLock;
use uuid::Uuid;
use crate::services::EncryptionService;
use crate::config::PrivacyConfig;
/// One real-ID to anonymous-ID mapping plus the descriptive data stored with it.
///
/// Derives `Serialize`/`Deserialize`; `MappingService::save_mappings` serializes a map of
/// these entries to JSON before encrypting it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MappingEntry {
/// The original identifier that was anonymized.
pub real_id: String,
/// The generated replacement identifier.
pub anonymous_id: String,
/// Resource kind detected from `real_id` when the entry was created, if any.
pub resource_type: Option<String>,
/// Cloud provider detected from `real_id` when the entry was created, if any.
pub cloud_provider: Option<String>,
/// UTC timestamp taken when the entry was created.
pub created_at: chrono::DateTime<chrono::Utc>,
/// Caller-supplied metadata; empty when the caller passed none.
pub metadata: HashMap<String, serde_json::Value>,
}
/// Options accepted by `MappingService::anonymize_data`.
///
/// NOTE(review): in the code visible in this file the options are only defaulted and
/// forwarded through the recursion; no field is actually read. The per-field notes below
/// are therefore assumptions based on the names -- confirm before relying on them.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnonymizationOptions {
/// Presumably: keep the shape of the input document intact -- TODO confirm.
pub preserve_structure: bool,
/// Presumably: add a hash component to generated IDs -- TODO confirm.
pub include_hash: bool,
/// Presumably: prefix for generated anonymous IDs -- TODO confirm.
pub prefix: Option<String>,
}
impl Default for AnonymizationOptions {
    /// Default options: `preserve_structure` on, `include_hash` off, prefix `"res"`.
    fn default() -> Self {
        let prefix = Some(String::from("res"));
        Self {
            prefix,
            include_hash: false,
            preserve_structure: true,
        }
    }
}
/// Keeps the real-ID / anonymous-ID table in memory and persists it, encrypted, on disk.
pub struct MappingService {
/// Directory that holds the mappings file; `save_mappings` creates it if missing.
base_path: PathBuf,
/// Encrypts the serialized table on save and decrypts it on load.
encryption_service: Arc<EncryptionService>,
/// Privacy settings; `confidential_fields` is consulted by `is_confidential_field`.
privacy_config: PrivacyConfig,
// Three views of the same table, always updated together:
//   mappings:         real ID      -> anonymous ID
//   reverse_mappings: anonymous ID -> real ID
//   mapping_entries:  real ID      -> full `MappingEntry` (this is the map that gets persisted)
// Every method that takes more than one of these locks acquires them in this order.
mappings: Arc<RwLock<HashMap<String, String>>>, reverse_mappings: Arc<RwLock<HashMap<String, String>>>, mapping_entries: Arc<RwLock<HashMap<String, MappingEntry>>>,
/// Location of the encrypted mappings file (`base_path` joined with `mappings.encrypted`).
mappings_path: PathBuf,
/// Becomes `true` once `load_mappings` has run to completion.
loaded: Arc<RwLock<bool>>,
}
impl MappingService {
pub async fn new(privacy_config: &PrivacyConfig) -> Result<Self> {
let base_path = PathBuf::from("./data");
let mappings_path = base_path.join("mappings.encrypted");
let encryption_config = crate::types::EncryptionConfig::default();
let encryption_service = Arc::new(
crate::services::EncryptionService::new(&encryption_config, "./").await?
);
Ok(Self {
base_path,
encryption_service,
privacy_config: privacy_config.clone(),
mappings: Arc::new(RwLock::new(HashMap::new())),
reverse_mappings: Arc::new(RwLock::new(HashMap::new())),
mapping_entries: Arc::new(RwLock::new(HashMap::new())),
mappings_path,
loaded: Arc::new(RwLock::new(false)),
})
}
/// Replaces the in-memory tables with the contents of the encrypted mappings file.
///
/// When the file cannot be stat'ed (typically: it does not exist yet) the tables are left
/// untouched. In both cases the service is marked as loaded on success.
///
/// # Errors
/// Fails if the file cannot be read, decrypted, or parsed as a JSON map of entries.
pub async fn load_mappings(&self) -> Result<()> {
    let file_present = tokio::fs::metadata(&self.mappings_path).await.is_ok();

    if file_present {
        // Do all fallible I/O and parsing before touching the shared tables.
        let ciphertext = tokio::fs::read_to_string(&self.mappings_path).await?;
        let plaintext = self.encryption_service.decrypt_content(&ciphertext).await?;
        let stored: HashMap<String, MappingEntry> = serde_json::from_str(&plaintext)?;

        // Lock order: mappings, reverse_mappings, mapping_entries.
        let mut forward = self.mappings.write().await;
        let mut reverse = self.reverse_mappings.write().await;
        let mut entries = self.mapping_entries.write().await;

        forward.clear();
        reverse.clear();
        entries.clear();

        // The map key (not `entry.real_id`) is the authoritative real ID.
        for (real_id, entry) in stored.into_iter() {
            let anonymous_id = entry.anonymous_id.clone();
            forward.insert(real_id.clone(), anonymous_id.clone());
            reverse.insert(anonymous_id, real_id.clone());
            entries.insert(real_id, entry);
        }
    }

    *self.loaded.write().await = true;
    Ok(())
}
pub async fn save_mappings(&self) -> Result<()> {
let mapping_entries = self.mapping_entries.read().await;
let mappings_data: HashMap<String, MappingEntry> = mapping_entries.clone();
let json_data = serde_json::to_string(&mappings_data)?;
let encrypted_data = self.encryption_service.encrypt_content(&json_data).await?;
tokio::fs::create_dir_all(&self.base_path).await?;
tokio::fs::write(&self.mappings_path, encrypted_data).await?;
Ok(())
}
/// Returns the anonymous ID for `real_id`, creating and persisting a new mapping if none
/// exists yet.
///
/// `metadata` is stored on a newly created entry (empty when `None`); it is ignored when
/// a mapping already exists.
///
/// The existence check is performed twice: once under a read lock (cheap fast path) and
/// again after the write locks are taken. Without the second check, two concurrent
/// callers could both miss in the fast path and each insert a different anonymous ID for
/// the same real ID, leaving a stale entry in the reverse table.
///
/// # Errors
/// Fails if the mappings cannot be loaded, the ID cannot be generated, or the updated
/// table cannot be saved.
pub async fn create_mapping(
    &self,
    real_id: &str,
    metadata: Option<HashMap<String, serde_json::Value>>,
) -> Result<String> {
    self.ensure_loaded().await?;

    // Fast path: already mapped.
    {
        let mappings = self.mappings.read().await;
        if let Some(existing_anonymous_id) = mappings.get(real_id) {
            return Ok(existing_anonymous_id.clone());
        }
    }

    let anonymous_id = self.generate_anonymous_id(real_id, metadata.as_ref())?;
    let entry = MappingEntry {
        real_id: real_id.to_string(),
        anonymous_id: anonymous_id.clone(),
        resource_type: self.detect_resource_type(real_id),
        cloud_provider: self.detect_cloud_provider(real_id),
        created_at: chrono::Utc::now(),
        metadata: metadata.unwrap_or_default(),
    };

    {
        // Lock order: mappings, reverse_mappings, mapping_entries.
        let mut mappings = self.mappings.write().await;
        let mut reverse_mappings = self.reverse_mappings.write().await;
        let mut mapping_entries = self.mapping_entries.write().await;

        // Re-check now that we hold the write locks: another task may have created the
        // mapping between the fast-path check and this point. Its ID wins.
        if let Some(existing_anonymous_id) = mappings.get(real_id) {
            return Ok(existing_anonymous_id.clone());
        }

        mappings.insert(real_id.to_string(), anonymous_id.clone());
        reverse_mappings.insert(anonymous_id.clone(), real_id.to_string());
        mapping_entries.insert(real_id.to_string(), entry);
    }

    // Locks are released before the (slow) encrypted write.
    self.save_mappings().await?;
    Ok(anonymous_id)
}
/// Looks up the anonymous ID recorded for `real_id`, loading the table first if needed.
///
/// Returns `Ok(None)` when no mapping exists.
pub async fn get_anonymous_id(&self, real_id: &str) -> Result<Option<String>> {
    self.ensure_loaded().await?;
    let forward = self.mappings.read().await;
    let found = forward.get(real_id).map(|anonymous_id| anonymous_id.to_owned());
    Ok(found)
}
/// Looks up the real ID behind `anonymous_id`, loading the table first if needed.
///
/// Returns `Ok(None)` when the anonymous ID is unknown.
pub async fn get_real_id(&self, anonymous_id: &str) -> Result<Option<String>> {
    self.ensure_loaded().await?;
    let reverse = self.reverse_mappings.read().await;
    let found = reverse.get(anonymous_id).map(|real_id| real_id.to_owned());
    Ok(found)
}
/// Returns the existing anonymous ID for `real_id`, or creates a mapping when there is none.
///
/// `metadata` is only used when a new mapping has to be created.
pub async fn get_or_create_mapping(
    &self,
    real_id: &str,
    metadata: Option<HashMap<String, serde_json::Value>>,
) -> Result<String> {
    match self.get_anonymous_id(real_id).await? {
        Some(known) => Ok(known),
        None => self.create_mapping(real_id, metadata).await,
    }
}
/// Walks a JSON value in place and replaces every string that looks like a resource ID
/// with its anonymous ID, creating mappings on demand.
///
/// * Objects: each value is visited. A string value under a confidential key is replaced
///   when it looks like a resource ID; every other value -- including arrays and nested
///   objects stored under a confidential key -- is processed recursively.
/// * Arrays: each element is processed recursively.
/// * Strings: replaced when they look like a resource ID.
/// * Other JSON types are left untouched.
///
/// Previously, non-string values under a confidential key (for example an array of IDs)
/// were skipped entirely, so the real IDs inside them were left in the output.
///
/// `options` falls back to `AnonymizationOptions::default()` and is forwarded to nested
/// calls; no option currently alters the behavior of this function.
///
/// # Errors
/// Propagates any failure from looking up or creating a mapping. Values replaced before
/// the failure stay replaced.
pub async fn anonymize_data(
    &self,
    data: &mut serde_json::Value,
    options: Option<AnonymizationOptions>,
) -> Result<()> {
    let opts = options.unwrap_or_default();
    match data {
        serde_json::Value::Object(map) => {
            for (key, value) in map.iter_mut() {
                if self.is_confidential_field(key) {
                    if let Some(string_value) = value.as_str() {
                        if self.looks_like_resource_id(string_value) {
                            let anonymous_id =
                                self.get_or_create_mapping(string_value, None).await?;
                            *value = serde_json::Value::String(anonymous_id);
                        }
                        // String handled (or deliberately left as is); nothing to recurse into.
                        continue;
                    }
                }
                // Non-confidential key, or a confidential key holding a container/scalar:
                // descend so nested IDs are anonymized too.
                // `Box::pin` is required because an async fn cannot recurse directly.
                Box::pin(self.anonymize_data(value, Some(opts.clone()))).await?;
            }
        }
        serde_json::Value::Array(arr) => {
            for item in arr.iter_mut() {
                Box::pin(self.anonymize_data(item, Some(opts.clone()))).await?;
            }
        }
        serde_json::Value::String(s) => {
            if self.looks_like_resource_id(s) {
                let anonymous_id = self.get_or_create_mapping(s, None).await?;
                *s = anonymous_id;
            }
        }
        _ => {}
    }
    Ok(())
}
/// Walks a JSON value in place and swaps every string that is a known anonymous ID back
/// to its real ID.
///
/// Objects and arrays are traversed recursively; strings without a reverse mapping and
/// all other JSON types are left unchanged.
///
/// # Errors
/// Propagates any failure from the reverse lookup (for example, a failed table load).
pub async fn deanonymize_data(
    &self,
    data: &mut serde_json::Value,
) -> Result<()> {
    match data {
        serde_json::Value::String(text) => {
            let resolved = self.get_real_id(text.as_str()).await?;
            if let Some(real_id) = resolved {
                *text = real_id;
            }
        }
        serde_json::Value::Array(elements) => {
            for element in elements.iter_mut() {
                // `Box::pin` is required because an async fn cannot recurse directly.
                Box::pin(self.deanonymize_data(element)).await?;
            }
        }
        serde_json::Value::Object(fields) => {
            for (_key, nested) in fields.iter_mut() {
                Box::pin(self.deanonymize_data(nested)).await?;
            }
        }
        _ => {}
    }
    Ok(())
}
/// Returns a copy of every stored mapping entry, loading the table first if needed.
///
/// The order of the returned entries is the hash map's iteration order, i.e. unspecified.
pub async fn list_mappings(&self) -> Result<Vec<MappingEntry>> {
    self.ensure_loaded().await?;
    let entries = self.mapping_entries.read().await;
    let mut snapshot = Vec::with_capacity(entries.len());
    snapshot.extend(entries.values().cloned());
    Ok(snapshot)
}
/// Removes the mapping for `real_id` from all three tables and persists the change.
///
/// Returns `Ok(true)` when a mapping was removed and `Ok(false)` when none existed (in
/// which case nothing is written to disk).
///
/// # Errors
/// Fails if the table cannot be loaded or the updated table cannot be saved.
pub async fn delete_mapping(&self, real_id: &str) -> Result<bool> {
    self.ensure_loaded().await?;

    // Lock order: mappings, reverse_mappings, mapping_entries.
    let mut forward = self.mappings.write().await;
    let mut reverse = self.reverse_mappings.write().await;
    let mut entries = self.mapping_entries.write().await;

    let anonymous_id = match forward.remove(real_id) {
        Some(id) => id,
        None => return Ok(false),
    };
    reverse.remove(&anonymous_id);
    entries.remove(real_id);

    // Release every lock before saving: `save_mappings` needs to read `mapping_entries`.
    drop(forward);
    drop(reverse);
    drop(entries);

    self.save_mappings().await?;
    Ok(true)
}
/// Loads the mappings from disk unless a previous load has already completed.
async fn ensure_loaded(&self) -> Result<()> {
    // Copy the flag out so the read guard is gone before `load_mappings` takes the
    // write lock on the same `RwLock`.
    let already_loaded = *self.loaded.read().await;
    if already_loaded {
        return Ok(());
    }
    self.load_mappings().await?;
    Ok(())
}
/// Builds a fresh anonymous ID of the form `<prefix>-<resource type>-<12 hex chars>`.
///
/// * prefix: `aws` for AWS ARNs, `azure` / `gcp` for those providers, `res` otherwise
///   (including AWS IDs that are not ARNs).
/// * resource type: the detected type, or `resource` when detection fails.
/// * suffix: the first 12 hex digits of a random v4 UUID.
///
/// `metadata` is accepted but not used.
fn generate_anonymous_id(
    &self,
    real_id: &str,
    metadata: Option<&HashMap<String, serde_json::Value>>,
) -> Result<String> {
    let resource_type = self
        .detect_resource_type(real_id)
        .unwrap_or_else(|| String::from("resource"));
    let cloud_provider = self
        .detect_cloud_provider(real_id)
        .unwrap_or_else(|| String::from("cloud"));

    // Hyphen-less UUID text, truncated to 12 characters.
    let uuid_short: String = Uuid::new_v4()
        .to_string()
        .chars()
        .filter(|c| *c != '-')
        .take(12)
        .collect();

    let prefix = match cloud_provider.as_str() {
        "aws" if real_id.starts_with("arn:aws:") => "aws",
        "azure" => "azure",
        "gcp" => "gcp",
        // Unknown providers and non-ARN AWS IDs share the generic prefix.
        _ => "res",
    };

    Ok(format!("{}-{}-{}", prefix, resource_type, uuid_short))
}
/// Guesses the resource type from the shape of an identifier.
///
/// AWS ARNs are classified by the first service marker they contain (`ec2`, `rds`, `s3`,
/// `lambda`, `iam`, checked in that order). Otherwise the ID prefix decides: `i-` is an
/// `instance`, `vol-` a `volume`, `subnet-` a `subnet`, `vpc-` a `vpc`. Returns `None`
/// when nothing matches.
fn detect_resource_type(&self, resource_id: &str) -> Option<String> {
    // (marker inside the ARN, resulting type) -- order matters, first hit wins.
    const ARN_SERVICES: [(&str, &str); 5] = [
        (":ec2:", "ec2"),
        (":rds:", "rds"),
        (":s3:", "s3"),
        (":lambda:", "lambda"),
        (":iam:", "iam"),
    ];
    // (ID prefix, resulting type)
    const ID_PREFIXES: [(&str, &str); 4] = [
        ("i-", "instance"),
        ("vol-", "volume"),
        ("subnet-", "subnet"),
        ("vpc-", "vpc"),
    ];

    if resource_id.starts_with("arn:aws:") {
        let service = ARN_SERVICES
            .iter()
            .find(|(marker, _)| resource_id.contains(*marker));
        if let Some(&(_, kind)) = service {
            return Some(kind.to_string());
        }
    }

    ID_PREFIXES
        .iter()
        .find(|(prefix, _)| resource_id.starts_with(*prefix))
        .map(|&(_, kind)| kind.to_string())
}
/// Guesses the cloud provider from the shape of an identifier.
///
/// * `aws`: AWS ARNs and the bare AWS ID prefixes `i-`, `vol-`, `subnet-`, `vpc-`.
/// * `azure`: IDs starting with `/subscriptions/`.
/// * `gcp`: IDs starting with `projects/`.
///
/// Returns `None` for anything else.
///
/// `subnet-` and `vpc-` are included so this agrees with `detect_resource_type` and
/// `looks_like_resource_id`, which already treat them as AWS-style IDs; previously such
/// IDs were typed as `subnet`/`vpc` but recorded without a provider.
fn detect_cloud_provider(&self, resource_id: &str) -> Option<String> {
    const AWS_PREFIXES: [&str; 5] = ["arn:aws:", "i-", "vol-", "subnet-", "vpc-"];

    let provider = if AWS_PREFIXES
        .iter()
        .any(|prefix| resource_id.starts_with(*prefix))
    {
        "aws"
    } else if resource_id.starts_with("/subscriptions/") {
        "azure"
    } else if resource_id.starts_with("projects/") {
        "gcp"
    } else {
        return None;
    };

    Some(provider.to_string())
}
/// Decides whether a JSON key names a field whose value should be anonymized.
///
/// A key is confidential when it is listed in `privacy_config.confidential_fields`, or
/// contains `arn`, or contains `resource`, or contains `id` without also containing
/// `account_id`. All checks are case-sensitive substring matches.
fn is_confidential_field(&self, field_name: &str) -> bool {
    let explicitly_listed = self
        .privacy_config
        .confidential_fields
        .iter()
        .any(|f| f == field_name);
    // `&&` binds tighter than `||`: the `account_id` exclusion applies to the `id` rule only.
    let id_like = field_name.contains("id") && !field_name.contains("account_id");

    explicitly_listed
        || field_name.contains("arn")
        || id_like
        || field_name.contains("resource")
}
/// Returns `true` when `value` starts with one of the recognised resource-ID prefixes.
fn looks_like_resource_id(&self, value: &str) -> bool {
    const RESOURCE_ID_PREFIXES: [&str; 7] = [
        "arn:",
        "i-",
        "vol-",
        "subnet-",
        "vpc-",
        "/subscriptions/",
        "projects/",
    ];

    RESOURCE_ID_PREFIXES
        .iter()
        .any(|prefix| value.starts_with(*prefix))
}
/// Start-up hook: loads the persisted mappings into memory.
pub async fn initialize(&self) -> Result<()> {
    self.load_mappings().await
}
/// Shutdown hook: writes the current mappings to disk.
pub async fn shutdown(&self) -> Result<()> {
    self.save_mappings().await
}
}
// Adapter that exposes this service through the search service's `MappingService` trait.
#[async_trait::async_trait]
impl crate::services::search_service::MappingService for MappingService {
/// Forwards to the inherent `get_anonymous_id` method of this type.
async fn get_anonymous_id(&self, original_id: &str) -> Result<Option<String>> {
// Not self-recursive: method-call lookup prefers the inherent method of the same name
// over this trait method, so this resolves to the inherent implementation.
self.get_anonymous_id(original_id).await
}
}