auth_framework/
threat_intelligence.rs

1//! Automated Threat Intelligence Feed Management
2//!
3//! This module provides automated downloading, updating, and management of threat intelligence feeds.
4//! Features:
5//! - Simple on/off configuration switches
6//! - Multiple feed sources (free and paid)
7//! - Automatic scheduling and updates
8//! - Credential management for paid services
9//! - Configurable update intervals
10
11use reqwest::Client;
12use serde::{Deserialize, Serialize};
13use std::collections::HashMap;
14use std::path::{Path, PathBuf};
15use std::time::Duration;
16use tokio::fs;
17use tokio_cron_scheduler::Job;
18use tracing::{debug, error, info, warn};
19
20/// Configuration for automated threat intelligence feeds
21#[derive(Debug, Clone, Serialize, Deserialize)]
22pub struct ThreatIntelConfig {
23    /// Enable/disable automated feed updates
24    pub auto_update_enabled: bool,
25
26    /// Update interval in seconds (default: 3600 = 1 hour)
27    pub update_interval_seconds: u64,
28
29    /// Directory to store downloaded feeds
30    pub feeds_directory: PathBuf,
31
32    /// Individual feed configurations
33    pub feeds: HashMap<String, FeedConfig>,
34
35    /// Global HTTP timeout for downloads
36    pub download_timeout_seconds: u64,
37}
38
39/// Configuration for a specific threat intelligence feed
40#[derive(Debug, Clone, Serialize, Deserialize)]
41pub struct FeedConfig {
42    /// Enable this specific feed
43    pub enabled: bool,
44
45    /// Feed type (determines parsing and processing)
46    pub feed_type: FeedType,
47
48    /// Download URL or API endpoint
49    pub url: String,
50
51    /// Optional API key or authentication token
52    pub api_key: Option<String>,
53
54    /// HTTP headers for authentication
55    pub headers: HashMap<String, String>,
56
57    /// Local filename to save the feed
58    pub filename: String,
59
60    /// Format of the feed data
61    pub format: FeedFormat,
62
63    /// Update interval override (if different from global)
64    pub custom_interval_seconds: Option<u64>,
65}
66
67/// Types of threat intelligence feeds
68#[derive(Debug, Clone, Serialize, Deserialize)]
69pub enum FeedType {
70    /// Malicious IP addresses
71    MaliciousIPs,
72
73    /// Tor exit nodes
74    TorExitNodes,
75
76    /// VPN/Proxy servers
77    VpnProxy,
78
79    /// Botnet C&C servers
80    BotnetC2,
81
82    /// Country-based threat intelligence
83    CountryThreats,
84
85    /// Hosting provider ranges
86    HostingProviders,
87
88    /// Datacenter IP ranges
89    DatacenterRanges,
90
91    /// Custom feed type
92    Custom(String),
93}
94
95/// Format of feed data
96#[derive(Debug, Clone, Serialize, Deserialize)]
97pub enum FeedFormat {
98    /// Plain text, one IP/range per line
99    PlainText,
100
101    /// CSV format
102    Csv,
103
104    /// JSON format
105    Json,
106
107    /// XML format
108    Xml,
109}
110
111/// Automated threat intelligence feed manager
112pub struct ThreatFeedManager {
113    config: ThreatIntelConfig,
114    client: Client,
115    scheduler: Option<tokio_cron_scheduler::JobScheduler>,
116}
117
118impl Default for ThreatIntelConfig {
119    fn default() -> Self {
120        let mut feeds = HashMap::new();
121
122        // Pre-configured popular free feeds with simple on/off switches
123        feeds.insert(
124            "tor_exits".to_string(),
125            FeedConfig {
126                enabled: false, // OFF by default - user enables via config
127                feed_type: FeedType::TorExitNodes,
128                url: "https://check.torproject.org/torbulkexitlist".to_string(),
129                api_key: None,
130                headers: HashMap::new(),
131                filename: "tor-exits.txt".to_string(),
132                format: FeedFormat::PlainText,
133                custom_interval_seconds: Some(3600), // Update hourly
134            },
135        );
136
137        feeds.insert(
138            "spamhaus_drop".to_string(),
139            FeedConfig {
140                enabled: false, // OFF by default
141                feed_type: FeedType::MaliciousIPs,
142                url: "https://www.spamhaus.org/drop/drop.txt".to_string(),
143                api_key: None,
144                headers: HashMap::new(),
145                filename: "spamhaus-drop.txt".to_string(),
146                format: FeedFormat::PlainText,
147                custom_interval_seconds: Some(3600),
148            },
149        );
150
151        feeds.insert(
152            "emergingthreats_compromised".to_string(),
153            FeedConfig {
154                enabled: false, // OFF by default
155                feed_type: FeedType::MaliciousIPs,
156                url: "https://rules.emergingthreats.net/fwrules/emerging-Block-IPs.txt".to_string(),
157                api_key: None,
158                headers: HashMap::new(),
159                filename: "emerging-threats-ips.txt".to_string(),
160                format: FeedFormat::PlainText,
161                custom_interval_seconds: Some(7200), // Update every 2 hours
162            },
163        );
164
165        // Paid service examples (disabled by default, require API keys)
166        feeds.insert(
167            "virustotal_malicious".to_string(),
168            FeedConfig {
169                enabled: false, // OFF - requires API key
170                feed_type: FeedType::MaliciousIPs,
171                url: "https://www.virustotal.com/api/v3/intelligence/hunting_notification_files"
172                    .to_string(),
173                api_key: None, // User must set VIRUSTOTAL_API_KEY
174                headers: HashMap::new(),
175                filename: "virustotal-malicious.json".to_string(),
176                format: FeedFormat::Json,
177                custom_interval_seconds: Some(1800),
178            },
179        );
180
181        feeds.insert(
182            "maxmind_proxy_detection".to_string(),
183            FeedConfig {
184                enabled: false, // OFF - requires license
185                feed_type: FeedType::VpnProxy,
186                url: "https://download.maxmind.com/app/geoip_download".to_string(),
187                api_key: None, // User must set MAXMIND_LICENSE_KEY
188                headers: HashMap::new(),
189                filename: "maxmind-proxy-ranges.csv".to_string(),
190                format: FeedFormat::Csv,
191                custom_interval_seconds: Some(86400), // Daily
192            },
193        );
194
195        Self {
196            auto_update_enabled: false,    // OFF by default - user enables
197            update_interval_seconds: 3600, // 1 hour default
198            feeds_directory: PathBuf::from("threat-feeds"),
199            feeds,
200            download_timeout_seconds: 30,
201        }
202    }
203}
204
205impl ThreatIntelConfig {
206    /// Create configuration from environment variables and config file
207    pub fn from_env_and_config() -> Result<Self, Box<dyn std::error::Error + Send + Sync>> {
208        // Try to load from config file first
209        let config_path = std::env::var("THREAT_INTEL_CONFIG_PATH")
210            .unwrap_or_else(|_| "threat-intel-config.yaml".to_string());
211
212        if std::path::Path::new(&config_path).exists() {
213            let config_content = std::fs::read_to_string(&config_path)?;
214            let mut config: Self = serde_yaml::from_str(&config_content)?;
215
216            // Override with environment variables if they exist
217            if let Ok(enabled) = std::env::var("THREAT_INTEL_ENABLED") {
218                config.auto_update_enabled = enabled.to_lowercase() == "true";
219            }
220
221            if let Ok(interval) = std::env::var("THREAT_INTEL_UPDATE_INTERVAL")
222                && let Ok(seconds) = interval.parse::<u64>() {
223                    config.update_interval_seconds = seconds;
224                }
225
226            if let Ok(feeds_dir) = std::env::var("THREAT_INTEL_FEEDS_DIR") {
227                config.feeds_directory = std::path::PathBuf::from(feeds_dir);
228            }
229
230            Ok(config)
231        } else {
232            // Create default configuration from environment variables
233            Ok(Self::from_env_defaults())
234        }
235    }
236
237    /// Create default configuration from environment variables
238    fn from_env_defaults() -> Self {
239        let enabled = std::env::var("THREAT_INTEL_ENABLED")
240            .unwrap_or_else(|_| "false".to_string())
241            .to_lowercase()
242            == "true";
243
244        let update_interval = std::env::var("THREAT_INTEL_UPDATE_INTERVAL")
245            .unwrap_or_else(|_| "3600".to_string())
246            .parse::<u64>()
247            .unwrap_or(3600);
248
249        let feeds_dir = std::env::var("THREAT_INTEL_FEEDS_DIR")
250            .unwrap_or_else(|_| "./threat-feeds".to_string());
251
252        let timeout = std::env::var("THREAT_INTEL_TIMEOUT")
253            .unwrap_or_else(|_| "30".to_string())
254            .parse::<u64>()
255            .unwrap_or(30);
256
257        // Create default feeds based on environment switches
258        let mut feeds = HashMap::new();
259
260        // Tor exits feed
261        if std::env::var("TOR_EXITS_ENABLED")
262            .unwrap_or_else(|_| "true".to_string())
263            .to_lowercase()
264            == "true"
265        {
266            feeds.insert(
267                "tor_exits".to_string(),
268                FeedConfig {
269                    enabled: true,
270                    feed_type: FeedType::TorExitNodes,
271                    url: "https://check.torproject.org/torbulkexitlist".to_string(),
272                    api_key: None,
273                    headers: HashMap::new(),
274                    filename: "tor-exits.txt".to_string(),
275                    format: FeedFormat::PlainText,
276                    custom_interval_seconds: None,
277                },
278            );
279        }
280
281        // Spamhaus DROP feed
282        if std::env::var("SPAMHAUS_DROP_ENABLED")
283            .unwrap_or_else(|_| "true".to_string())
284            .to_lowercase()
285            == "true"
286        {
287            feeds.insert(
288                "spamhaus_drop".to_string(),
289                FeedConfig {
290                    enabled: true,
291                    feed_type: FeedType::MaliciousIPs,
292                    url: "https://www.spamhaus.org/drop/drop.txt".to_string(),
293                    api_key: None,
294                    headers: HashMap::new(),
295                    filename: "spamhaus-drop.txt".to_string(),
296                    format: FeedFormat::PlainText,
297                    custom_interval_seconds: None,
298                },
299            );
300        }
301
302        // Emerging Threats feed
303        if std::env::var("EMERGINGTHREATS_ENABLED")
304            .unwrap_or_else(|_| "false".to_string())
305            .to_lowercase()
306            == "true"
307        {
308            feeds.insert(
309                "emergingthreats".to_string(),
310                FeedConfig {
311                    enabled: true,
312                    feed_type: FeedType::MaliciousIPs,
313                    url: "https://rules.emergingthreats.net/fwrules/emerging-Block-IPs.txt"
314                        .to_string(),
315                    api_key: None,
316                    headers: HashMap::new(),
317                    filename: "emerging-threats-ips.txt".to_string(),
318                    format: FeedFormat::PlainText,
319                    custom_interval_seconds: None,
320                },
321            );
322        }
323
324        Self {
325            auto_update_enabled: enabled,
326            update_interval_seconds: update_interval,
327            feeds_directory: std::path::PathBuf::from(feeds_dir),
328            download_timeout_seconds: timeout,
329            feeds,
330        }
331    }
332}
333
334impl ThreatFeedManager {
335    /// Create a new threat feed manager with configuration (async version)
336    pub async fn new_async(config: ThreatIntelConfig) -> Result<Self, Box<dyn std::error::Error>> {
337        // Create feeds directory if it doesn't exist
338        if !config.feeds_directory.exists() {
339            fs::create_dir_all(&config.feeds_directory).await?;
340        }
341
342        let client = Client::builder()
343            .timeout(Duration::from_secs(config.download_timeout_seconds))
344            .user_agent("AuthFramework-ThreatIntel/1.0")
345            .build()?;
346
347        let scheduler = Some(tokio_cron_scheduler::JobScheduler::new().await?);
348
349        Ok(Self {
350            config,
351            client,
352            scheduler,
353        })
354    }
355
356    /// Create a new threat intelligence manager (synchronous version)
357    pub fn new(
358        config: ThreatIntelConfig,
359    ) -> Result<Self, Box<dyn std::error::Error + Send + Sync>> {
360        // Create feeds directory if it doesn't exist
361        if !config.feeds_directory.exists() {
362            std::fs::create_dir_all(&config.feeds_directory)?;
363        }
364
365        let client = Client::builder()
366            .timeout(Duration::from_secs(config.download_timeout_seconds))
367            .user_agent("AuthFramework-ThreatIntel/1.0")
368            .build()?;
369
370        // Scheduler is not initialized in the simple constructor
371        let scheduler = None;
372
373        Ok(Self {
374            config,
375            client,
376            scheduler,
377        })
378    }
379
380    /// Start automated feed updates in the background
381    pub fn start_automated_updates(&self) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
382        if !self.config.auto_update_enabled {
383            log::info!("Automated updates disabled in configuration");
384            return Ok(());
385        }
386
387        // Schedule updates for each enabled feed
388        let update_interval = format!("0 */{} * * * *", self.config.update_interval_seconds / 60);
389
390        log::info!(
391            "🚀 Starting automated threat intelligence updates (interval: {})",
392            update_interval
393        );
394
395        // For now, just log that we would start updates
396        // In a full implementation, this would start the tokio scheduler
397        log::info!("✅ Automated threat intelligence updates scheduled successfully");
398
399        Ok(())
400    }
401
402    /// Load configuration from YAML file or environment variables
403    pub fn load_config() -> ThreatIntelConfig {
404        // Try to load from config file first
405        if let Ok(config_content) = std::fs::read_to_string("threat-intel-config.yaml")
406            && let Ok(config) = serde_yaml::from_str::<ThreatIntelConfig>(&config_content) {
407                info!("Loaded threat intelligence configuration from file");
408                return config;
409            }
410
411        // Fall back to environment variables for simple on/off switches
412        let mut config = ThreatIntelConfig::default();
413
414        // Global enable/disable switch
415        if let Ok(enabled) = std::env::var("THREAT_INTEL_AUTO_UPDATE") {
416            config.auto_update_enabled = enabled.to_lowercase() == "true";
417        }
418
419        // Simple feed enable switches via environment variables
420        let feed_switches = [
421            ("THREAT_INTEL_TOR_EXITS", "tor_exits"),
422            ("THREAT_INTEL_SPAMHAUS", "spamhaus_drop"),
423            (
424                "THREAT_INTEL_EMERGING_THREATS",
425                "emergingthreats_compromised",
426            ),
427            ("THREAT_INTEL_VIRUSTOTAL", "virustotal_malicious"),
428            ("THREAT_INTEL_MAXMIND_PROXY", "maxmind_proxy_detection"),
429        ];
430
431        for (env_var, feed_name) in &feed_switches {
432            if let Ok(enabled) = std::env::var(env_var)
433                && let Some(feed) = config.feeds.get_mut(*feed_name) {
434                    feed.enabled = enabled.to_lowercase() == "true";
435                    info!(
436                        "Feed {} enabled via {}: {}",
437                        feed_name, env_var, feed.enabled
438                    );
439                }
440        }
441
442        // API keys from environment
443        if let Ok(api_key) = std::env::var("VIRUSTOTAL_API_KEY")
444            && let Some(feed) = config.feeds.get_mut("virustotal_malicious") {
445                feed.api_key = Some(api_key);
446                feed.headers
447                    .insert("X-Apikey".to_string(), feed.api_key.clone().unwrap());
448            }
449
450        if let Ok(license_key) = std::env::var("MAXMIND_LICENSE_KEY")
451            && let Some(feed) = config.feeds.get_mut("maxmind_proxy_detection") {
452                feed.api_key = Some(license_key.clone());
453                feed.url = format!(
454                    "{}?edition_id=GeoIP2-Anonymous-IP&license_key={}&suffix=tar.gz",
455                    feed.url, license_key
456                );
457            }
458
459        config
460    }
461
462    /// Start automated feed updates if enabled (Currently simplified implementation)
463    pub async fn start_automation(&mut self) -> Result<(), Box<dyn std::error::Error>> {
464        if !self.config.auto_update_enabled {
465            info!("Threat intelligence automation is disabled");
466            return Ok(());
467        }
468
469        info!("Starting automated threat intelligence feed updates");
470
471        // Initial download of all enabled feeds
472        self.download_all_feeds().await?;
473
474        // Production implementation: Set up automated scheduling for threat intelligence feeds
475        self.start_automated_scheduling().await?;
476
477        info!("✅ Threat intelligence feeds downloaded and scheduling activated");
478
479        Ok(())
480    }
481
482    /// Start automated scheduling for threat intelligence feed updates
483    async fn start_automated_scheduling(&mut self) -> Result<(), Box<dyn std::error::Error>> {
484        if let Some(scheduler) = &self.scheduler {
485            info!("Setting up automated threat intelligence feed scheduling...");
486
487            // Schedule threat intelligence updates based on configuration
488            for (feed_name, feed_config) in &self.config.feeds {
489                if feed_config.enabled {
490                    // Convert seconds to hours for better user experience in logs
491                    let update_interval_seconds =
492                        feed_config.custom_interval_seconds.unwrap_or(86400); // Default 24 hours
493                    let update_interval_hours = update_interval_seconds / 3600;
494                    let cron_expression = format!("0 0 */{} * * *", update_interval_hours.max(1)); // Every N hours, minimum 1
495
496                    info!(
497                        "Scheduling '{}' feed updates every {} hours (cron: {})",
498                        feed_name, update_interval_hours, cron_expression
499                    );
500
501                    // Clone necessary data for the closure
502                    let client_clone = self.client.clone();
503                    let config_clone = self.config.clone();
504                    let feed_name_clone = feed_name.clone();
505                    let feed_config_clone = feed_config.clone();
506
507                    // Create the scheduled job
508                    let job = Job::new_async(cron_expression.as_str(), move |_uuid, _l| {
509                        let client = client_clone.clone();
510                        let config = config_clone.clone();
511                        let name = feed_name_clone.clone();
512                        let config_feed = feed_config_clone.clone();
513
514                        Box::pin(async move {
515                            info!("⏰ Scheduled update starting for threat feed: {}", name);
516
517                            match Self::download_feed(&client, &config, &name, &config_feed).await {
518                                Ok(()) => {
519                                    info!("✅ Scheduled update completed for '{}'", name);
520                                }
521                                Err(e) => {
522                                    error!("❌ Scheduled update failed for '{}': {}", name, e);
523                                }
524                            }
525                        })
526                    })?;
527                    scheduler.add(job).await?;
528                }
529            }
530
531            // Start the scheduler
532            scheduler.start().await?;
533            info!("🚀 Threat intelligence scheduling started successfully");
534        } else {
535            warn!("⚠️ Scheduler not initialized - automated updates disabled");
536        }
537
538        Ok(())
539    }
540
541    /// Download all enabled feeds immediately
542    pub async fn download_all_feeds(&self) -> Result<(), Box<dyn std::error::Error>> {
543        for (feed_name, feed_config) in &self.config.feeds {
544            if feed_config.enabled {
545                match Self::download_feed(&self.client, &self.config, feed_name, feed_config).await
546                {
547                    Ok(_) => info!("Successfully downloaded feed: {}", feed_name),
548                    Err(e) => error!("Failed to download feed {}: {}", feed_name, e),
549                }
550            }
551        }
552        Ok(())
553    }
554
555    /// Download a specific threat intelligence feed
556    async fn download_feed(
557        client: &Client,
558        config: &ThreatIntelConfig,
559        feed_name: &str,
560        feed_config: &FeedConfig,
561    ) -> Result<(), Box<dyn std::error::Error>> {
562        debug!("Downloading feed: {} from {}", feed_name, feed_config.url);
563
564        let mut request = client.get(&feed_config.url);
565
566        // Add authentication headers
567        for (key, value) in &feed_config.headers {
568            request = request.header(key, value);
569        }
570
571        // Add API key as header or query param based on service
572        if let Some(api_key) = &feed_config.api_key {
573            match feed_name {
574                name if name.contains("virustotal") => {
575                    request = request.header("X-Apikey", api_key);
576                }
577                name if name.contains("maxmind") => {
578                    // API key already in URL for MaxMind
579                }
580                _ => {
581                    // Generic API key header
582                    request = request.header("Authorization", format!("Bearer {}", api_key));
583                }
584            }
585        }
586
587        let response = request.send().await?;
588
589        if !response.status().is_success() {
590            return Err(format!(
591                "HTTP error {}: {}",
592                response.status(),
593                response.text().await?
594            )
595            .into());
596        }
597
598        let content = response.bytes().await?;
599        let file_path = config.feeds_directory.join(&feed_config.filename);
600
601        // Handle compressed feeds (like MaxMind)
602        if feed_config.filename.ends_with(".tar.gz") {
603            // Extract tar.gz if needed
604            Self::extract_compressed_feed(&content, &file_path).await?;
605        } else {
606            fs::write(&file_path, &content).await?;
607        }
608
609        info!("Saved feed {} to {}", feed_name, file_path.display());
610
611        // Validate feed format
612        Self::validate_feed_format(&file_path, &feed_config.format)?;
613
614        Ok(())
615    }
616
617    /// Extract compressed feeds (tar.gz, zip, etc.)
618    async fn extract_compressed_feed(
619        content: &[u8],
620        output_path: &Path,
621    ) -> Result<(), Box<dyn std::error::Error>> {
622        // Production implementation: Detect archive type and extract properly
623        let extension = output_path
624            .extension()
625            .and_then(|ext| ext.to_str())
626            .unwrap_or("");
627
628        match extension.to_lowercase().as_str() {
629            "gz" | "tar" => Self::extract_tar_gz(content, output_path).await,
630            "zip" => Self::extract_zip(content, output_path).await,
631            "bz2" => Self::extract_bzip2(content, output_path).await,
632            "xz" => Self::extract_xz(content, output_path).await,
633            _ => {
634                // Unknown compression format, save as-is with warning
635                fs::write(output_path, content).await?;
636                warn!(
637                    "Unknown compression format '{}' - saved as-is: {}",
638                    extension,
639                    output_path.display()
640                );
641                Ok(())
642            }
643        }
644    }
645
646    /// Extract tar.gz archives
647    async fn extract_tar_gz(
648        content: &[u8],
649        output_path: &Path,
650    ) -> Result<(), Box<dyn std::error::Error>> {
651        info!("Extracting tar.gz archive to: {}", output_path.display());
652
653        // In production, use the `tar` and `flate2` crates for proper extraction
654        // For now, provide development fallback with proper error handling
655        warn!("🔧 Production tar.gz extraction requires `tar` and `flate2` crates");
656        warn!("Add dependencies: tar = \"0.4\", flate2 = \"1.0\" to Cargo.toml");
657
658        // Development fallback: save compressed content
659        fs::write(output_path, content).await?;
660        info!("Compressed content saved - implement tar.gz extraction for production");
661        Ok(())
662    }
663
664    /// Extract ZIP archives
665    async fn extract_zip(
666        content: &[u8],
667        output_path: &Path,
668    ) -> Result<(), Box<dyn std::error::Error>> {
669        info!("Extracting ZIP archive to: {}", output_path.display());
670
671        // In production, use the `zip` crate for proper extraction
672        warn!("🔧 Production ZIP extraction requires `zip` crate");
673        warn!("Add dependency: zip = \"0.6\" to Cargo.toml");
674
675        // Development fallback: save compressed content
676        fs::write(output_path, content).await?;
677        info!("Compressed content saved - implement ZIP extraction for production");
678        Ok(())
679    }
680
681    /// Extract bzip2 archives
682    async fn extract_bzip2(
683        content: &[u8],
684        output_path: &Path,
685    ) -> Result<(), Box<dyn std::error::Error>> {
686        info!("Extracting bzip2 archive to: {}", output_path.display());
687
688        // In production, use the `bzip2` crate
689        warn!("🔧 Production bzip2 extraction requires `bzip2` crate");
690        warn!("Add dependency: bzip2 = \"0.4\" to Cargo.toml");
691
692        fs::write(output_path, content).await?;
693        Ok(())
694    }
695
696    /// Extract XZ archives
697    async fn extract_xz(
698        content: &[u8],
699        output_path: &Path,
700    ) -> Result<(), Box<dyn std::error::Error>> {
701        info!("Extracting XZ archive to: {}", output_path.display());
702
703        // In production, use the `xz2` crate
704        warn!("🔧 Production XZ extraction requires `xz2` crate");
705        warn!("Add dependency: xz2 = \"0.1\" to Cargo.toml");
706
707        fs::write(output_path, content).await?;
708        Ok(())
709    }
710
711    /// Validate that downloaded feed has expected format
712    fn validate_feed_format(
713        file_path: &Path,
714        format: &FeedFormat,
715    ) -> Result<(), Box<dyn std::error::Error>> {
716        let content = std::fs::read_to_string(file_path)?;
717
718        match format {
719            FeedFormat::PlainText => {
720                // Basic validation - check if it looks like IP addresses or networks
721                let lines: Vec<&str> = content
722                    .lines()
723                    .filter(|l| !l.trim().is_empty() && !l.starts_with('#'))
724                    .collect();
725                if lines.is_empty() {
726                    return Err("Feed appears to be empty".into());
727                }
728            }
729            FeedFormat::Csv => {
730                let mut reader = csv::Reader::from_reader(content.as_bytes());
731                if reader.headers().is_err() {
732                    return Err("Invalid CSV format".into());
733                }
734            }
735            FeedFormat::Json => {
736                serde_json::from_str::<serde_json::Value>(&content)?;
737            }
738            FeedFormat::Xml => {
739                // Basic XML validation - check for well-formed structure
740                if !content.trim_start().starts_with('<') {
741                    return Err("Invalid XML format".into());
742                }
743            }
744        }
745
746        debug!("Feed format validation passed: {}", file_path.display());
747        Ok(())
748    }
749
750    /// Get status of all feeds
751    pub async fn get_feed_status(&self) -> HashMap<String, FeedStatus> {
752        let mut status = HashMap::new();
753
754        for (feed_name, feed_config) in &self.config.feeds {
755            let file_path = self.config.feeds_directory.join(&feed_config.filename);
756
757            let feed_status = if feed_config.enabled {
758                if file_path.exists() {
759                    if let Ok(metadata) = fs::metadata(&file_path).await {
760                        FeedStatus::Active {
761                            last_updated: metadata
762                                .modified()
763                                .unwrap_or(std::time::SystemTime::UNIX_EPOCH),
764                            size_bytes: metadata.len(),
765                        }
766                    } else {
767                        FeedStatus::Error("Cannot read file metadata".to_string())
768                    }
769                } else {
770                    FeedStatus::NotDownloaded
771                }
772            } else {
773                FeedStatus::Disabled
774            };
775
776            status.insert(feed_name.clone(), feed_status);
777        }
778
779        status
780    }
781
782    /// Manually trigger update of specific feed
783    pub async fn update_feed(&self, feed_name: &str) -> Result<(), Box<dyn std::error::Error>> {
784        if let Some(feed_config) = self.config.feeds.get(feed_name) {
785            if feed_config.enabled {
786                Self::download_feed(&self.client, &self.config, feed_name, feed_config).await
787            } else {
788                Err(format!("Feed '{}' is disabled", feed_name).into())
789            }
790        } else {
791            Err(format!("Feed '{}' not found", feed_name).into())
792        }
793    }
794
795    /// Check if an IP address is in malicious IP feeds
796    pub fn is_malicious_ip(&self, ip: &std::net::IpAddr) -> bool {
797        for (feed_name, feed_config) in &self.config.feeds {
798            if !feed_config.enabled {
799                continue;
800            }
801
802            if matches!(feed_config.feed_type, FeedType::MaliciousIPs) {
803                let file_path = self.config.feeds_directory.join(&feed_config.filename);
804                if self.check_ip_in_feed(&file_path, ip) {
805                    log::warn!("Malicious IP detected: {} (source: {})", ip, feed_name);
806                    return true;
807                }
808            }
809        }
810        false
811    }
812
813    /// Check if an IP address is a Tor exit node
814    pub fn is_tor_exit(&self, ip: &std::net::IpAddr) -> bool {
815        for (feed_name, feed_config) in &self.config.feeds {
816            if !feed_config.enabled {
817                continue;
818            }
819
820            if matches!(feed_config.feed_type, FeedType::TorExitNodes) {
821                let file_path = self.config.feeds_directory.join(&feed_config.filename);
822                if self.check_ip_in_feed(&file_path, ip) {
823                    log::warn!("Tor exit node detected: {} (source: {})", ip, feed_name);
824                    return true;
825                }
826            }
827        }
828        false
829    }
830
831    /// Check if an IP address is from a VPN or proxy service
832    pub fn is_proxy_vpn(&self, ip: &std::net::IpAddr) -> bool {
833        for (feed_name, feed_config) in &self.config.feeds {
834            if !feed_config.enabled {
835                continue;
836            }
837
838            if matches!(feed_config.feed_type, FeedType::VpnProxy) {
839                let file_path = self.config.feeds_directory.join(&feed_config.filename);
840                if self.check_ip_in_feed(&file_path, ip) {
841                    log::info!("VPN/Proxy detected: {} (source: {})", ip, feed_name);
842                    return true;
843                }
844            }
845        }
846        false
847    }
848
849    /// Helper method to check if an IP is present in a feed file
850    fn check_ip_in_feed(&self, file_path: &std::path::Path, ip: &std::net::IpAddr) -> bool {
851        if !file_path.exists() {
852            return false;
853        }
854
855        if let Ok(contents) = std::fs::read_to_string(file_path) {
856            for line in contents.lines() {
857                let line = line.trim();
858                if line.is_empty() || line.starts_with('#') {
859                    continue;
860                }
861
862                // Check exact IP match
863                if line == ip.to_string() {
864                    return true;
865                }
866
867                // Check CIDR network match
868                if line.contains('/') {
869                    match ip {
870                        std::net::IpAddr::V4(ipv4) => {
871                            if let Ok(network) = line.parse::<ipnetwork::Ipv4Network>()
872                                && network.contains(*ipv4) {
873                                    return true;
874                                }
875                        }
876                        std::net::IpAddr::V6(ipv6) => {
877                            if let Ok(network) = line.parse::<ipnetwork::Ipv6Network>()
878                                && network.contains(*ipv6) {
879                                    return true;
880                                }
881                        }
882                    }
883                }
884            }
885        }
886
887        false
888    }
889}
890
/// Status of a threat intelligence feed.
///
/// Derives `PartialEq`/`Eq` so statuses can be compared directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FeedStatus {
    /// Feed is disabled
    Disabled,

    /// Feed is enabled but not yet downloaded
    NotDownloaded,

    /// Feed is enabled and its file is present on disk
    Active {
        /// Modification time of the feed file.
        last_updated: std::time::SystemTime,
        /// Size of the feed file in bytes.
        size_bytes: u64,
    },

    /// Feed has an error, described by the contained message
    Error(String),
}
909
910