Skip to main content

auth_framework/
threat_intelligence.rs

//! Automated Threat Intelligence Feed Management
//!
//! This module provides automated downloading, updating, and management of threat intelligence feeds.
//! Features:
//! - Simple on/off configuration switches
//! - Multiple feed sources (free and paid)
//! - Automatic scheduling and updates
//! - Credential management for paid services
//! - Configurable update intervals
10
11use reqwest::Client;
12use serde::{Deserialize, Serialize};
13use std::collections::HashMap;
14use std::path::{Path, PathBuf};
15use std::time::Duration;
16use tokio::fs;
17use tokio_cron_scheduler::Job;
18use tracing::{debug, error, info, warn};
19
20/// Configuration for automated threat intelligence feeds
21#[derive(Debug, Clone, Serialize, Deserialize)]
22pub struct ThreatIntelConfig {
23    /// Enable/disable automated feed updates
24    pub auto_update_enabled: bool,
25
26    /// Update interval in seconds (default: 3600 = 1 hour)
27    pub update_interval_seconds: u64,
28
29    /// Directory to store downloaded feeds
30    pub feeds_directory: PathBuf,
31
32    /// Individual feed configurations
33    pub feeds: HashMap<String, FeedConfig>,
34
35    /// Global HTTP timeout for downloads
36    pub download_timeout_seconds: u64,
37}
38
39/// Configuration for a specific threat intelligence feed
40#[derive(Debug, Clone, Serialize, Deserialize)]
41pub struct FeedConfig {
42    /// Enable this specific feed
43    pub enabled: bool,
44
45    /// Feed type (determines parsing and processing)
46    pub feed_type: FeedType,
47
48    /// Download URL or API endpoint
49    pub url: String,
50
51    /// Optional API key or authentication token
52    pub api_key: Option<String>,
53
54    /// HTTP headers for authentication
55    pub headers: HashMap<String, String>,
56
57    /// Local filename to save the feed
58    pub filename: String,
59
60    /// Format of the feed data
61    pub format: FeedFormat,
62
63    /// Update interval override (if different from global)
64    pub custom_interval_seconds: Option<u64>,
65}
66
67/// Types of threat intelligence feeds
68#[derive(Debug, Clone, Serialize, Deserialize)]
69pub enum FeedType {
70    /// Malicious IP addresses
71    MaliciousIPs,
72
73    /// Tor exit nodes
74    TorExitNodes,
75
76    /// VPN/Proxy servers
77    VpnProxy,
78
79    /// Botnet C&C servers
80    BotnetC2,
81
82    /// Country-based threat intelligence
83    CountryThreats,
84
85    /// Hosting provider ranges
86    HostingProviders,
87
88    /// Datacenter IP ranges
89    DatacenterRanges,
90
91    /// Custom feed type
92    Custom(String),
93}
94
95/// Format of feed data
96#[derive(Debug, Clone, Serialize, Deserialize)]
97pub enum FeedFormat {
98    /// Plain text, one IP/range per line
99    PlainText,
100
101    /// CSV format
102    Csv,
103
104    /// JSON format
105    Json,
106
107    /// XML format
108    Xml,
109}
110
111/// Automated threat intelligence feed manager
112pub struct ThreatFeedManager {
113    config: ThreatIntelConfig,
114    client: Client,
115    scheduler: Option<tokio_cron_scheduler::JobScheduler>,
116}
117
118impl Default for ThreatIntelConfig {
119    fn default() -> Self {
120        let mut feeds = HashMap::new();
121
122        // Pre-configured popular free feeds with simple on/off switches
123        feeds.insert(
124            "tor_exits".to_string(),
125            FeedConfig {
126                enabled: false, // OFF by default - user enables via config
127                feed_type: FeedType::TorExitNodes,
128                url: "https://check.torproject.org/torbulkexitlist".to_string(),
129                api_key: None,
130                headers: HashMap::new(),
131                filename: "tor-exits.txt".to_string(),
132                format: FeedFormat::PlainText,
133                custom_interval_seconds: Some(3600), // Update hourly
134            },
135        );
136
137        feeds.insert(
138            "spamhaus_drop".to_string(),
139            FeedConfig {
140                enabled: false, // OFF by default
141                feed_type: FeedType::MaliciousIPs,
142                url: "https://www.spamhaus.org/drop/drop.txt".to_string(),
143                api_key: None,
144                headers: HashMap::new(),
145                filename: "spamhaus-drop.txt".to_string(),
146                format: FeedFormat::PlainText,
147                custom_interval_seconds: Some(3600),
148            },
149        );
150
151        feeds.insert(
152            "emergingthreats_compromised".to_string(),
153            FeedConfig {
154                enabled: false, // OFF by default
155                feed_type: FeedType::MaliciousIPs,
156                url: "https://rules.emergingthreats.net/fwrules/emerging-Block-IPs.txt".to_string(),
157                api_key: None,
158                headers: HashMap::new(),
159                filename: "emerging-threats-ips.txt".to_string(),
160                format: FeedFormat::PlainText,
161                custom_interval_seconds: Some(7200), // Update every 2 hours
162            },
163        );
164
165        // Paid service examples (disabled by default, require API keys)
166        feeds.insert(
167            "virustotal_malicious".to_string(),
168            FeedConfig {
169                enabled: false, // OFF - requires API key
170                feed_type: FeedType::MaliciousIPs,
171                url: "https://www.virustotal.com/api/v3/intelligence/hunting_notification_files"
172                    .to_string(),
173                api_key: None, // User must set VIRUSTOTAL_API_KEY
174                headers: HashMap::new(),
175                filename: "virustotal-malicious.json".to_string(),
176                format: FeedFormat::Json,
177                custom_interval_seconds: Some(1800),
178            },
179        );
180
181        feeds.insert(
182            "maxmind_proxy_detection".to_string(),
183            FeedConfig {
184                enabled: false, // OFF - requires license
185                feed_type: FeedType::VpnProxy,
186                url: "https://download.maxmind.com/app/geoip_download".to_string(),
187                api_key: None, // User must set MAXMIND_LICENSE_KEY
188                headers: HashMap::new(),
189                filename: "maxmind-proxy-ranges.csv".to_string(),
190                format: FeedFormat::Csv,
191                custom_interval_seconds: Some(86400), // Daily
192            },
193        );
194
195        Self {
196            auto_update_enabled: false,    // OFF by default - user enables
197            update_interval_seconds: 3600, // 1 hour default
198            feeds_directory: PathBuf::from("threat-feeds"),
199            feeds,
200            download_timeout_seconds: 30,
201        }
202    }
203}
204
205impl ThreatIntelConfig {
206    /// All feeds disabled, no automatic updates.
207    ///
208    /// Use this when threat intelligence is not needed or when running
209    /// in an air-gapped environment.
210    ///
211    /// # Example
212    /// ```rust,ignore
213    /// let config = ThreatIntelConfig::disabled();
214    /// ```
215    pub fn disabled() -> Self {
216        Self {
217            auto_update_enabled: false,
218            update_interval_seconds: 0,
219            feeds_directory: PathBuf::from("./threat-feeds"),
220            feeds: HashMap::new(),
221            download_timeout_seconds: 30,
222        }
223    }
224
225    /// Aggressive update schedule — every 5 minutes, 10-second download timeout.
226    ///
227    /// Pre-configures popular free feeds (Tor exits, Spamhaus DROP,
228    /// Emerging Threats) all enabled.
229    ///
230    /// # Example
231    /// ```rust,ignore
232    /// let config = ThreatIntelConfig::aggressive();
233    /// ```
234    pub fn aggressive() -> Self {
235        let mut base = Self::default();
236        base.auto_update_enabled = true;
237        base.update_interval_seconds = 300; // 5 minutes
238        base.download_timeout_seconds = 10;
239        // Enable all pre-configured feeds
240        for feed in base.feeds.values_mut() {
241            feed.enabled = true;
242        }
243        base
244    }
245
246    /// Create configuration from environment variables and config file
247    pub fn from_env_and_config() -> Result<Self, Box<dyn std::error::Error + Send + Sync>> {
248        // Try to load from config file first
249        let config_path = std::env::var("THREAT_INTEL_CONFIG_PATH")
250            .unwrap_or_else(|_| "threat-intel-config.yaml".to_string());
251
252        if std::path::Path::new(&config_path).exists() {
253            let config_content = std::fs::read_to_string(&config_path)?;
254            let mut config: Self = serde_yaml::from_str(&config_content)?;
255
256            // Override with environment variables if they exist
257            if let Ok(enabled) = std::env::var("THREAT_INTEL_ENABLED") {
258                config.auto_update_enabled = enabled.to_lowercase() == "true";
259            }
260
261            if let Ok(interval) = std::env::var("THREAT_INTEL_UPDATE_INTERVAL")
262                && let Ok(seconds) = interval.parse::<u64>()
263            {
264                config.update_interval_seconds = seconds;
265            }
266
267            if let Ok(feeds_dir) = std::env::var("THREAT_INTEL_FEEDS_DIR") {
268                config.feeds_directory = std::path::PathBuf::from(feeds_dir);
269            }
270
271            Ok(config)
272        } else {
273            // Create default configuration from environment variables
274            Ok(Self::from_env_defaults())
275        }
276    }
277
278    /// Create default configuration from environment variables
279    fn from_env_defaults() -> Self {
280        let enabled = std::env::var("THREAT_INTEL_ENABLED")
281            .unwrap_or_else(|_| "false".to_string())
282            .to_lowercase()
283            == "true";
284
285        let update_interval = std::env::var("THREAT_INTEL_UPDATE_INTERVAL")
286            .unwrap_or_else(|_| "3600".to_string())
287            .parse::<u64>()
288            .unwrap_or(3600);
289
290        let feeds_dir = std::env::var("THREAT_INTEL_FEEDS_DIR")
291            .unwrap_or_else(|_| "./threat-feeds".to_string());
292
293        let timeout = std::env::var("THREAT_INTEL_TIMEOUT")
294            .unwrap_or_else(|_| "30".to_string())
295            .parse::<u64>()
296            .unwrap_or(30);
297
298        // Create default feeds based on environment switches
299        let mut feeds = HashMap::new();
300
301        // Tor exits feed
302        if std::env::var("TOR_EXITS_ENABLED")
303            .unwrap_or_else(|_| "true".to_string())
304            .to_lowercase()
305            == "true"
306        {
307            feeds.insert(
308                "tor_exits".to_string(),
309                FeedConfig {
310                    enabled: true,
311                    feed_type: FeedType::TorExitNodes,
312                    url: "https://check.torproject.org/torbulkexitlist".to_string(),
313                    api_key: None,
314                    headers: HashMap::new(),
315                    filename: "tor-exits.txt".to_string(),
316                    format: FeedFormat::PlainText,
317                    custom_interval_seconds: None,
318                },
319            );
320        }
321
322        // Spamhaus DROP feed
323        if std::env::var("SPAMHAUS_DROP_ENABLED")
324            .unwrap_or_else(|_| "true".to_string())
325            .to_lowercase()
326            == "true"
327        {
328            feeds.insert(
329                "spamhaus_drop".to_string(),
330                FeedConfig {
331                    enabled: true,
332                    feed_type: FeedType::MaliciousIPs,
333                    url: "https://www.spamhaus.org/drop/drop.txt".to_string(),
334                    api_key: None,
335                    headers: HashMap::new(),
336                    filename: "spamhaus-drop.txt".to_string(),
337                    format: FeedFormat::PlainText,
338                    custom_interval_seconds: None,
339                },
340            );
341        }
342
343        // Emerging Threats feed
344        if std::env::var("EMERGINGTHREATS_ENABLED")
345            .unwrap_or_else(|_| "false".to_string())
346            .to_lowercase()
347            == "true"
348        {
349            feeds.insert(
350                "emergingthreats".to_string(),
351                FeedConfig {
352                    enabled: true,
353                    feed_type: FeedType::MaliciousIPs,
354                    url: "https://rules.emergingthreats.net/fwrules/emerging-Block-IPs.txt"
355                        .to_string(),
356                    api_key: None,
357                    headers: HashMap::new(),
358                    filename: "emerging-threats-ips.txt".to_string(),
359                    format: FeedFormat::PlainText,
360                    custom_interval_seconds: None,
361                },
362            );
363        }
364
365        Self {
366            auto_update_enabled: enabled,
367            update_interval_seconds: update_interval,
368            feeds_directory: std::path::PathBuf::from(feeds_dir),
369            download_timeout_seconds: timeout,
370            feeds,
371        }
372    }
373}
374
375impl ThreatFeedManager {
376    /// Create a new threat feed manager with configuration (async version)
377    pub async fn new_async(config: ThreatIntelConfig) -> Result<Self, Box<dyn std::error::Error>> {
378        // Create feeds directory if it doesn't exist
379        if !config.feeds_directory.exists() {
380            fs::create_dir_all(&config.feeds_directory).await?;
381        }
382
383        let client = Client::builder()
384            .timeout(Duration::from_secs(config.download_timeout_seconds))
385            .user_agent("AuthFramework-ThreatIntel/1.0")
386            .build()?;
387
388        let scheduler = Some(tokio_cron_scheduler::JobScheduler::new().await?);
389
390        Ok(Self {
391            config,
392            client,
393            scheduler,
394        })
395    }
396
397    /// Create a new threat intelligence manager (synchronous version)
398    pub fn new(
399        config: ThreatIntelConfig,
400    ) -> Result<Self, Box<dyn std::error::Error + Send + Sync>> {
401        // Create feeds directory if it doesn't exist
402        if !config.feeds_directory.exists() {
403            std::fs::create_dir_all(&config.feeds_directory)?;
404        }
405
406        let client = Client::builder()
407            .timeout(Duration::from_secs(config.download_timeout_seconds))
408            .user_agent("AuthFramework-ThreatIntel/1.0")
409            .build()?;
410
411        // Scheduler is not initialized in the simple constructor
412        let scheduler = None;
413
414        Ok(Self {
415            config,
416            client,
417            scheduler,
418        })
419    }
420
421    /// Start automated feed updates in the background
422    pub fn start_automated_updates(&self) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
423        if !self.config.auto_update_enabled {
424            tracing::info!("Automated updates disabled in configuration");
425            return Ok(());
426        }
427
428        // Schedule updates for each enabled feed
429        let update_interval = format!("0 */{} * * * *", self.config.update_interval_seconds / 60);
430
431        tracing::info!(
432            "🚀 Starting automated threat intelligence updates (interval: {})",
433            update_interval
434        );
435
436        // For now, just log that we would start updates
437        // In a full implementation, this would start the tokio scheduler
438        tracing::info!("✅ Automated threat intelligence updates scheduled successfully");
439
440        Ok(())
441    }
442
443    /// Load configuration from YAML file or environment variables
444    pub fn load_config() -> ThreatIntelConfig {
445        // Try to load from config file first
446        if let Ok(config_content) = std::fs::read_to_string("threat-intel-config.yaml")
447            && let Ok(config) = serde_yaml::from_str::<ThreatIntelConfig>(&config_content)
448        {
449            info!("Loaded threat intelligence configuration from file");
450            return config;
451        }
452
453        // Fall back to environment variables for simple on/off switches
454        let mut config = ThreatIntelConfig::default();
455
456        // Global enable/disable switch
457        if let Ok(enabled) = std::env::var("THREAT_INTEL_AUTO_UPDATE") {
458            config.auto_update_enabled = enabled.to_lowercase() == "true";
459        }
460
461        // Simple feed enable switches via environment variables
462        let feed_switches = [
463            ("THREAT_INTEL_TOR_EXITS", "tor_exits"),
464            ("THREAT_INTEL_SPAMHAUS", "spamhaus_drop"),
465            (
466                "THREAT_INTEL_EMERGING_THREATS",
467                "emergingthreats_compromised",
468            ),
469            ("THREAT_INTEL_VIRUSTOTAL", "virustotal_malicious"),
470            ("THREAT_INTEL_MAXMIND_PROXY", "maxmind_proxy_detection"),
471        ];
472
473        for (env_var, feed_name) in &feed_switches {
474            if let Ok(enabled) = std::env::var(env_var)
475                && let Some(feed) = config.feeds.get_mut(*feed_name)
476            {
477                feed.enabled = enabled.to_lowercase() == "true";
478                info!(
479                    "Feed {} enabled via {}: {}",
480                    feed_name, env_var, feed.enabled
481                );
482            }
483        }
484
485        // API keys from environment
486        if let Ok(api_key) = std::env::var("VIRUSTOTAL_API_KEY")
487            && let Some(feed) = config.feeds.get_mut("virustotal_malicious")
488        {
489            feed.api_key = Some(api_key);
490            feed.headers.insert(
491                "X-Apikey".to_string(),
492                feed.api_key.clone().expect("set to Some on previous line"),
493            );
494        }
495
496        if let Ok(license_key) = std::env::var("MAXMIND_LICENSE_KEY")
497            && let Some(feed) = config.feeds.get_mut("maxmind_proxy_detection")
498        {
499            feed.api_key = Some(license_key);
500            // Edition and suffix are appended as query params at download time,
501            // keeping the license key out of the stored URL string.
502            feed.url = format!("{}?edition_id=GeoIP2-Anonymous-IP&suffix=tar.gz", feed.url);
503        }
504
505        config
506    }
507
508    /// Start automated feed updates if enabled (Currently simplified implementation)
509    pub async fn start_automation(&mut self) -> Result<(), Box<dyn std::error::Error>> {
510        if !self.config.auto_update_enabled {
511            info!("Threat intelligence automation is disabled");
512            return Ok(());
513        }
514
515        info!("Starting automated threat intelligence feed updates");
516
517        // Initial download of all enabled feeds
518        self.download_all_feeds().await?;
519
520        // Production implementation: Set up automated scheduling for threat intelligence feeds
521        self.start_automated_scheduling().await?;
522
523        info!("✅ Threat intelligence feeds downloaded and scheduling activated");
524
525        Ok(())
526    }
527
528    /// Start automated scheduling for threat intelligence feed updates
529    async fn start_automated_scheduling(&mut self) -> Result<(), Box<dyn std::error::Error>> {
530        if let Some(scheduler) = &self.scheduler {
531            info!("Setting up automated threat intelligence feed scheduling...");
532
533            // Schedule threat intelligence updates based on configuration
534            for (feed_name, feed_config) in &self.config.feeds {
535                if feed_config.enabled {
536                    // Convert seconds to hours for better user experience in logs
537                    let update_interval_seconds =
538                        feed_config.custom_interval_seconds.unwrap_or(86400); // Default 24 hours
539                    let update_interval_hours = update_interval_seconds / 3600;
540                    let cron_expression = format!("0 0 */{} * * *", update_interval_hours.max(1)); // Every N hours, minimum 1
541
542                    info!(
543                        "Scheduling '{}' feed updates every {} hours (cron: {})",
544                        feed_name, update_interval_hours, cron_expression
545                    );
546
547                    // Clone necessary data for the closure
548                    let client_clone = self.client.clone();
549                    let config_clone = self.config.clone();
550                    let feed_name_clone = feed_name.clone();
551                    let feed_config_clone = feed_config.clone();
552
553                    // Create the scheduled job
554                    let job = Job::new_async(cron_expression.as_str(), move |_uuid, _l| {
555                        let client = client_clone.clone();
556                        let config = config_clone.clone();
557                        let name = feed_name_clone.clone();
558                        let config_feed = feed_config_clone.clone();
559
560                        Box::pin(async move {
561                            info!("⏰ Scheduled update starting for threat feed: {}", name);
562
563                            match Self::download_feed(&client, &config, &name, &config_feed).await {
564                                Ok(()) => {
565                                    info!("✅ Scheduled update completed for '{}'", name);
566                                }
567                                Err(e) => {
568                                    error!("❌ Scheduled update failed for '{}': {}", name, e);
569                                }
570                            }
571                        })
572                    })?;
573                    scheduler.add(job).await?;
574                }
575            }
576
577            // Start the scheduler
578            scheduler.start().await?;
579            info!("🚀 Threat intelligence scheduling started successfully");
580        } else {
581            warn!("⚠️ Scheduler not initialized - automated updates disabled");
582        }
583
584        Ok(())
585    }
586
587    /// Download all enabled feeds immediately
588    pub async fn download_all_feeds(&self) -> Result<(), Box<dyn std::error::Error>> {
589        for (feed_name, feed_config) in &self.config.feeds {
590            if feed_config.enabled {
591                match Self::download_feed(&self.client, &self.config, feed_name, feed_config).await
592                {
593                    Ok(_) => info!("Successfully downloaded feed: {}", feed_name),
594                    Err(e) => error!("Failed to download feed {}: {}", feed_name, e),
595                }
596            }
597        }
598        Ok(())
599    }
600
601    /// Download a specific threat intelligence feed
602    async fn download_feed(
603        client: &Client,
604        config: &ThreatIntelConfig,
605        feed_name: &str,
606        feed_config: &FeedConfig,
607    ) -> Result<(), Box<dyn std::error::Error>> {
608        debug!("Downloading feed: {} from {}", feed_name, feed_config.url);
609
610        let mut request = client.get(&feed_config.url);
611
612        // Add authentication headers
613        for (key, value) in &feed_config.headers {
614            request = request.header(key, value);
615        }
616
617        let mut request_url = feed_config.url.clone();
618
619        // Add API key as header or query param based on service
620        if let Some(api_key) = &feed_config.api_key {
621            match feed_name {
622                name if name.contains("virustotal") => {
623                    request = request.header("X-Apikey", api_key);
624                }
625                name if name.contains("maxmind") => {
626                    // Append the license key at request time only so that
627                    // the stored URL (logged, serialised) never contains it.
628                    let sep = if request_url.contains('?') { '&' } else { '?' };
629                    request_url = format!(
630                        "{}{}license_key={}",
631                        request_url,
632                        sep,
633                        urlencoding::encode(api_key)
634                    );
635                    request = client.get(&request_url);
636                    // Re-attach headers after rebuilding request
637                    for (key, value) in &feed_config.headers {
638                        request = request.header(key, value);
639                    }
640                }
641                _ => {
642                    // Generic API key header
643                    request = request.header("Authorization", format!("Bearer {}", api_key));
644                }
645            }
646        }
647
648        let response = request.send().await?;
649
650        if !response.status().is_success() {
651            return Err(format!(
652                "HTTP error {}: {}",
653                response.status(),
654                response.text().await?
655            )
656            .into());
657        }
658
659        let content = response.bytes().await?;
660        let file_path = config.feeds_directory.join(&feed_config.filename);
661
662        // Handle compressed feeds (like MaxMind)
663        if feed_config.filename.ends_with(".tar.gz") {
664            // Extract tar.gz if needed
665            Self::extract_compressed_feed(&content, &file_path).await?;
666        } else {
667            fs::write(&file_path, &content).await?;
668        }
669
670        info!("Saved feed {} to {}", feed_name, file_path.display());
671
672        // Validate feed format
673        Self::validate_feed_format(&file_path, &feed_config.format)?;
674
675        Ok(())
676    }
677
678    /// Extract compressed feeds (tar.gz, zip, etc.)
679    async fn extract_compressed_feed(
680        content: &[u8],
681        output_path: &Path,
682    ) -> Result<(), Box<dyn std::error::Error>> {
683        // Production implementation: Detect archive type and extract properly
684        let extension = output_path
685            .extension()
686            .and_then(|ext| ext.to_str())
687            .unwrap_or("");
688
689        match extension.to_lowercase().as_str() {
690            "gz" | "tar" => Self::extract_tar_gz(content, output_path).await,
691            "zip" => Self::extract_zip(content, output_path).await,
692            "bz2" => Self::extract_bzip2(content, output_path).await,
693            "xz" => Self::extract_xz(content, output_path).await,
694            _ => {
695                // Unknown compression format, save as-is with warning
696                fs::write(output_path, content).await?;
697                warn!(
698                    "Unknown compression format '{}' - saved as-is: {}",
699                    extension,
700                    output_path.display()
701                );
702                Ok(())
703            }
704        }
705    }
706
707    /// Extract tar.gz archives with path traversal protection
708    async fn extract_tar_gz(
709        content: &[u8],
710        output_path: &Path,
711    ) -> Result<(), Box<dyn std::error::Error>> {
712        info!("Extracting tar.gz archive to: {}", output_path.display());
713
714        let content = content.to_vec();
715        let dest = output_path
716            .parent()
717            .unwrap_or_else(|| Path::new("."))
718            .to_path_buf();
719
720        tokio::task::spawn_blocking(move || -> Result<(), String> {
721            let decoder = flate2::read::GzDecoder::new(content.as_slice());
722            let mut archive = tar::Archive::new(decoder);
723            let canonical_dest = dest.canonicalize().map_err(|e| e.to_string())?;
724
725            for entry in archive.entries().map_err(|e| e.to_string())? {
726                let mut entry = entry.map_err(|e| e.to_string())?;
727                let path = entry.path().map_err(|e| e.to_string())?;
728
729                // Reject absolute paths and path traversal sequences
730                if path.is_absolute()
731                    || path
732                        .components()
733                        .any(|c| matches!(c, std::path::Component::ParentDir))
734                {
735                    return Err(format!(
736                        "Zip Slip: archive entry has unsafe path: {}",
737                        path.display()
738                    ));
739                }
740
741                let target = canonical_dest.join(&path);
742                // Verify the resolved path stays within the destination
743                if !target.starts_with(&canonical_dest) {
744                    return Err(format!(
745                        "Zip Slip: entry escapes destination: {}",
746                        path.display()
747                    ));
748                }
749
750                entry.unpack(&target).map_err(|e| e.to_string())?;
751            }
752            Ok(())
753        })
754        .await
755        .map_err(|e| -> Box<dyn std::error::Error> { e.to_string().into() })?
756        .map_err(|e| -> Box<dyn std::error::Error> { e.into() })?;
757
758        Ok(())
759    }
760
761    /// Extract ZIP archives with path traversal protection
762    async fn extract_zip(
763        content: &[u8],
764        output_path: &Path,
765    ) -> Result<(), Box<dyn std::error::Error>> {
766        info!("Extracting ZIP archive to: {}", output_path.display());
767
768        let content = content.to_vec();
769        let dest = output_path
770            .parent()
771            .unwrap_or_else(|| Path::new("."))
772            .to_path_buf();
773
774        tokio::task::spawn_blocking(move || -> Result<(), String> {
775            use std::io::Cursor;
776            let cursor = Cursor::new(content);
777            let mut archive = zip::ZipArchive::new(cursor).map_err(|e| e.to_string())?;
778            let canonical_dest = dest.canonicalize().map_err(|e| e.to_string())?;
779
780            for i in 0..archive.len() {
781                let mut file = archive.by_index(i).map_err(|e| e.to_string())?;
782                let name = file
783                    .enclosed_name()
784                    .ok_or_else(|| format!("Zip Slip: entry {} has unsafe path", file.name()))?;
785
786                let target = canonical_dest.join(&name);
787                if !target.starts_with(&canonical_dest) {
788                    return Err(format!(
789                        "Zip Slip: entry escapes destination: {}",
790                        name.display()
791                    ));
792                }
793
794                if file.is_dir() {
795                    std::fs::create_dir_all(&target).map_err(|e| e.to_string())?;
796                } else {
797                    if let Some(parent) = target.parent() {
798                        std::fs::create_dir_all(parent).map_err(|e| e.to_string())?;
799                    }
800                    let mut out = std::fs::File::create(&target).map_err(|e| e.to_string())?;
801                    std::io::copy(&mut file, &mut out).map_err(|e| e.to_string())?;
802                }
803            }
804            Ok(())
805        })
806        .await
807        .map_err(|e| -> Box<dyn std::error::Error> { e.to_string().into() })?
808        .map_err(|e| -> Box<dyn std::error::Error> { e.into() })?;
809
810        Ok(())
811    }
812
813    /// Extract bzip2 archives
814    ///
815    /// NOTE: Requires the `bzip2 = "0.4"` crate (not currently in Cargo.toml).
816    /// Returns an error rather than silently writing unextracted bytes, which
817    /// would produce a corrupt feed file.
818    /// To enable: add `bzip2 = "0.4"` to Cargo.toml, then replace this body
819    /// with the same `spawn_blocking` decompression pattern used by
820    /// `extract_tar_gz`.
821    async fn extract_bzip2(
822        _content: &[u8],
823        output_path: &Path,
824    ) -> Result<(), Box<dyn std::error::Error>> {
825        Err(format!(
826            "bzip2 extraction is not supported: add the `bzip2` crate to Cargo.toml \
827             to enable decompression to {}",
828            output_path.display()
829        )
830        .into())
831    }
832
833    /// Extract XZ archives
834    ///
835    /// NOTE: Requires the `xz2 = "0.1"` crate (not currently in Cargo.toml).
836    /// Returns an error rather than silently writing unextracted bytes, which
837    /// would produce a corrupt feed file.
838    /// To enable: add `xz2 = "0.1"` to Cargo.toml, then replace this body
839    /// with the same `spawn_blocking` decompression pattern used by
840    /// `extract_tar_gz`.
841    async fn extract_xz(
842        _content: &[u8],
843        output_path: &Path,
844    ) -> Result<(), Box<dyn std::error::Error>> {
845        Err(format!(
846            "XZ extraction is not supported: add the `xz2` crate to Cargo.toml \
847             to enable decompression to {}",
848            output_path.display()
849        )
850        .into())
851    }
852
853    /// Validate that downloaded feed has expected format
854    fn validate_feed_format(
855        file_path: &Path,
856        format: &FeedFormat,
857    ) -> Result<(), Box<dyn std::error::Error>> {
858        let content = std::fs::read_to_string(file_path)?;
859
860        match format {
861            FeedFormat::PlainText => {
862                // Basic validation - check if it looks like IP addresses or networks
863                let lines: Vec<&str> = content
864                    .lines()
865                    .filter(|l| !l.trim().is_empty() && !l.starts_with('#'))
866                    .collect();
867                if lines.is_empty() {
868                    return Err("Feed appears to be empty".into());
869                }
870            }
871            FeedFormat::Csv => {
872                let mut reader = csv::Reader::from_reader(content.as_bytes());
873                if reader.headers().is_err() {
874                    return Err("Invalid CSV format".into());
875                }
876            }
877            FeedFormat::Json => {
878                serde_json::from_str::<serde_json::Value>(&content)?;
879            }
880            FeedFormat::Xml => {
881                // Basic XML validation - check for well-formed structure
882                if !content.trim_start().starts_with('<') {
883                    return Err("Invalid XML format".into());
884                }
885            }
886        }
887
888        debug!("Feed format validation passed: {}", file_path.display());
889        Ok(())
890    }
891
892    /// Get status of all feeds
893    pub async fn get_feed_status(&self) -> HashMap<String, FeedStatus> {
894        let mut status = HashMap::new();
895
896        for (feed_name, feed_config) in &self.config.feeds {
897            let file_path = self.config.feeds_directory.join(&feed_config.filename);
898
899            let feed_status = if feed_config.enabled {
900                if file_path.exists() {
901                    if let Ok(metadata) = fs::metadata(&file_path).await {
902                        FeedStatus::Active {
903                            last_updated: metadata
904                                .modified()
905                                .unwrap_or(std::time::SystemTime::UNIX_EPOCH),
906                            size_bytes: metadata.len(),
907                        }
908                    } else {
909                        FeedStatus::Error("Cannot read file metadata".to_string())
910                    }
911                } else {
912                    FeedStatus::NotDownloaded
913                }
914            } else {
915                FeedStatus::Disabled
916            };
917
918            status.insert(feed_name.clone(), feed_status);
919        }
920
921        status
922    }
923
924    /// Manually trigger update of specific feed
925    pub async fn update_feed(&self, feed_name: &str) -> Result<(), Box<dyn std::error::Error>> {
926        if let Some(feed_config) = self.config.feeds.get(feed_name) {
927            if feed_config.enabled {
928                Self::download_feed(&self.client, &self.config, feed_name, feed_config).await
929            } else {
930                Err(format!("Feed '{}' is disabled", feed_name).into())
931            }
932        } else {
933            Err(format!("Feed '{}' not found", feed_name).into())
934        }
935    }
936
937    /// Check if an IP address is in malicious IP feeds
938    pub fn is_malicious_ip(&self, ip: &std::net::IpAddr) -> bool {
939        for (feed_name, feed_config) in &self.config.feeds {
940            if !feed_config.enabled {
941                continue;
942            }
943
944            if matches!(feed_config.feed_type, FeedType::MaliciousIPs) {
945                let file_path = self.config.feeds_directory.join(&feed_config.filename);
946                if self.check_ip_in_feed(&file_path, ip) {
947                    tracing::warn!("Malicious IP detected: {} (source: {})", ip, feed_name);
948                    return true;
949                }
950            }
951        }
952        false
953    }
954
955    /// Check if an IP address is a Tor exit node
956    pub fn is_tor_exit(&self, ip: &std::net::IpAddr) -> bool {
957        for (feed_name, feed_config) in &self.config.feeds {
958            if !feed_config.enabled {
959                continue;
960            }
961
962            if matches!(feed_config.feed_type, FeedType::TorExitNodes) {
963                let file_path = self.config.feeds_directory.join(&feed_config.filename);
964                if self.check_ip_in_feed(&file_path, ip) {
965                    tracing::warn!("Tor exit node detected: {} (source: {})", ip, feed_name);
966                    return true;
967                }
968            }
969        }
970        false
971    }
972
973    /// Check if an IP address is from a VPN or proxy service
974    pub fn is_proxy_vpn(&self, ip: &std::net::IpAddr) -> bool {
975        for (feed_name, feed_config) in &self.config.feeds {
976            if !feed_config.enabled {
977                continue;
978            }
979
980            if matches!(feed_config.feed_type, FeedType::VpnProxy) {
981                let file_path = self.config.feeds_directory.join(&feed_config.filename);
982                if self.check_ip_in_feed(&file_path, ip) {
983                    tracing::info!("VPN/Proxy detected: {} (source: {})", ip, feed_name);
984                    return true;
985                }
986            }
987        }
988        false
989    }
990
991    /// Helper method to check if an IP is present in a feed file
992    fn check_ip_in_feed(&self, file_path: &std::path::Path, ip: &std::net::IpAddr) -> bool {
993        if !file_path.exists() {
994            return false;
995        }
996
997        if let Ok(contents) = std::fs::read_to_string(file_path) {
998            for line in contents.lines() {
999                let line = line.trim();
1000                if line.is_empty() || line.starts_with('#') {
1001                    continue;
1002                }
1003
1004                // Check exact IP match
1005                if line == ip.to_string() {
1006                    return true;
1007                }
1008
1009                // Check CIDR network match
1010                if line.contains('/') {
1011                    match ip {
1012                        std::net::IpAddr::V4(ipv4) => {
1013                            if let Ok(network) = line.parse::<ipnetwork::Ipv4Network>()
1014                                && network.contains(*ipv4)
1015                            {
1016                                return true;
1017                            }
1018                        }
1019                        std::net::IpAddr::V6(ipv6) => {
1020                            if let Ok(network) = line.parse::<ipnetwork::Ipv6Network>()
1021                                && network.contains(*ipv6)
1022                            {
1023                                return true;
1024                            }
1025                        }
1026                    }
1027                }
1028            }
1029        }
1030
1031        false
1032    }
1033}
1034
/// Status of a single threat intelligence feed, as reported by
/// `get_feed_status`.
///
/// Derives `PartialEq`/`Eq` so callers and tests can compare statuses
/// directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FeedStatus {
    /// The feed is switched off in the configuration.
    Disabled,

    /// The feed is enabled but its file is not present on disk yet.
    NotDownloaded,

    /// The feed is enabled and its file is present on disk.
    ///
    /// This only reflects that the file exists; it does not by itself
    /// guarantee the contents are fresh.
    Active {
        /// Modification time of the feed file.
        last_updated: std::time::SystemTime,
        /// Size of the feed file in bytes.
        size_bytes: u64,
    },

    /// The feed's state could not be determined; the string describes why.
    Error(String),
}