// riglr_web_tools/news.rs

1//! Comprehensive cryptocurrency and financial news aggregation
2//!
3//! This module provides news aggregation with heuristic-based sentiment analysis
4//! and market impact assessment for AI agents to stay informed about market developments.
5
6use crate::{client::WebClient, error::WebToolError};
7use async_trait::async_trait;
8use chrono::{DateTime, Utc};
9use regex::Regex;
10use riglr_core::provider::ApplicationContext;
11use riglr_core::sentiment::SentimentAnalyzerMarker;
12use riglr_macros::tool;
13use schemars::JsonSchema;
14use serde::{Deserialize, Serialize};
15use std::collections::HashMap;
16use std::hash::{Hash, Hasher};
17use std::sync::Arc;
18use tracing::{debug, info, warn};
19
/// Trait for pluggable sentiment analysis
///
/// Implementors score an article's text components and return a structured
/// [`NewsSentiment`]. The `Send + Sync` bound allows analyzers to be shared
/// across async tasks.
#[async_trait]
pub trait SentimentAnalyzer: Send + Sync {
    /// Analyze sentiment from text components
    ///
    /// `description` and `content` are optional; implementations combine
    /// whatever text is available with `title`.
    ///
    /// # Errors
    /// Returns a [`WebToolError`] if the underlying analyzer fails (the
    /// lexicon-based default in this module always succeeds).
    async fn analyze(
        &self,
        title: &str,
        description: Option<&str>,
        content: Option<&str>,
    ) -> Result<NewsSentiment, WebToolError>;
}
31
/// Default lexicon-based sentiment analyzer
///
/// Scores text by substring-matching a fixed vocabulary of weighted terms;
/// the built-in word lists are populated by the `Default` impl below.
pub struct LexiconSentimentAnalyzer {
    /// Positive words with their weights (each weight is a positive f64)
    positive_words: Vec<(&'static str, f64)>,
    /// Negative words with their weights (each weight is a negative f64)
    negative_words: Vec<(&'static str, f64)>,
}
39
40impl Default for LexiconSentimentAnalyzer {
41    fn default() -> Self {
42        Self {
43            positive_words: vec![
44                ("bullish", 0.8),
45                ("surge", 0.7),
46                ("rally", 0.7),
47                ("breakthrough", 0.8),
48                ("adoption", 0.6),
49                ("partnership", 0.6),
50                ("growth", 0.5),
51                ("success", 0.6),
52                ("innovative", 0.5),
53                ("leading", 0.4),
54                ("strong", 0.5),
55                ("positive", 0.5),
56                ("gains", 0.6),
57                ("rise", 0.5),
58                ("increase", 0.4),
59                ("improve", 0.5),
60                ("upgrade", 0.6),
61                ("expand", 0.5),
62                ("launch", 0.4),
63                ("milestone", 0.6),
64            ],
65            negative_words: vec![
66                ("bearish", -0.8),
67                ("crash", -0.9),
68                ("plunge", -0.8),
69                ("collapse", -0.9),
70                ("hack", -0.9),
71                ("exploit", -0.9),
72                ("scam", -0.9),
73                ("fraud", -0.9),
74                ("decline", -0.6),
75                ("fall", -0.5),
76                ("drop", -0.5),
77                ("loss", -0.6),
78                ("failure", -0.7),
79                ("risk", -0.4),
80                ("concern", -0.4),
81                ("warning", -0.5),
82                ("threat", -0.6),
83                ("vulnerable", -0.6),
84                ("weak", -0.5),
85                ("crisis", -0.8),
86                ("panic", -0.7),
87                ("fear", -0.6),
88                ("uncertainty", -0.5),
89                ("volatile", -0.4),
90                ("dump", -0.7),
91                ("rug", -0.9),
92                ("regulatory", -0.3),
93                ("lawsuit", -0.6),
94                ("investigation", -0.5),
95                ("ban", -0.7),
96            ],
97        }
98    }
99}
100
#[async_trait]
impl SentimentAnalyzer for LexiconSentimentAnalyzer {
    /// Delegates to the synchronous lexicon implementation.
    ///
    /// This implementation is infallible: it always returns `Ok`.
    async fn analyze(
        &self,
        title: &str,
        description: Option<&str>,
        content: Option<&str>,
    ) -> Result<NewsSentiment, WebToolError> {
        // Call the existing analyze_sentiment_impl function with the lexicon
        Ok(self.analyze_sentiment_impl(title, description, content))
    }
}
113
// Implement the marker trait to allow storage in ApplicationContext
// (marker lives in riglr_core::sentiment; presumably method-less — it is
// implemented here with an empty body).
impl SentimentAnalyzerMarker for LexiconSentimentAnalyzer {}
116
117impl LexiconSentimentAnalyzer {
118    /// Implementation of lexicon-based sentiment analysis
119    fn analyze_sentiment_impl(
120        &self,
121        title: &str,
122        description: Option<&str>,
123        content: Option<&str>,
124    ) -> NewsSentiment {
125        let full_text = format!(
126            "{} {} {}",
127            title,
128            description.unwrap_or(""),
129            content.unwrap_or("")
130        );
131
132        let text_lower = full_text.to_lowercase();
133
134        // Calculate overall sentiment score
135        let mut sentiment_score = 0.0;
136        let mut word_count = 0;
137
138        for (word, weight) in &self.positive_words {
139            let count = text_lower.matches(word).count();
140            sentiment_score += count as f64 * weight;
141            word_count += count;
142        }
143
144        for (word, weight) in &self.negative_words {
145            let count = text_lower.matches(word).count();
146            sentiment_score += count as f64 * weight;
147            word_count += count;
148        }
149
150        // Normalize sentiment score
151        let overall_score = if word_count > 0 {
152            (sentiment_score / word_count as f64).clamp(-1.0, 1.0)
153        } else {
154            0.0
155        };
156
157        // Calculate confidence based on word count and text length
158        let confidence = ((word_count as f64 / 10.0).min(1.0) * 0.5
159            + (full_text.len() as f64 / 500.0).min(1.0) * 0.5)
160            .clamp(0.3, 0.95);
161
162        // Determine classification
163        let classification = if overall_score > 0.2 {
164            "Bullish"
165        } else if overall_score < -0.2 {
166            "Bearish"
167        } else {
168            "Neutral"
169        }
170        .to_string();
171
172        // Calculate emotional indicators
173        let emotions = self.calculate_emotions(&text_lower);
174
175        // Extract key phrases that contribute to sentiment
176        let key_phrases = self.extract_key_phrases(&full_text);
177
178        // Topic-specific sentiments
179        let topic_sentiments = self.calculate_topic_sentiments(&text_lower, overall_score);
180
181        NewsSentiment {
182            overall_score,
183            confidence,
184            classification,
185            topic_sentiments,
186            emotions,
187            key_phrases,
188        }
189    }
190
191    fn calculate_emotions(&self, text_lower: &str) -> EmotionalIndicators {
192        let fear_words = ["fear", "panic", "crash", "crisis", "collapse"];
193        let greed_words = ["moon", "rally", "surge", "bullish", "fomo"];
194
195        let fear_count = fear_words
196            .iter()
197            .filter(|w| text_lower.contains(*w))
198            .count();
199        let greed_count = greed_words
200            .iter()
201            .filter(|w| text_lower.contains(*w))
202            .count();
203
204        EmotionalIndicators {
205            fear: (fear_count as f64 / 5.0).min(1.0),
206            greed: (greed_count as f64 / 5.0).min(1.0),
207            excitement: if text_lower.contains("exciting") || text_lower.contains("breakthrough") {
208                0.5
209            } else {
210                0.0
211            },
212            uncertainty: if text_lower.contains("uncertain") || text_lower.contains("volatile") {
213                0.5
214            } else {
215                0.0
216            },
217            urgency: if text_lower.contains("urgent") || text_lower.contains("immediate") {
218                0.5
219            } else {
220                0.0
221            },
222        }
223    }
224
225    fn extract_key_phrases(&self, full_text: &str) -> Vec<SentimentPhrase> {
226        let mut key_phrases = Vec::new();
227
228        // Look for specific phrase patterns
229        let phrase_patterns = [
230            (
231                r"(?i)(bullish|positive|optimistic) (?:on|about|for) (\w+)",
232                0.5,
233            ),
234            (
235                r"(?i)(bearish|negative|pessimistic) (?:on|about|for) (\w+)",
236                -0.5,
237            ),
238            (r"(?i)all.time.high", 0.6),
239            (r"(?i)all.time.low", -0.6),
240            (r"(?i)break(?:ing|s)?\s+(?:through|above)", 0.4),
241            (r"(?i)break(?:ing|s)?\s+(?:below|down)", -0.4),
242        ];
243
244        for (pattern, contribution) in &phrase_patterns {
245            if let Ok(re) = Regex::new(pattern) {
246                for matched in re.find_iter(full_text) {
247                    key_phrases.push(SentimentPhrase {
248                        phrase: matched.as_str().to_string(),
249                        sentiment_contribution: *contribution,
250                        confidence: 0.7,
251                    });
252                }
253            }
254        }
255
256        key_phrases
257    }
258
259    fn calculate_topic_sentiments(
260        &self,
261        text_lower: &str,
262        overall_score: f64,
263    ) -> HashMap<String, f64> {
264        let mut topic_sentiments = HashMap::new();
265        let topics = [
266            "bitcoin",
267            "ethereum",
268            "defi",
269            "nft",
270            "regulation",
271            "adoption",
272        ];
273
274        for topic in &topics {
275            if text_lower.contains(topic) {
276                // Calculate sentiment specific to this topic's context
277                let topic_score = if text_lower.contains(&format!("{} surge", topic))
278                    || text_lower.contains(&format!("{} rally", topic))
279                {
280                    0.5
281                } else if text_lower.contains(&format!("{} crash", topic))
282                    || text_lower.contains(&format!("{} plunge", topic))
283                {
284                    -0.5
285                } else {
286                    overall_score * 0.7 // Slightly dampened overall sentiment
287                };
288                topic_sentiments.insert(topic.to_string(), topic_score);
289            }
290        }
291
292        topic_sentiments
293    }
294}
295
/// Configuration for news aggregation services
///
/// Unset API keys resolve to empty strings; see `NewsConfig::default` and
/// `NewsConfig::from_context` for the fallback values of the other fields.
#[derive(Debug, Clone)]
pub struct NewsConfig {
    /// NewsAPI.org API key (empty string when not configured)
    pub newsapi_key: String,
    /// CryptoPanic API key (empty string when not configured)
    pub cryptopanic_key: String,
    /// Base URL for news aggregation service
    pub base_url: String,
    /// Maximum articles per request (default: 50)
    pub max_articles: u32,
    /// News freshness window in hours (default: 24)
    pub freshness_hours: u32,
    /// Minimum credibility score (0-100)
    pub min_credibility_score: u32,
}
312
/// Comprehensive news article with metadata and analysis
///
/// Serializable (serde + JsonSchema) so it can be returned directly from
/// the `#[tool]` functions in this module.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct NewsArticle {
    /// Unique article identifier
    pub id: String,
    /// Article title
    pub title: String,
    /// Article URL
    pub url: String,
    /// Article description/summary
    pub description: Option<String>,
    /// Full article content (if extracted)
    pub content: Option<String>,
    /// Publication timestamp
    pub published_at: DateTime<Utc>,
    /// News source information
    pub source: NewsSource,
    /// Article category and tags
    pub category: NewsCategory,
    /// Sentiment analysis results
    pub sentiment: NewsSentiment,
    /// Market impact assessment
    pub market_impact: MarketImpact,
    /// Entities mentioned in the article
    pub entities: Vec<NewsEntity>,
    /// Related cryptocurrencies/assets
    pub related_assets: Vec<String>,
    /// Article quality metrics
    pub quality_metrics: QualityMetrics,
    /// Social engagement metrics (None when unavailable)
    pub social_metrics: Option<SocialMetrics>,
}
345
/// News source information and credibility
///
/// `credibility_score` is the field the aggregation tools filter on
/// (see `get_crypto_news`'s `min_credibility` handling).
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct NewsSource {
    /// Source identifier
    pub id: String,
    /// Source name (e.g., "CoinDesk", "Reuters")
    pub name: String,
    /// Source website URL
    pub url: String,
    /// Source category (Mainstream, Crypto-Native, Blog, etc.)
    pub category: String,
    /// Credibility score (0-100)
    pub credibility_score: u32,
    /// Historical accuracy rating
    pub accuracy_rating: Option<f64>,
    /// Source bias score (-1.0 to 1.0, -1 = bearish, 1 = bullish)
    pub bias_score: Option<f64>,
    /// Whether source is verified/trusted
    pub is_verified: bool,
    /// Source logo URL
    pub logo_url: Option<String>,
}
368
/// News category and classification
///
/// Categories are free-form strings; `get_crypto_news` compares them
/// case-insensitively against the caller's `source_types` filter.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct NewsCategory {
    /// Primary category (Breaking, Analysis, Opinion, etc.)
    pub primary: String,
    /// Sub-category (DeFi, NFT, Regulation, etc.)
    pub sub_category: Option<String>,
    /// Article tags
    pub tags: Vec<String>,
    /// Geographic relevance
    pub geographic_scope: Vec<String>,
    /// Target audience (Retail, Institutional, Developer)
    pub target_audience: String,
}
383
/// Sentiment analysis for news article
///
/// Produced by [`SentimentAnalyzer`] implementations such as the default
/// lexicon analyzer in this module.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct NewsSentiment {
    /// Overall sentiment score (-1.0 to 1.0)
    pub overall_score: f64,
    /// Sentiment confidence (0.0 to 1.0)
    pub confidence: f64,
    /// Sentiment classification (Bullish, Bearish, Neutral)
    pub classification: String,
    /// Sentiment breakdown by topic (topic name -> score)
    pub topic_sentiments: HashMap<String, f64>,
    /// Emotional indicators
    pub emotions: EmotionalIndicators,
    /// Key sentiment phrases extracted
    pub key_phrases: Vec<SentimentPhrase>,
}
400
/// Emotional indicators in news content
///
/// Every field is normalized to the 0.0-1.0 range by the analyzer.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct EmotionalIndicators {
    /// Fear level (0.0 to 1.0)
    pub fear: f64,
    /// Greed level (0.0 to 1.0)
    pub greed: f64,
    /// Excitement level (0.0 to 1.0)
    pub excitement: f64,
    /// Uncertainty level (0.0 to 1.0)
    pub uncertainty: f64,
    /// Urgency level (0.0 to 1.0)
    pub urgency: f64,
}
415
/// Key phrases contributing to sentiment
///
/// Emitted by the regex-based phrase extraction in the lexicon analyzer.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct SentimentPhrase {
    /// The phrase text (verbatim match from the article)
    pub phrase: String,
    /// Sentiment contribution (-1.0 to 1.0)
    pub sentiment_contribution: f64,
    /// Confidence in this analysis (0.0 to 1.0)
    pub confidence: f64,
}
426
/// Market impact assessment for news
///
/// `impact_score` is what `monitor_breaking_news` filters alerts on.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct MarketImpact {
    /// Predicted impact level (High, Medium, Low, Negligible)
    pub impact_level: String,
    /// Impact score (0-100)
    pub impact_score: u32,
    /// Time horizon for impact (Immediate, Short-term, Long-term)
    pub time_horizon: String,
    /// Affected market sectors
    pub affected_sectors: Vec<String>,
    /// Potential price impact percentage
    pub potential_price_impact: Option<f64>,
    /// Historical correlation with similar news
    pub historical_correlation: Option<f64>,
    /// Risk factors identified
    pub risk_factors: Vec<String>,
}
445
/// Entities mentioned in news (people, companies, assets)
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct NewsEntity {
    /// Entity name
    pub name: String,
    /// Entity type (Person, Company, Cryptocurrency, etc.)
    pub entity_type: String,
    /// Relevance to the article (0.0 to 1.0)
    pub relevance_score: f64,
    /// Sentiment specifically towards this entity (None when not scored)
    pub sentiment: Option<f64>,
    /// Number of mentions in the article
    pub mention_count: u32,
    /// Context of mentions (surrounding text snippets)
    pub contexts: Vec<String>,
}
462
/// Article quality assessment metrics
///
/// All scores are on a 0-100 scale except `reading_difficulty` (1-10).
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct QualityMetrics {
    /// Overall quality score (0-100)
    pub overall_score: u32,
    /// Content depth assessment
    pub depth_score: u32,
    /// Fact-checking score
    pub factual_accuracy: u32,
    /// Writing quality score
    pub writing_quality: u32,
    /// Source citation quality
    pub citation_quality: u32,
    /// Uniqueness vs other articles (0-100)
    pub uniqueness_score: u32,
    /// Estimated reading difficulty (1-10)
    pub reading_difficulty: u32,
}
481
/// Social media engagement metrics
///
/// Attached to an article only when engagement data is available
/// (`NewsArticle::social_metrics` is optional).
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct SocialMetrics {
    /// Total social shares
    pub total_shares: u32,
    /// Twitter mentions/shares
    pub twitter_shares: u32,
    /// Reddit discussions
    pub reddit_mentions: u32,
    /// LinkedIn shares
    pub linkedin_shares: u32,
    /// Social sentiment (different from article sentiment)
    pub social_sentiment: f64,
    /// Viral potential score (0-100)
    pub viral_score: u32,
    /// Influencer engagement
    pub influencer_mentions: u32,
}
500
/// Comprehensive news aggregation result
///
/// Top-level return type of `get_crypto_news` and `get_trending_news`.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct NewsAggregationResult {
    /// Search query or topic ("Trending" for the trending tool)
    pub topic: String,
    /// Found news articles
    pub articles: Vec<NewsArticle>,
    /// Aggregation metadata
    pub metadata: AggregationMetadata,
    /// Market insights from the news
    pub insights: NewsInsights,
    /// Trending topics extracted
    pub trending_topics: Vec<TrendingTopic>,
    /// Aggregation timestamp
    pub aggregated_at: DateTime<Utc>,
}
517
/// Metadata about the news aggregation process
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct AggregationMetadata {
    /// Total articles found across all sources (before filtering)
    pub total_articles: u32,
    /// Articles returned after filtering
    pub returned_articles: u32,
    /// Sources queried
    pub sources_queried: Vec<String>,
    /// Average credibility of returned articles
    pub avg_credibility: f64,
    /// Time range covered
    pub time_range_hours: u32,
    /// Duplicate articles removed
    pub duplicates_removed: u32,
}
534
/// Insights extracted from news aggregation
///
/// Also returned standalone by `analyze_market_sentiment`.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct NewsInsights {
    /// Overall market sentiment from news
    pub overall_sentiment: f64,
    /// Sentiment trend over time
    pub sentiment_trend: String, // "Improving", "Declining", "Stable"
    /// Most mentioned entities
    pub top_entities: Vec<EntityMention>,
    /// Dominant themes/topics
    pub dominant_themes: Vec<String>,
    /// Geographical distribution of news (region -> article count)
    pub geographic_distribution: HashMap<String, u32>,
    /// Source diversity metrics
    pub source_diversity: SourceDiversity,
    /// Market impact distribution (impact level -> article count)
    pub impact_distribution: HashMap<String, u32>,
}
553
/// Entity mention statistics
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct EntityMention {
    /// Entity name
    pub name: String,
    /// Number of mentions across articles
    pub mention_count: u32,
    /// Average sentiment towards entity
    pub avg_sentiment: f64,
    /// Entity type (mirrors `NewsEntity::entity_type`)
    pub entity_type: String,
    /// Trending status
    pub is_trending: bool,
}
568
/// Source diversity analysis
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct SourceDiversity {
    /// Number of unique sources
    pub unique_sources: u32,
    /// Source type distribution (type name -> count)
    pub source_types: HashMap<String, u32>,
    /// Geographic source distribution (region -> count)
    pub geographic_sources: HashMap<String, u32>,
    /// Credibility distribution
    pub credibility_distribution: HashMap<String, u32>, // "High", "Medium", "Low"
}
581
/// Trending topic analysis
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct TrendingTopic {
    /// Topic name
    pub topic: String,
    /// Number of articles mentioning this topic
    pub article_count: u32,
    /// Trend velocity (mentions per hour)
    pub velocity: f64,
    /// Sentiment towards this topic
    pub sentiment: f64,
    /// Related keywords
    pub related_keywords: Vec<String>,
    /// Geographic concentration
    pub geographic_focus: Vec<String>,
}
598
/// Breaking news alert
///
/// Returned by `monitor_breaking_news`; alerts below the caller's severity
/// or impact thresholds are filtered out before return.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct BreakingNewsAlert {
    /// Alert ID
    pub id: String,
    /// Alert severity (Critical, High, Medium, Low)
    pub severity: String,
    /// Alert title
    pub title: String,
    /// Alert description
    pub description: String,
    /// Related articles
    pub articles: Vec<NewsArticle>,
    /// Estimated market impact
    pub estimated_impact: MarketImpact,
    /// Alert timestamp
    pub created_at: DateTime<Utc>,
    /// Alert expiration (None = no expiry set)
    pub expires_at: Option<DateTime<Utc>>,
}
619
620impl Default for NewsConfig {
621    fn default() -> Self {
622        Self {
623            newsapi_key: String::default(),
624            cryptopanic_key: String::default(),
625            base_url: "https://newsapi.org/v2".to_string(),
626            max_articles: 50,
627            freshness_hours: 24,
628            min_credibility_score: 60,
629        }
630    }
631}
632
633impl NewsConfig {
634    /// Create NewsConfig from ApplicationContext
635    fn from_context(context: &ApplicationContext) -> Self {
636        Self {
637            newsapi_key: context
638                .config
639                .providers
640                .newsapi_key
641                .clone()
642                .unwrap_or_default(),
643            cryptopanic_key: context
644                .config
645                .providers
646                .cryptopanic_key
647                .clone()
648                .unwrap_or_default(),
649            base_url: "https://newsapi.org/v2".to_string(),
650            max_articles: 50,
651            freshness_hours: 24,
652            min_credibility_score: 60,
653        }
654    }
655}
656
657/// Get comprehensive cryptocurrency news for a specific topic
658///
659/// This tool aggregates news from multiple sources, performs sentiment analysis,
660/// and assesses market impact for cryptocurrency-related topics.
661#[tool]
662pub async fn get_crypto_news(
663    context: &riglr_core::provider::ApplicationContext,
664    topic: String,
665    time_window: Option<String>,       // "1h", "6h", "24h", "week"
666    source_types: Option<Vec<String>>, // "mainstream", "crypto", "analysis"
667    min_credibility: Option<u32>,
668    include_analysis: Option<bool>,
669) -> crate::error::Result<NewsAggregationResult> {
670    debug!(
671        "Aggregating crypto news for topic: '{}' within {}",
672        topic,
673        time_window.as_deref().unwrap_or("24h")
674    );
675
676    let config = NewsConfig::from_context(context);
677    if config.newsapi_key.is_empty() && config.cryptopanic_key.is_empty() {
678        return Err(WebToolError::Auth(
679            "No news API keys configured".to_string(),
680        ));
681    }
682
683    let client = WebClient::default();
684
685    // Query multiple news sources
686    let mut all_articles = Vec::new();
687    let mut sources_queried = Vec::new();
688
689    // NewsAPI.org for mainstream coverage
690    if !config.newsapi_key.is_empty() {
691        match query_newsapi(&client, &config, &topic, &time_window).await {
692            Ok(mut articles) => {
693                all_articles.append(&mut articles);
694                sources_queried.push("NewsAPI".to_string());
695            }
696            Err(e) => warn!("Failed to query NewsAPI: {}", e),
697        }
698    }
699
700    // CryptoPanic for crypto-specific news
701    if !config.cryptopanic_key.is_empty() {
702        match query_cryptopanic(&client, &config, &topic, &time_window).await {
703            Ok(mut articles) => {
704                all_articles.append(&mut articles);
705                sources_queried.push("CryptoPanic".to_string());
706            }
707            Err(e) => warn!("Failed to query CryptoPanic: {}", e),
708        }
709    }
710
711    // Filter by source types if specified
712    if let Some(types) = source_types {
713        all_articles.retain(|article| types.contains(&article.source.category.to_lowercase()));
714    }
715
716    // Filter by minimum credibility
717    let min_cred = min_credibility.unwrap_or(config.min_credibility_score);
718    all_articles.retain(|article| article.source.credibility_score >= min_cred);
719
720    // Remove duplicates and sort by recency
721    let articles = deduplicate_articles(all_articles);
722
723    // Generate insights if requested
724    let insights = if include_analysis.unwrap_or(true) {
725        analyze_news_collection(&articles).await?
726    } else {
727        NewsInsights {
728            overall_sentiment: 0.0,
729            sentiment_trend: "Unknown".to_string(),
730            top_entities: vec![],
731            dominant_themes: vec![],
732            geographic_distribution: HashMap::new(),
733            source_diversity: SourceDiversity {
734                unique_sources: 0,
735                source_types: HashMap::new(),
736                geographic_sources: HashMap::new(),
737                credibility_distribution: HashMap::new(),
738            },
739            impact_distribution: HashMap::new(),
740        }
741    };
742
743    // Extract trending topics
744    let trending_topics = extract_trending_topics(&articles).await?;
745
746    let result = NewsAggregationResult {
747        topic: topic.clone(),
748        articles: articles.clone(),
749        metadata: AggregationMetadata {
750            total_articles: articles.len() as u32,
751            returned_articles: articles.len() as u32,
752            sources_queried,
753            avg_credibility: calculate_avg_credibility(&articles),
754            time_range_hours: parse_time_window(&time_window.unwrap_or_else(|| "24h".to_string())),
755            duplicates_removed: 0, // Would track actual duplicates
756        },
757        insights,
758        trending_topics,
759        aggregated_at: Utc::now(),
760    };
761
762    info!(
763        "Crypto news aggregation completed: {} articles for '{}'",
764        result.articles.len(),
765        topic
766    );
767
768    Ok(result)
769}
770
771/// Get trending cryptocurrency news across all topics
772///
773/// This tool identifies currently trending news and topics in the cryptocurrency space,
774/// useful for staying updated on breaking developments and market movements.
775#[tool]
776pub async fn get_trending_news(
777    context: &riglr_core::provider::ApplicationContext,
778    time_window: Option<String>,     // "1h", "6h", "24h"
779    categories: Option<Vec<String>>, // "defi", "nft", "regulation", "tech"
780    min_impact_score: Option<u32>,
781    limit: Option<u32>,
782) -> crate::error::Result<NewsAggregationResult> {
783    debug!(
784        "Fetching trending crypto news within {}",
785        time_window.as_deref().unwrap_or("6h")
786    );
787
788    let config = NewsConfig::from_context(context);
789    let client = WebClient::default();
790
791    // Get trending articles from multiple sources
792    let trending_articles = fetch_trending_articles(
793        &client,
794        &config,
795        &time_window,
796        &categories,
797        min_impact_score.unwrap_or(60),
798    )
799    .await?;
800
801    let articles: Vec<NewsArticle> = trending_articles
802        .into_iter()
803        .take(limit.unwrap_or(30) as usize)
804        .collect();
805
806    // Analyze trending patterns
807    let insights = analyze_trending_patterns(&articles).await?;
808    let trending_topics = extract_trending_topics(&articles).await?;
809
810    let result = NewsAggregationResult {
811        topic: "Trending".to_string(),
812        articles: articles.clone(),
813        metadata: AggregationMetadata {
814            total_articles: articles.len() as u32,
815            returned_articles: articles.len() as u32,
816            sources_queried: vec!["Multiple".to_string()],
817            avg_credibility: calculate_avg_credibility(&articles),
818            time_range_hours: parse_time_window(&time_window.unwrap_or_else(|| "6h".to_string())),
819            duplicates_removed: 0,
820        },
821        insights,
822        trending_topics,
823        aggregated_at: Utc::now(),
824    };
825
826    info!(
827        "Trending news aggregation completed: {} trending articles",
828        result.articles.len()
829    );
830
831    Ok(result)
832}
833
834/// Monitor for breaking news and generate real-time alerts
835///
836/// This tool continuously monitors news sources for breaking news
837/// and generates alerts based on severity and market impact criteria.
838#[tool]
839pub async fn monitor_breaking_news(
840    context: &riglr_core::provider::ApplicationContext,
841    keywords: Vec<String>,
842    severity_threshold: Option<String>, // "Critical", "High", "Medium"
843    impact_threshold: Option<u32>,      // 0-100
844    _alert_channels: Option<Vec<String>>, // "webhook", "email", "slack"
845) -> crate::error::Result<Vec<BreakingNewsAlert>> {
846    debug!("Monitoring breaking news for keywords: {:?}", keywords);
847
848    let config = NewsConfig::from_context(context);
849    let client = WebClient::default();
850
851    let mut alerts = Vec::new();
852
853    // Check each keyword for breaking news
854    for keyword in keywords {
855        match detect_breaking_news(&client, &config, &keyword).await {
856            Ok(mut keyword_alerts) => {
857                alerts.append(&mut keyword_alerts);
858            }
859            Err(e) => {
860                warn!("Failed to check breaking news for '{}': {}", keyword, e);
861            }
862        }
863    }
864
865    // Filter by severity and impact thresholds
866    let severity_level = severity_threshold.unwrap_or_else(|| "Medium".to_string());
867    let impact_level = impact_threshold.unwrap_or(60);
868
869    alerts.retain(|alert| {
870        is_above_severity_threshold(&alert.severity, &severity_level)
871            && alert.estimated_impact.impact_score >= impact_level
872    });
873
874    info!(
875        "Breaking news monitoring completed: {} alerts generated",
876        alerts.len()
877    );
878
879    Ok(alerts)
880}
881
/// Analyze market sentiment from recent news
///
/// This tool provides comprehensive sentiment analysis across recent news articles,
/// helping to gauge overall market mood and potential price impact.
#[tool]
pub async fn analyze_market_sentiment(
    context: &riglr_core::provider::ApplicationContext,
    time_window: Option<String>,       // "1h", "6h", "24h", "week"
    asset_filter: Option<Vec<String>>, // Specific cryptocurrencies to focus on
    _source_weights: Option<HashMap<String, f64>>, // Weight different sources
    _include_social: Option<bool>,
) -> crate::error::Result<NewsInsights> {
    debug!(
        "Analyzing market sentiment from news over {}",
        time_window.as_deref().unwrap_or("24h")
    );

    // NOTE(review): these two are constructed but never used below — the
    // nested tool calls build their own config/client. Presumably kept as
    // placeholders for a future direct-fetch path; confirm before removing.
    let _config = NewsConfig::from_context(context);
    let _client = WebClient::default();

    // Gather recent news for sentiment analysis.
    // With an asset filter: fetch per-asset news and pool the articles;
    // failures for individual assets are logged and skipped so one bad
    // symbol does not abort the whole analysis.
    let recent_news = if let Some(assets) = &asset_filter {
        let mut all_news = Vec::new();
        for asset in assets {
            match get_crypto_news(
                context,
                asset.clone(),
                time_window.clone(),
                None,
                Some(70),    // Higher credibility for sentiment analysis
                Some(false), // Don't need full analysis
            )
            .await
            {
                Ok(result) => all_news.extend(result.articles),
                Err(e) => warn!("Failed to get news for {}: {}", asset, e),
            }
        }
        all_news
    } else {
        // Get general market news
        match get_trending_news(context, time_window, None, Some(50), Some(100)).await {
            Ok(result) => result.articles,
            Err(_) => vec![], // Fallback to empty if trending fails
        }
    };

    // Perform comprehensive sentiment analysis
    let insights = analyze_news_collection(&recent_news).await?;

    info!(
        "Market sentiment analysis completed from {} articles",
        recent_news.len()
    );

    Ok(insights)
}
939
/// Query NewsAPI for articles
///
/// Fetches English-language articles matching `topic` from NewsAPI's
/// `/v2/everything` endpoint, restricted to the requested time window
/// (default: `config.freshness_hours` hours), and maps each result to a
/// `NewsArticle` with heuristic sentiment, market-impact and entity data.
///
/// # Errors
/// Returns `WebToolError::Api` when the HTTP request fails or NewsAPI reports
/// a non-"ok" status, and `WebToolError::Parsing` when the body is not JSON.
async fn query_newsapi(
    client: &WebClient,
    config: &NewsConfig,
    topic: &str,
    time_window: &Option<String>,
) -> crate::error::Result<Vec<NewsArticle>> {
    // Build URL and params for NewsAPI /v2/everything
    let url = format!("{}/everything", config.base_url);
    let window = time_window
        .clone()
        .unwrap_or_else(|| format!("{}h", config.freshness_hours));
    let hours = parse_time_window(&window) as i64;
    // Lower bound for publishedAt, in RFC 3339 as NewsAPI expects.
    let from = (Utc::now() - chrono::Duration::hours(hours)).to_rfc3339();

    let mut params = std::collections::HashMap::new();
    params.insert("q".to_string(), topic.to_string());
    params.insert("language".to_string(), "en".to_string());
    params.insert("sortBy".to_string(), "publishedAt".to_string());
    params.insert("from".to_string(), from);
    params.insert("pageSize".to_string(), config.max_articles.to_string());

    // NewsAPI supports header X-Api-Key or apiKey parameter; prefer header
    let mut headers = std::collections::HashMap::new();
    headers.insert("X-Api-Key".to_string(), config.newsapi_key.clone());

    let resp_text = client
        .get_with_params_and_headers(&url, &params, headers)
        .await
        .map_err(|e| WebToolError::Api(format!("NewsAPI request failed: {}", e)))?;

    // Parse JSON and map to NewsArticle
    let json: serde_json::Value = serde_json::from_str(&resp_text)
        .map_err(|e| WebToolError::Parsing(format!("NewsAPI parse error: {}", e)))?;

    // NewsAPI signals application-level failures via status != "ok" even on
    // HTTP 200, so it must be checked explicitly.
    if let Some(status) = json.get("status").and_then(|s| s.as_str()) {
        if status != "ok" {
            let msg = json
                .get("message")
                .and_then(|m| m.as_str())
                .unwrap_or("unknown error");
            return Err(WebToolError::Api(format!("NewsAPI error: {}", msg)));
        }
    }

    let mut articles_out: Vec<NewsArticle> = Vec::new();
    if let Some(arr) = json.get("articles").and_then(|a| a.as_array()) {
        for a in arr {
            let title = a
                .get("title")
                .and_then(|v| v.as_str())
                .unwrap_or("")
                .to_string();
            let url = a
                .get("url")
                .and_then(|v| v.as_str())
                .unwrap_or("")
                .to_string();
            // Skip entries missing either field needed for identity/display.
            if url.is_empty() || title.is_empty() {
                continue;
            }
            // Fall back to "now" when publishedAt is absent or malformed.
            let published_at = a
                .get("publishedAt")
                .and_then(|v| v.as_str())
                .and_then(|s| DateTime::parse_from_rfc3339(s).ok())
                .map_or_else(Utc::now, |dt| dt.with_timezone(&Utc));
            let description = a
                .get("description")
                .and_then(|v| v.as_str())
                .map(|s| s.to_string());
            let content = a
                .get("content")
                .and_then(|v| v.as_str())
                .map(|s| s.to_string());
            let source_obj = a.get("source").cloned().unwrap_or_default();
            // Fixed credibility/verification defaults: NewsAPI does not
            // supply per-source quality data.
            let source = NewsSource {
                id: source_obj
                    .get("id")
                    .and_then(|v| v.as_str())
                    .unwrap_or("")
                    .to_string(),
                name: source_obj
                    .get("name")
                    .and_then(|v| v.as_str())
                    .unwrap_or("NewsAPI")
                    .to_string(),
                url: url.clone(),
                category: "Mainstream".to_string(),
                credibility_score: 75,
                accuracy_rating: None,
                bias_score: None,
                is_verified: true,
                logo_url: None,
            };
            // Basic classification heuristics
            let category = NewsCategory {
                primary: "News".to_string(),
                sub_category: None,
                tags: vec![topic.to_lowercase()],
                geographic_scope: vec!["Global".to_string()],
                target_audience: "Retail".to_string(),
            };
            // Perform real sentiment analysis on title and content
            let sentiment = analyze_sentiment(&title, &description, &content);

            // Calculate market impact based on sentiment and source credibility
            let market_impact = calculate_market_impact(&sentiment, &source, &category);

            // Extract entities from title and content
            let entities = extract_entities_from_text(&title, &description, &content, topic);
            // ID combines publish time with a URL hash so repeated fetches of
            // the same article produce the same ID.
            let article = NewsArticle {
                id: format!("newsapi_{}_{}", published_at.timestamp(), hash64(&url)),
                title,
                url,
                description,
                content,
                published_at,
                source,
                category,
                sentiment,
                market_impact,
                entities,
                related_assets: vec![topic.to_lowercase()],
                // Static quality defaults — no per-article scoring here.
                quality_metrics: QualityMetrics {
                    overall_score: 70,
                    depth_score: 60,
                    factual_accuracy: 75,
                    writing_quality: 70,
                    citation_quality: 60,
                    uniqueness_score: 50,
                    reading_difficulty: 5,
                },
                social_metrics: None,
            };
            articles_out.push(article);
        }
    }
    Ok(articles_out)
}
1079
1080/// Query CryptoPanic for crypto-specific news
1081async fn query_cryptopanic(
1082    client: &WebClient,
1083    config: &NewsConfig,
1084    topic: &str,
1085    time_window: &Option<String>,
1086) -> crate::error::Result<Vec<NewsArticle>> {
1087    let base = "https://cryptopanic.com/api/v1/posts";
1088    let window = time_window.clone().unwrap_or_else(|| "24h".to_string());
1089    let _hours = parse_time_window(&window);
1090
1091    let mut params = std::collections::HashMap::new();
1092    params.insert("auth_token".to_string(), config.cryptopanic_key.clone());
1093    params.insert("kind".to_string(), "news".to_string());
1094    params.insert("currencies".to_string(), topic.to_string());
1095    params.insert("public".to_string(), "true".to_string());
1096    params.insert("filter".to_string(), "rising".to_string());
1097
1098    let resp_text = client
1099        .get_with_params(base, &params)
1100        .await
1101        .map_err(|e| WebToolError::Api(format!("CryptoPanic request failed: {}", e)))?;
1102
1103    let json: serde_json::Value = serde_json::from_str(&resp_text)
1104        .map_err(|e| WebToolError::Parsing(format!("CryptoPanic parse error: {}", e)))?;
1105
1106    let mut articles_out = Vec::new();
1107    if let Some(results) = json.get("results").and_then(|v| v.as_array()) {
1108        for item in results {
1109            let title = item
1110                .get("title")
1111                .and_then(|v| v.as_str())
1112                .unwrap_or("")
1113                .to_string();
1114            let url = item
1115                .get("url")
1116                .and_then(|v| v.as_str())
1117                .unwrap_or("")
1118                .to_string();
1119            if url.is_empty() || title.is_empty() {
1120                continue;
1121            }
1122            let published_at = item
1123                .get("published_at")
1124                .and_then(|v| v.as_str())
1125                .and_then(|s| DateTime::parse_from_rfc3339(s).ok())
1126                .map_or_else(Utc::now, |dt| dt.with_timezone(&Utc));
1127            let domain = item.get("domain").and_then(|v| v.as_str()).unwrap_or("");
1128            let source_obj = item.get("source").cloned().unwrap_or_default();
1129            let source = NewsSource {
1130                id: source_obj
1131                    .get("domain")
1132                    .and_then(|v| v.as_str())
1133                    .unwrap_or(domain)
1134                    .to_string(),
1135                name: source_obj
1136                    .get("title")
1137                    .and_then(|v| v.as_str())
1138                    .unwrap_or("CryptoPanic")
1139                    .to_string(),
1140                url: url.clone(),
1141                category: "Crypto".to_string(),
1142                credibility_score: 70,
1143                accuracy_rating: None,
1144                bias_score: None,
1145                is_verified: true,
1146                logo_url: None,
1147            };
1148            let category = NewsCategory {
1149                primary: "News".to_string(),
1150                sub_category: None,
1151                tags: vec![topic.to_lowercase()],
1152                geographic_scope: vec!["Global".to_string()],
1153                target_audience: "Crypto".to_string(),
1154            };
1155            let article = NewsArticle {
1156                id: format!("cryptopanic_{}_{}", published_at.timestamp(), hash64(&url)),
1157                title: title.clone(),
1158                url,
1159                description: None,
1160                content: None,
1161                published_at,
1162                source,
1163                category,
1164                sentiment: analyze_sentiment(&title, &None, &None),
1165                market_impact: calculate_market_impact_simple(&title),
1166                entities: extract_entities_from_text(&title, &None, &None, topic),
1167                related_assets: vec![topic.to_lowercase()],
1168                quality_metrics: QualityMetrics {
1169                    overall_score: 68,
1170                    depth_score: 55,
1171                    factual_accuracy: 70,
1172                    writing_quality: 65,
1173                    citation_quality: 55,
1174                    uniqueness_score: 50,
1175                    reading_difficulty: 5,
1176                },
1177                social_metrics: None,
1178            };
1179            articles_out.push(article);
1180        }
1181    }
1182    Ok(articles_out)
1183}
1184
1185/// Remove duplicate articles based on content similarity
1186fn deduplicate_articles(articles: Vec<NewsArticle>) -> Vec<NewsArticle> {
1187    // In production, would use content similarity algorithms
1188    // For now, simple URL-based deduplication
1189    let mut seen_urls = std::collections::HashSet::new();
1190    articles
1191        .into_iter()
1192        .filter(|article| seen_urls.insert(article.url.clone()))
1193        .collect()
1194}
1195
1196/// Analyze a collection of news articles for insights
1197async fn analyze_news_collection(articles: &[NewsArticle]) -> crate::error::Result<NewsInsights> {
1198    let overall_sentiment = articles
1199        .iter()
1200        .map(|a| a.sentiment.overall_score)
1201        .sum::<f64>()
1202        / articles.len() as f64;
1203
1204    let mut entity_mentions: HashMap<String, (u32, f64)> = HashMap::new();
1205    let mut themes = Vec::new();
1206    let mut geo_distribution = HashMap::new();
1207
1208    for article in articles {
1209        // Collect entity mentions
1210        for entity in &article.entities {
1211            let entry = entity_mentions
1212                .entry(entity.name.clone())
1213                .or_insert((0, 0.0));
1214            entry.0 += entity.mention_count;
1215            entry.1 += entity.sentiment.unwrap_or(0.0);
1216        }
1217
1218        // Collect themes
1219        themes.extend(article.category.tags.clone());
1220
1221        // Geographic distribution
1222        for geo in &article.category.geographic_scope {
1223            *geo_distribution.entry(geo.clone()).or_insert(0) += 1;
1224        }
1225    }
1226
1227    let top_entities: Vec<EntityMention> = entity_mentions
1228        .into_iter()
1229        .map(|(name, (count, sentiment))| EntityMention {
1230            name: name.clone(),
1231            mention_count: count,
1232            avg_sentiment: sentiment / count as f64,
1233            entity_type: "Unknown".to_string(), // Would determine from context
1234            is_trending: count > 5,             // Simple trending threshold
1235        })
1236        .collect();
1237
1238    // Analyze source diversity
1239    let unique_sources = articles
1240        .iter()
1241        .map(|a| &a.source.name)
1242        .collect::<std::collections::HashSet<_>>()
1243        .len() as u32;
1244
1245    let source_diversity = SourceDiversity {
1246        unique_sources,
1247        source_types: HashMap::new(), // Would calculate from actual data
1248        geographic_sources: HashMap::new(),
1249        credibility_distribution: HashMap::new(),
1250    };
1251
1252    Ok(NewsInsights {
1253        overall_sentiment,
1254        sentiment_trend: determine_sentiment_trend(articles),
1255        top_entities,
1256        dominant_themes: themes,
1257        geographic_distribution: geo_distribution,
1258        source_diversity,
1259        impact_distribution: HashMap::new(), // Would calculate impact distribution
1260    })
1261}
1262
1263/// Extract trending topics from articles
1264async fn extract_trending_topics(
1265    articles: &[NewsArticle],
1266) -> crate::error::Result<Vec<TrendingTopic>> {
1267    let mut topic_counts: HashMap<String, u32> = HashMap::new();
1268    let mut topic_sentiments: HashMap<String, f64> = HashMap::new();
1269
1270    for article in articles {
1271        for tag in &article.category.tags {
1272            *topic_counts.entry(tag.clone()).or_insert(0) += 1;
1273            *topic_sentiments.entry(tag.clone()).or_insert(0.0) += article.sentiment.overall_score;
1274        }
1275    }
1276
1277    let trending_topics: Vec<TrendingTopic> = topic_counts
1278        .into_iter()
1279        .filter(|(_, count)| *count >= 3) // Minimum threshold for trending
1280        .map(|(topic, count)| TrendingTopic {
1281            topic: topic.clone(),
1282            article_count: count,
1283            velocity: count as f64 / 24.0, // Articles per hour (assuming 24h window)
1284            sentiment: topic_sentiments.get(&topic).unwrap_or(&0.0) / count as f64,
1285            related_keywords: vec![], // Would extract related keywords
1286            geographic_focus: vec!["Global".to_string()],
1287        })
1288        .collect();
1289
1290    Ok(trending_topics)
1291}
1292
1293/// Helper functions
1294fn calculate_avg_credibility(articles: &[NewsArticle]) -> f64 {
1295    if articles.is_empty() {
1296        return 0.0;
1297    }
1298    articles
1299        .iter()
1300        .map(|a| a.source.credibility_score as f64)
1301        .sum::<f64>()
1302        / articles.len() as f64
1303}
1304
/// Parse a time-window string into a number of hours.
///
/// Accepts the documented presets ("1h", "6h", "24h", "week") plus any
/// positive "<n>h" form such as "12h". The generalization matters because
/// `query_newsapi` builds windows as `format!("{}h", config.freshness_hours)`,
/// which the old fixed match silently collapsed to 24 for any other value.
/// Unrecognized input (including "0h") falls back to 24 hours.
fn parse_time_window(window: &str) -> u32 {
    match window {
        "week" => 168,
        other => other
            .strip_suffix('h')
            .and_then(|n| n.parse::<u32>().ok())
            // Reject a zero-hour window, matching the old default behavior.
            .filter(|&n| n > 0)
            .unwrap_or(24),
    }
}
1314
1315fn determine_sentiment_trend(articles: &[NewsArticle]) -> String {
1316    // Simple trend analysis - would be more sophisticated in production
1317    let avg_sentiment = articles
1318        .iter()
1319        .map(|a| a.sentiment.overall_score)
1320        .sum::<f64>()
1321        / articles.len() as f64;
1322
1323    if avg_sentiment > 0.1 {
1324        "Improving".to_string()
1325    } else if avg_sentiment < -0.1 {
1326        "Declining".to_string()
1327    } else {
1328        "Stable".to_string()
1329    }
1330}
1331
/// Fetch currently-trending articles from the configured providers.
///
/// Strategy: prefer CryptoPanic's "rising" feed when a key is configured;
/// if that branch yields nothing (no key, request error, or empty results),
/// fall back to NewsAPI top-headlines for broad crypto terms. Items missing
/// a title or URL are skipped. Returns an empty list when neither provider
/// is configured or both fail — errors are swallowed, not propagated.
///
/// NOTE(review): `_categories` and `_min_impact_score` are currently unused;
/// filtering appears to happen in the caller.
async fn fetch_trending_articles(
    client: &WebClient,
    config: &NewsConfig,
    time_window: &Option<String>,
    _categories: &Option<Vec<String>>,
    _min_impact_score: u32,
) -> crate::error::Result<Vec<NewsArticle>> {
    // Prefer CryptoPanic trending (rising) plus NewsAPI top-headlines as fallback
    let mut out: Vec<NewsArticle> = Vec::new();

    if !config.cryptopanic_key.is_empty() {
        let mut params = std::collections::HashMap::new();
        params.insert("auth_token".to_string(), config.cryptopanic_key.clone());
        params.insert("filter".to_string(), "rising".to_string());
        params.insert("kind".to_string(), "news".to_string());
        params.insert("public".to_string(), "true".to_string());
        // The public CryptoPanic endpoint has no time-range parameter, so the
        // requested window is acknowledged but deliberately ignored.
        if let Some(window) = time_window.as_ref() {
            let _ = window; // not directly supported
        }
        if let Ok(resp) = client
            .get_with_params("https://cryptopanic.com/api/v1/posts", &params)
            .await
        {
            if let Ok(json) = serde_json::from_str::<serde_json::Value>(&resp) {
                if let Some(results) = json.get("results").and_then(|v| v.as_array()) {
                    for item in results {
                        let title = item
                            .get("title")
                            .and_then(|v| v.as_str())
                            .unwrap_or("")
                            .to_string();
                        let url = item
                            .get("url")
                            .and_then(|v| v.as_str())
                            .unwrap_or("")
                            .to_string();
                        // Skip entries missing identity/display fields.
                        if title.is_empty() || url.is_empty() {
                            continue;
                        }
                        // Fall back to "now" on absent/malformed timestamps.
                        let published_at = item
                            .get("published_at")
                            .and_then(|v| v.as_str())
                            .and_then(|s| DateTime::parse_from_rfc3339(s).ok())
                            .map_or_else(Utc::now, |dt| dt.with_timezone(&Utc));
                        out.push(NewsArticle {
                            id: format!(
                                "cp_trending_{}_{}",
                                published_at.timestamp(),
                                hash64(&url)
                            ),
                            title: title.clone(),
                            url: url.clone(),
                            description: None,
                            content: None,
                            published_at,
                            // Fixed source identity: CryptoPanic aggregates and
                            // does not expose the original outlet here.
                            source: NewsSource {
                                id: "cryptopanic".to_string(),
                                name: "CryptoPanic".to_string(),
                                url,
                                category: "Crypto".to_string(),
                                credibility_score: 70,
                                accuracy_rating: None,
                                bias_score: None,
                                is_verified: true,
                                logo_url: None,
                            },
                            category: NewsCategory {
                                primary: "Trending".to_string(),
                                sub_category: None,
                                tags: vec![],
                                geographic_scope: vec!["Global".to_string()],
                                target_audience: "Crypto".to_string(),
                            },
                            // Only the title is available for analysis here.
                            sentiment: analyze_sentiment(&title, &None, &None),
                            market_impact: calculate_market_impact_simple(&title),
                            entities: vec![],
                            related_assets: vec![],
                            quality_metrics: QualityMetrics {
                                overall_score: 65,
                                depth_score: 55,
                                factual_accuracy: 70,
                                writing_quality: 65,
                                citation_quality: 55,
                                uniqueness_score: 50,
                                reading_difficulty: 5,
                            },
                            social_metrics: None,
                        });
                    }
                }
            }
        }
    }

    // Fallback to NewsAPI top-headlines about crypto
    if out.is_empty() && !config.newsapi_key.is_empty() {
        let url = format!("{}/top-headlines", config.base_url);
        let mut params = std::collections::HashMap::new();
        params.insert("q".to_string(), "crypto OR bitcoin OR ethereum".to_string());
        params.insert("language".to_string(), "en".to_string());
        params.insert("pageSize".to_string(), "20".to_string());
        let mut headers = std::collections::HashMap::new();
        headers.insert("X-Api-Key".to_string(), config.newsapi_key.clone());
        if let Ok(resp) = client
            .get_with_params_and_headers(&url, &params, headers)
            .await
        {
            if let Ok(json) = serde_json::from_str::<serde_json::Value>(&resp) {
                if let Some(arts) = json.get("articles").and_then(|v| v.as_array()) {
                    for a in arts {
                        let title = a
                            .get("title")
                            .and_then(|v| v.as_str())
                            .unwrap_or("")
                            .to_string();
                        let url = a
                            .get("url")
                            .and_then(|v| v.as_str())
                            .unwrap_or("")
                            .to_string();
                        // Skip entries missing identity/display fields.
                        if title.is_empty() || url.is_empty() {
                            continue;
                        }
                        let published_at = a
                            .get("publishedAt")
                            .and_then(|v| v.as_str())
                            .and_then(|s| DateTime::parse_from_rfc3339(s).ok())
                            .map_or_else(Utc::now, |dt| dt.with_timezone(&Utc));
                        // Parse and analyze the article properly
                        let description = a
                            .get("description")
                            .and_then(|v| v.as_str())
                            .map(|s| s.to_string());
                        let content = a
                            .get("content")
                            .and_then(|v| v.as_str())
                            .map(|s| s.to_string());
                        let source_name = a
                            .get("source")
                            .and_then(|o| o.get("name"))
                            .and_then(|v| v.as_str())
                            .unwrap_or("NewsAPI");

                        let source = NewsSource {
                            id: format!("newsapi_{}", hash64(&url)),
                            name: source_name.to_string(),
                            url: url.clone(),
                            category: "Mainstream".to_string(),
                            credibility_score: 75,
                            accuracy_rating: None,
                            bias_score: None,
                            is_verified: true,
                            logo_url: None,
                        };

                        let category = NewsCategory {
                            primary: "Trending".to_string(),
                            sub_category: None,
                            tags: extract_tags_from_text(&title, &description),
                            geographic_scope: vec!["Global".to_string()],
                            target_audience: "Retail".to_string(),
                        };

                        // Unlike the CryptoPanic branch above, full text is
                        // available here, so richer analysis is applied.
                        out.push(NewsArticle {
                            id: format!(
                                "newsapi_trending_{}_{}",
                                published_at.timestamp(),
                                hash64(&url)
                            ),
                            title: title.clone(),
                            url,
                            description: description.clone(),
                            content: content.clone(),
                            published_at,
                            source,
                            category,
                            sentiment: analyze_sentiment(&title, &description, &content),
                            market_impact: calculate_market_impact_from_content(
                                &title,
                                &description,
                                &content,
                            ),
                            entities: extract_entities_from_text(
                                &title,
                                &description,
                                &content,
                                "crypto",
                            ),
                            related_assets: extract_crypto_mentions(&title, &description, &content),
                            quality_metrics: calculate_quality_metrics(
                                &title,
                                &description,
                                &content,
                                75,
                            ),
                            social_metrics: None,
                        });
                    }
                }
            }
        }
    }

    Ok(out)
}
1537
/// Stable 64-bit hash of `s` via the std `DefaultHasher` (used to build
/// deterministic article IDs from URLs).
fn hash64(s: &str) -> u64 {
    use std::collections::hash_map::DefaultHasher;

    let mut state = DefaultHasher::new();
    s.hash(&mut state);
    state.finish()
}
1543
1544async fn analyze_trending_patterns(articles: &[NewsArticle]) -> crate::error::Result<NewsInsights> {
1545    // Similar to analyze_news_collection but with trending-specific logic
1546    analyze_news_collection(articles).await
1547}
1548
1549async fn detect_breaking_news(
1550    client: &WebClient,
1551    config: &NewsConfig,
1552    keyword: &str,
1553) -> crate::error::Result<Vec<BreakingNewsAlert>> {
1554    // Heuristic: fetch very recent items and detect urgency keywords
1555    let mut alerts: Vec<BreakingNewsAlert> = Vec::new();
1556
1557    let mut articles: Vec<NewsArticle> = Vec::new();
1558    if !config.newsapi_key.is_empty() {
1559        if let Ok(mut a) = query_newsapi(client, config, keyword, &Some("1h".to_string())).await {
1560            articles.append(&mut a);
1561        }
1562    }
1563    if !config.cryptopanic_key.is_empty() {
1564        if let Ok(mut a) = query_cryptopanic(client, config, keyword, &Some("1h".to_string())).await
1565        {
1566            articles.append(&mut a);
1567        }
1568    }
1569
1570    // Filter very recent (<= 2h) and containing strong terms
1571    let urgent_terms = [
1572        "breaking",
1573        "urgent",
1574        "exploit",
1575        "hack",
1576        "outage",
1577        "halt",
1578        "SEC",
1579        "lawsuit",
1580        "bankrupt",
1581        "halted",
1582        "paused",
1583        "breach",
1584        "attack",
1585        "flash loan",
1586        "rug",
1587    ];
1588    let now = Utc::now();
1589    let mut grouped: Vec<NewsArticle> = Vec::new();
1590    for a in articles.into_iter() {
1591        if (now - a.published_at) <= chrono::Duration::hours(2) {
1592            let hay = format!(
1593                "{} {} {}",
1594                a.title,
1595                a.description.clone().unwrap_or_default(),
1596                a.url
1597            );
1598            if urgent_terms
1599                .iter()
1600                .any(|t| hay.to_lowercase().contains(&t.to_lowercase()))
1601            {
1602                grouped.push(a);
1603            }
1604        }
1605    }
1606
1607    if !grouped.is_empty() {
1608        let est_impact = MarketImpact {
1609            impact_level: "High".to_string(),
1610            impact_score: 80,
1611            time_horizon: "Immediate".to_string(),
1612            affected_sectors: vec!["Crypto".to_string()],
1613            potential_price_impact: Some(5.0),
1614            historical_correlation: None,
1615            risk_factors: vec!["Volatility".to_string()],
1616        };
1617        let alert = BreakingNewsAlert {
1618            id: format!("breaking_{}_{}", keyword.to_lowercase(), now.timestamp()),
1619            severity: "High".to_string(),
1620            title: format!("Breaking: {} - {} items", keyword, grouped.len()),
1621            description: format!("Detected urgent developments related to '{}'.", keyword),
1622            articles: grouped,
1623            estimated_impact: est_impact,
1624            created_at: now,
1625            expires_at: Some(now + chrono::Duration::hours(4)),
1626        };
1627        alerts.push(alert);
1628    }
1629
1630    Ok(alerts)
1631}
1632
/// Whether `current_severity` meets or exceeds `threshold` on the
/// Low < Medium < High < Critical scale.
///
/// Unknown severity strings rank as Low; unknown thresholds rank as Medium,
/// matching the defaults used elsewhere in this module.
fn is_above_severity_threshold(current_severity: &str, threshold: &str) -> bool {
    // Map a label to its rank, using `fallback` for unrecognized labels.
    fn rank(level: &str, fallback: usize) -> usize {
        match level {
            "Low" => 0,
            "Medium" => 1,
            "High" => 2,
            "Critical" => 3,
            _ => fallback,
        }
    }
    rank(current_severity, 0) >= rank(threshold, 1)
}
1645
1646/// Get sentiment analyzer from ApplicationContext or create default
1647#[allow(dead_code)]
1648fn get_sentiment_analyzer(_context: &ApplicationContext) -> Arc<LexiconSentimentAnalyzer> {
1649    // For now, always return the default analyzer
1650    // In the future, this can be extended to get from context extensions
1651    Arc::new(LexiconSentimentAnalyzer::default())
1652}
1653
1654/// Helper function for backward compatibility
1655fn analyze_sentiment(
1656    title: &str,
1657    description: &Option<String>,
1658    content: &Option<String>,
1659) -> NewsSentiment {
1660    // Use the default analyzer for now
1661    let analyzer = LexiconSentimentAnalyzer::default();
1662    analyzer.analyze_sentiment_impl(title, description.as_deref(), content.as_deref())
1663}
1664
/// Calculate market impact using heuristic rules based on sentiment and source credibility
///
/// This is a simplified calculation based on keyword presence and sentiment scores.
/// For production use, consider training a model on historical market reactions.
///
/// Pipeline: sentiment magnitude × confidence → scaled by source credibility →
/// weighted by category → capped at 100, then mapped to a level, time horizon,
/// affected sectors, an optional price-impact estimate, and risk factors.
///
/// NOTE(review): assumes `sentiment.overall_score` is in [-1.0, 1.0] and
/// `confidence` in [0.0, 1.0] so the base score stays within 0-100 — confirm
/// against the analyzer that produces `NewsSentiment`.
fn calculate_market_impact(
    sentiment: &NewsSentiment,
    source: &NewsSource,
    category: &NewsCategory,
) -> MarketImpact {
    // Base impact score from sentiment magnitude and confidence
    let sentiment_impact = (sentiment.overall_score.abs() * 100.0 * sentiment.confidence) as u32;

    // Adjust for source credibility (credibility_score is treated as 0-100).
    let credibility_factor = source.credibility_score as f64 / 100.0;
    let base_score = (sentiment_impact as f64 * credibility_factor) as u32;

    // Category-based adjustments: urgent/regulatory/security news amplifies impact.
    let category_multiplier = match category.primary.as_str() {
        "Breaking" => 1.5,
        "Regulation" => 1.4,
        "Security" => 1.3,
        "Analysis" => 1.1,
        _ => 1.0,
    };

    let impact_score = ((base_score as f64 * category_multiplier).min(100.0)) as u32;

    // Determine impact level from the capped 0-100 score.
    let impact_level = match impact_score {
        80..=100 => "Critical",
        60..=79 => "High",
        40..=59 => "Medium",
        20..=39 => "Low",
        _ => "Negligible",
    }
    .to_string();

    // Time horizon based on urgency and category; high urgency or a
    // "Breaking" category forces "Immediate" regardless of score.
    let time_horizon = if sentiment.emotions.urgency > 0.7 || category.primary == "Breaking" {
        "Immediate"
    } else if impact_score > 60 {
        "Short-term"
    } else {
        "Medium-term"
    }
    .to_string();

    // Identify affected sectors from category tags (substring match, so a
    // tag like "defi-lending" also counts as "defi").
    let mut affected_sectors = Vec::new();
    if category.tags.iter().any(|t| t.contains("defi")) {
        affected_sectors.push("DeFi".to_string());
    }
    if category.tags.iter().any(|t| t.contains("nft")) {
        affected_sectors.push("NFT".to_string());
    }
    if category.tags.iter().any(|t| t.contains("exchange")) {
        affected_sectors.push("CEX".to_string());
    }
    if category.tags.iter().any(|t| t.contains("regulation")) {
        affected_sectors.push("Regulatory".to_string());
    }
    if affected_sectors.is_empty() {
        affected_sectors.push("General".to_string());
    }

    // Estimate potential price impact only for higher-impact news.
    let potential_price_impact = if impact_score > 70 {
        Some((sentiment.overall_score * 10.0).abs())
    } else if impact_score > 50 {
        Some((sentiment.overall_score * 5.0).abs())
    } else {
        None
    };

    // Identify risk factors from emotional signals and category tags.
    let mut risk_factors = Vec::new();
    if sentiment.emotions.uncertainty > 0.6 {
        risk_factors.push("High uncertainty".to_string());
    }
    if sentiment.emotions.fear > 0.6 {
        risk_factors.push("Market fear".to_string());
    }
    if category
        .tags
        .iter()
        .any(|t| t.contains("hack") || t.contains("exploit"))
    {
        risk_factors.push("Security breach".to_string());
    }
    if category.tags.iter().any(|t| t.contains("regulation")) {
        risk_factors.push("Regulatory risk".to_string());
    }

    MarketImpact {
        impact_level,
        impact_score,
        time_horizon,
        affected_sectors,
        potential_price_impact,
        historical_correlation: None, // Would need historical data
        risk_factors,
    }
}
1768
1769/// Simpler market impact calculation for when we only have title
1770fn calculate_market_impact_simple(title: &str) -> MarketImpact {
1771    let title_lower = title.to_lowercase();
1772
1773    // High impact keywords
1774    let high_impact = [
1775        "hack",
1776        "exploit",
1777        "sec",
1778        "ban",
1779        "crash",
1780        "surge",
1781        "partnership",
1782        "adoption",
1783    ];
1784    let medium_impact = ["update", "launch", "announce", "report", "analysis"];
1785
1786    let (impact_level, impact_score) = if high_impact.iter().any(|k| title_lower.contains(k)) {
1787        ("High".to_string(), 70)
1788    } else if medium_impact.iter().any(|k| title_lower.contains(k)) {
1789        ("Medium".to_string(), 50)
1790    } else {
1791        ("Low".to_string(), 30)
1792    };
1793
1794    MarketImpact {
1795        impact_level,
1796        impact_score,
1797        time_horizon: "Short-term".to_string(),
1798        affected_sectors: vec!["General".to_string()],
1799        potential_price_impact: None,
1800        historical_correlation: None,
1801        risk_factors: vec![],
1802    }
1803}
1804
1805/// Extract entities from text using regex pattern matching
1806///
1807/// This is a heuristic approach that matches against predefined entity patterns.
1808/// Only recognizes entities in the hardcoded list. For production use, consider
1809/// integrating with a proper NER (Named Entity Recognition) model.
1810fn extract_entities_from_text(
1811    title: &str,
1812    description: &Option<String>,
1813    content: &Option<String>,
1814    default_topic: &str,
1815) -> Vec<NewsEntity> {
1816    let full_text = format!(
1817        "{} {} {}",
1818        title,
1819        description.as_deref().unwrap_or(""),
1820        content.as_deref().unwrap_or("")
1821    );
1822
1823    let mut entities = Vec::new();
1824    let mut entity_map: HashMap<String, (String, u32)> = HashMap::new(); // name -> (type, count)
1825
1826    // Cryptocurrency patterns
1827    let crypto_pattern = r"\b(Bitcoin|BTC|Ethereum|ETH|Solana|SOL|Cardano|ADA|Polkadot|DOT|Chainlink|LINK|Avalanche|AVAX|Polygon|MATIC|Arbitrum|ARB|Optimism|OP)\b";
1828    if let Ok(re) = Regex::new(crypto_pattern) {
1829        for cap in re.captures_iter(&full_text) {
1830            if let Some(matched) = cap.get(0) {
1831                let name = matched.as_str();
1832                let entry = entity_map
1833                    .entry(name.to_string())
1834                    .or_insert(("Cryptocurrency".to_string(), 0));
1835                entry.1 += 1;
1836            }
1837        }
1838    }
1839
1840    // Company patterns
1841    let company_pattern = r"\b(Coinbase|Binance|Kraken|FTX|OpenSea|Uniswap|Aave|Compound|MakerDAO|Circle|Tether|Block\.one|ConsenSys|Ripple|Grayscale|MicroStrategy|Tesla|Square|PayPal)\b";
1842    if let Ok(re) = Regex::new(company_pattern) {
1843        for cap in re.captures_iter(&full_text) {
1844            if let Some(matched) = cap.get(0) {
1845                let name = matched.as_str();
1846                let entry = entity_map
1847                    .entry(name.to_string())
1848                    .or_insert(("Company".to_string(), 0));
1849                entry.1 += 1;
1850            }
1851        }
1852    }
1853
1854    // Person patterns (common crypto figures)
1855    let person_pattern = r"\b(Vitalik Buterin|Satoshi Nakamoto|CZ|Changpeng Zhao|Sam Bankman-Fried|SBF|Michael Saylor|Elon Musk|Gary Gensler|Jerome Powell)\b";
1856    if let Ok(re) = Regex::new(person_pattern) {
1857        for cap in re.captures_iter(&full_text) {
1858            if let Some(matched) = cap.get(0) {
1859                let name = matched.as_str();
1860                let entry = entity_map
1861                    .entry(name.to_string())
1862                    .or_insert(("Person".to_string(), 0));
1863                entry.1 += 1;
1864            }
1865        }
1866    }
1867
1868    // Protocol/Platform patterns
1869    let protocol_pattern =
1870        r"\b(DeFi|NFT|DAO|DEX|CEX|Layer 2|L2|zkSync|StarkNet|Lightning Network|Cosmos|IBC)\b";
1871    if let Ok(re) = Regex::new(protocol_pattern) {
1872        for cap in re.captures_iter(&full_text) {
1873            if let Some(matched) = cap.get(0) {
1874                let name = matched.as_str();
1875                let entry = entity_map
1876                    .entry(name.to_string())
1877                    .or_insert(("Protocol".to_string(), 0));
1878                entry.1 += 1;
1879            }
1880        }
1881    }
1882
1883    // Convert map to entities vector
1884    for (name, (entity_type, count)) in entity_map {
1885        let relevance_score = (count as f64 / 10.0).min(1.0);
1886        entities.push(NewsEntity {
1887            name: name.clone(),
1888            entity_type,
1889            relevance_score,
1890            sentiment: None, // Would need entity-specific sentiment analysis
1891            mention_count: count,
1892            contexts: vec![], // Would need to extract surrounding context
1893        });
1894    }
1895
1896    // Add default topic if no entities found
1897    if entities.is_empty() {
1898        entities.push(NewsEntity {
1899            name: default_topic.to_string(),
1900            entity_type: "Topic".to_string(),
1901            relevance_score: 0.5,
1902            sentiment: None,
1903            mention_count: 1,
1904            contexts: vec![],
1905        });
1906    }
1907
1908    // Sort by relevance
1909    entities.sort_by(|a, b| {
1910        b.relevance_score
1911            .partial_cmp(&a.relevance_score)
1912            .unwrap_or(std::cmp::Ordering::Equal)
1913    });
1914
1915    entities
1916}
1917
/// Derive topic tags from a title and optional description.
///
/// Performs case-insensitive substring matching against a fixed keyword
/// table; the returned tags are sorted and deduplicated (several keywords
/// can map to the same tag, e.g. "sec" and "regulation").
fn extract_tags_from_text(title: &str, description: &Option<String>) -> Vec<String> {
    const TAG_KEYWORDS: [(&str, &str); 21] = [
        ("defi", "defi"),
        ("nft", "nft"),
        ("metaverse", "metaverse"),
        ("web3", "web3"),
        ("layer 2", "layer2"),
        ("stablecoin", "stablecoin"),
        ("cbdc", "cbdc"),
        ("mining", "mining"),
        ("staking", "staking"),
        ("governance", "governance"),
        ("dao", "dao"),
        ("smart contract", "smart-contracts"),
        ("regulation", "regulation"),
        ("sec", "regulation"),
        ("hack", "security"),
        ("exploit", "security"),
        ("partnership", "partnership"),
        ("integration", "integration"),
        ("upgrade", "upgrade"),
        ("mainnet", "mainnet"),
        ("testnet", "testnet"),
    ];

    let haystack = format!(
        "{} {}",
        title.to_lowercase(),
        description.as_deref().unwrap_or("").to_lowercase()
    );

    let mut tags: Vec<String> = TAG_KEYWORDS
        .iter()
        .filter_map(|&(keyword, tag)| haystack.contains(keyword).then(|| tag.to_string()))
        .collect();

    tags.sort();
    tags.dedup();
    tags
}
1965
/// List canonical cryptocurrency ids mentioned in the given text fields.
///
/// Matches lowercase keywords (full names and tickers) as plain substrings,
/// so short tickers can match inside longer words. Each canonical id appears
/// at most once, in keyword-table order.
fn extract_crypto_mentions(
    title: &str,
    description: &Option<String>,
    content: &Option<String>,
) -> Vec<String> {
    const CRYPTO_KEYWORDS: [(&str, &str); 22] = [
        ("bitcoin", "bitcoin"),
        ("btc", "bitcoin"),
        ("ethereum", "ethereum"),
        ("eth", "ethereum"),
        ("solana", "solana"),
        ("sol", "solana"),
        ("cardano", "cardano"),
        ("ada", "cardano"),
        ("polkadot", "polkadot"),
        ("dot", "polkadot"),
        ("chainlink", "chainlink"),
        ("link", "chainlink"),
        ("avalanche", "avalanche"),
        ("avax", "avalanche"),
        ("polygon", "polygon"),
        ("matic", "polygon"),
        ("arbitrum", "arbitrum"),
        ("optimism", "optimism"),
        ("bnb", "bnb"),
        ("xrp", "xrp"),
        ("doge", "dogecoin"),
        ("shib", "shiba-inu"),
    ];

    let haystack = format!(
        "{} {} {}",
        title.to_lowercase(),
        description.as_deref().unwrap_or("").to_lowercase(),
        content.as_deref().unwrap_or("").to_lowercase()
    );

    let mut mentioned: Vec<String> = Vec::new();
    for &(keyword, canonical) in &CRYPTO_KEYWORDS {
        if haystack.contains(keyword) && !mentioned.iter().any(|c| c == canonical) {
            mentioned.push(canonical.to_string());
        }
    }
    mentioned
}
2014
2015/// Calculate quality metrics for an article
2016fn calculate_quality_metrics(
2017    title: &str,
2018    description: &Option<String>,
2019    content: &Option<String>,
2020    source_credibility: u32,
2021) -> QualityMetrics {
2022    let has_description = description.is_some() && !description.as_ref().unwrap().is_empty();
2023    let _has_content = content.is_some() && !content.as_ref().unwrap().is_empty();
2024
2025    // Content depth based on length and structure
2026    let content_length = content.as_ref().map_or(0, |c| c.len());
2027    let depth_score = if content_length > 2000 {
2028        85
2029    } else if content_length > 1000 {
2030        70
2031    } else if content_length > 500 {
2032        55
2033    } else if has_description {
2034        40
2035    } else {
2036        25
2037    };
2038
2039    // Writing quality based on title and description
2040    let title_words = title.split_whitespace().count();
2041    let writing_quality = if title_words > 5 && title_words < 20 && has_description {
2042        75
2043    } else if title_words > 3 {
2044        65
2045    } else {
2046        50
2047    };
2048
2049    // Citation quality (would need to detect citations in real implementation)
2050    let citation_quality = if content_length > 1000 { 60 } else { 40 };
2051
2052    // Overall score
2053    let overall_score = ((source_credibility as f64 * 0.3)
2054        + (depth_score as f64 * 0.3)
2055        + (writing_quality as f64 * 0.2)
2056        + (citation_quality as f64 * 0.2)) as u32;
2057
2058    QualityMetrics {
2059        overall_score,
2060        depth_score,
2061        factual_accuracy: source_credibility, // Use source credibility as proxy
2062        writing_quality,
2063        citation_quality,
2064        uniqueness_score: 50, // Would need deduplication analysis
2065        reading_difficulty: if content_length > 2000 { 7 } else { 5 },
2066    }
2067}
2068
2069/// Calculate market impact from content analysis
2070fn calculate_market_impact_from_content(
2071    title: &str,
2072    description: &Option<String>,
2073    content: &Option<String>,
2074) -> MarketImpact {
2075    let sentiment = analyze_sentiment(title, description, content);
2076    let full_text = format!(
2077        "{} {} {}",
2078        title.to_lowercase(),
2079        description.as_deref().unwrap_or("").to_lowercase(),
2080        content.as_deref().unwrap_or("").to_lowercase()
2081    );
2082
2083    // Check for high-impact keywords
2084    let critical_keywords = [
2085        "hack",
2086        "exploit",
2087        "bankrupt",
2088        "sec enforcement",
2089        "criminal",
2090        "fraud",
2091    ];
2092    let high_keywords = [
2093        "partnership",
2094        "adoption",
2095        "integration",
2096        "launch",
2097        "acquisition",
2098    ];
2099    let medium_keywords = ["update", "upgrade", "announce", "report", "analysis"];
2100
2101    let has_critical = critical_keywords.iter().any(|k| full_text.contains(k));
2102    let has_high = high_keywords.iter().any(|k| full_text.contains(k));
2103    let has_medium = medium_keywords.iter().any(|k| full_text.contains(k));
2104
2105    let (impact_level, base_score) = if has_critical {
2106        ("Critical", 85)
2107    } else if has_high {
2108        ("High", 70)
2109    } else if has_medium {
2110        ("Medium", 50)
2111    } else {
2112        ("Low", 30)
2113    };
2114
2115    // Adjust score based on sentiment magnitude
2116    let impact_score =
2117        ((base_score as f64 * (1.0 + sentiment.overall_score.abs() * 0.3)) as u32).min(100);
2118
2119    MarketImpact {
2120        impact_level: impact_level.to_string(),
2121        impact_score,
2122        time_horizon: if has_critical {
2123            "Immediate"
2124        } else {
2125            "Short-term"
2126        }
2127        .to_string(),
2128        affected_sectors: extract_affected_sectors(&full_text),
2129        potential_price_impact: if impact_score > 70 {
2130            Some((sentiment.overall_score * 7.5).abs())
2131        } else if impact_score > 50 {
2132            Some((sentiment.overall_score * 4.0).abs())
2133        } else {
2134            None
2135        },
2136        historical_correlation: None,
2137        risk_factors: extract_risk_factors(&full_text),
2138    }
2139}
2140
/// Map sector keywords found in text to sector labels.
///
/// Returns ["General"] when nothing matches. Matching is plain substring
/// search; callers pass already-lowercased text.
fn extract_affected_sectors(text: &str) -> Vec<String> {
    const SECTOR_KEYWORDS: [(&str, &str); 11] = [
        ("defi", "DeFi"),
        ("nft", "NFT"),
        ("exchange", "CEX"),
        ("dex", "DEX"),
        ("stablecoin", "Stablecoins"),
        ("mining", "Mining"),
        ("layer 2", "Layer2"),
        ("lending", "Lending"),
        ("derivatives", "Derivatives"),
        ("gamefi", "GameFi"),
        ("metaverse", "Metaverse"),
    ];

    let mut sectors: Vec<String> = Vec::new();
    for &(keyword, sector) in &SECTOR_KEYWORDS {
        if text.contains(keyword) && !sectors.iter().any(|s| s == sector) {
            sectors.push(sector.to_string());
        }
    }

    if sectors.is_empty() {
        sectors.push("General".to_string());
    }
    sectors
}
2171
/// Collect risk-factor labels for keywords present in the text.
///
/// Substring matching on already-lowercased input; every keyword maps to a
/// distinct label, so the result contains no duplicates. May return an
/// empty vector when no risk keyword is present.
fn extract_risk_factors(text: &str) -> Vec<String> {
    const RISK_KEYWORDS: [(&str, &str); 10] = [
        ("regulation", "Regulatory uncertainty"),
        ("sec", "Regulatory action"),
        ("hack", "Security vulnerability"),
        ("exploit", "Protocol vulnerability"),
        ("volatile", "Market volatility"),
        ("uncertain", "Market uncertainty"),
        ("lawsuit", "Legal risk"),
        ("investigation", "Regulatory investigation"),
        ("liquidity", "Liquidity risk"),
        ("contagion", "Contagion risk"),
    ];

    RISK_KEYWORDS
        .iter()
        .filter_map(|&(keyword, risk)| text.contains(keyword).then(|| risk.to_string()))
        .collect()
}
2197
2198#[cfg(test)]
2199mod tests {
2200    use super::*;
2201
2202    #[test]
2203    fn test_news_config_default() {
2204        let config = NewsConfig::default();
2205        assert_eq!(config.base_url, "https://newsapi.org/v2");
2206        assert_eq!(config.max_articles, 50);
2207    }
2208
2209    #[test]
2210    fn test_basic_news_functionality() {
2211        // Simple test that verifies basic functionality
2212        let simple_title = "Bitcoin News Test".to_string();
2213        assert!(simple_title.contains("Bitcoin"));
2214
2215        // Test the NewsConfig creation
2216        let config = NewsConfig::default();
2217        assert_eq!(config.base_url, "https://newsapi.org/v2");
2218        assert_eq!(config.max_articles, 50);
2219    }
2220
2221    #[test]
2222    fn test_parse_time_window() {
2223        assert_eq!(parse_time_window("1h"), 1);
2224        assert_eq!(parse_time_window("24h"), 24);
2225        assert_eq!(parse_time_window("week"), 168);
2226    }
2227
2228    #[test]
2229    fn test_severity_threshold() {
2230        assert!(is_above_severity_threshold("High", "Medium"));
2231        assert!(!is_above_severity_threshold("Medium", "High"));
2232        assert!(is_above_severity_threshold("Critical", "High"));
2233    }
2234
2235    // Additional comprehensive tests for 100% coverage
2236
2237    #[test]
2238    fn test_parse_time_window_all_cases() {
2239        assert_eq!(parse_time_window("6h"), 6);
2240        assert_eq!(parse_time_window("invalid"), 24); // Default case
2241        assert_eq!(parse_time_window(""), 24); // Empty string
2242        assert_eq!(parse_time_window("random_text"), 24);
2243    }
2244
2245    #[test]
2246    fn test_hash64_function() {
2247        let test_string = "test_string";
2248        let hash1 = hash64(test_string);
2249        let hash2 = hash64(test_string);
2250        assert_eq!(hash1, hash2); // Same input should produce same hash
2251
2252        let different_hash = hash64("different_string");
2253        assert_ne!(hash1, different_hash); // Different inputs should produce different hashes
2254
2255        let empty_hash = hash64("");
2256        assert_ne!(hash1, empty_hash); // Empty string should produce different hash
2257    }
2258
2259    #[test]
2260    fn test_severity_threshold_edge_cases() {
2261        // Test all valid severity levels
2262        assert!(is_above_severity_threshold("Critical", "Critical"));
2263        assert!(is_above_severity_threshold("High", "High"));
2264        assert!(is_above_severity_threshold("Medium", "Medium"));
2265        assert!(is_above_severity_threshold("Low", "Low"));
2266
2267        // Test invalid severities (should default to position 0 and 1)
2268        assert!(!is_above_severity_threshold("Invalid", "Medium"));
2269        assert!(is_above_severity_threshold("Medium", "Invalid"));
2270        assert!(is_above_severity_threshold("Invalid", "Invalid"));
2271    }
2272
2273    #[test]
2274    fn test_calculate_avg_credibility() {
2275        // Test with empty articles
2276        let empty_articles: Vec<NewsArticle> = vec![];
2277        assert_eq!(calculate_avg_credibility(&empty_articles), 0.0);
2278
2279        // Test with single article
2280        let single_article = vec![create_test_article_with_credibility(75)];
2281        assert_eq!(calculate_avg_credibility(&single_article), 75.0);
2282
2283        // Test with multiple articles
2284        let multiple_articles = vec![
2285            create_test_article_with_credibility(80),
2286            create_test_article_with_credibility(60),
2287            create_test_article_with_credibility(70),
2288        ];
2289        assert_eq!(calculate_avg_credibility(&multiple_articles), 70.0);
2290    }
2291
2292    #[test]
2293    fn test_deduplicate_articles() {
2294        // Test with no articles
2295        let empty_articles: Vec<NewsArticle> = vec![];
2296        let result = deduplicate_articles(empty_articles);
2297        assert!(result.is_empty());
2298
2299        // Test with unique articles
2300        let unique_articles = vec![
2301            create_test_article_with_url("https://example1.com"),
2302            create_test_article_with_url("https://example2.com"),
2303        ];
2304        let result = deduplicate_articles(unique_articles);
2305        assert_eq!(result.len(), 2);
2306
2307        // Test with duplicate URLs
2308        let duplicate_articles = vec![
2309            create_test_article_with_url("https://example.com"),
2310            create_test_article_with_url("https://example.com"),
2311            create_test_article_with_url("https://different.com"),
2312        ];
2313        let result = deduplicate_articles(duplicate_articles);
2314        assert_eq!(result.len(), 2);
2315    }
2316
2317    #[test]
2318    fn test_determine_sentiment_trend() {
2319        // Test improving trend
2320        let improving_articles = vec![
2321            create_test_article_with_sentiment(0.5),
2322            create_test_article_with_sentiment(0.6),
2323        ];
2324        assert_eq!(determine_sentiment_trend(&improving_articles), "Improving");
2325
2326        // Test declining trend
2327        let declining_articles = vec![
2328            create_test_article_with_sentiment(-0.5),
2329            create_test_article_with_sentiment(-0.6),
2330        ];
2331        assert_eq!(determine_sentiment_trend(&declining_articles), "Declining");
2332
2333        // Test stable trend
2334        let stable_articles = vec![
2335            create_test_article_with_sentiment(0.05),
2336            create_test_article_with_sentiment(-0.05),
2337        ];
2338        assert_eq!(determine_sentiment_trend(&stable_articles), "Stable");
2339
2340        // Test edge cases for thresholds
2341        let edge_positive = vec![create_test_article_with_sentiment(0.1)];
2342        assert_eq!(determine_sentiment_trend(&edge_positive), "Stable");
2343
2344        let edge_negative = vec![create_test_article_with_sentiment(-0.1)];
2345        assert_eq!(determine_sentiment_trend(&edge_negative), "Stable");
2346
2347        let just_above_positive = vec![create_test_article_with_sentiment(0.11)];
2348        assert_eq!(determine_sentiment_trend(&just_above_positive), "Improving");
2349
2350        let just_below_negative = vec![create_test_article_with_sentiment(-0.11)];
2351        assert_eq!(determine_sentiment_trend(&just_below_negative), "Declining");
2352    }
2353
    /// End-to-end checks of the lexicon sentiment analyzer: overall
    /// classification, emotional indicators, and per-topic sentiment.
    #[test]
    fn test_analyze_sentiment_comprehensive() {
        // Test positive sentiment — lexicon words like "surge"/"bullish"
        // should push the score above zero and classify as Bullish.
        let positive_sentiment = analyze_sentiment(
            "Bitcoin surge brings bullish sentiment to crypto markets",
            &Some("Strong gains and positive developments".to_string()),
            &Some("The rally continues with strong adoption and growth".to_string()),
        );
        assert!(positive_sentiment.overall_score > 0.0);
        assert_eq!(positive_sentiment.classification, "Bullish");
        assert!(positive_sentiment.confidence > 0.0);

        // Test negative sentiment
        let negative_sentiment = analyze_sentiment(
            "Bitcoin crash brings bearish sentiment and market fears",
            &Some("Major decline and concerns about future".to_string()),
            &Some("The drop causes risk and threats to vulnerable markets".to_string()),
        );
        assert!(negative_sentiment.overall_score < 0.0);
        assert_eq!(negative_sentiment.classification, "Bearish");

        // Test neutral sentiment (no strong lexicon hits)
        let neutral_sentiment = analyze_sentiment(
            "Bitcoin price analysis report",
            &Some("Regular market update".to_string()),
            &None,
        );
        assert_eq!(neutral_sentiment.classification, "Neutral");

        // Test edge case classifications — mildly positive wording should
        // land in a "...Bullish" bucket without pinning the exact label.
        let slightly_bullish =
            analyze_sentiment("Bitcoin shows mild growth and positive signs", &None, &None);
        assert!(slightly_bullish.classification.contains("Bullish"));

        // Test emotional indicators: fear, greed, and uncertainty words each
        // drive their respective channel above zero.
        let fear_content =
            analyze_sentiment("Market crash panic fear worried investors", &None, &None);
        assert!(fear_content.emotions.fear > 0.0);

        let greed_content = analyze_sentiment(
            "Moon lambo rich massive explosive gains profit",
            &None,
            &None,
        );
        assert!(greed_content.emotions.greed > 0.0);

        let uncertainty_content = analyze_sentiment(
            "Maybe perhaps unclear uncertain volatile unpredictable",
            &None,
            &None,
        );
        assert!(uncertainty_content.emotions.uncertainty > 0.0);

        // Test topic-specific sentiments — per-coin scores follow the tone
        // of the sentence mentioning that coin.
        let bitcoin_surge = analyze_sentiment("Bitcoin surge hits new highs", &None, &None);
        assert!(bitcoin_surge.topic_sentiments.contains_key("bitcoin"));
        assert!(bitcoin_surge.topic_sentiments["bitcoin"] > 0.0);

        let ethereum_crash = analyze_sentiment("Ethereum crash causes major losses", &None, &None);
        assert!(ethereum_crash.topic_sentiments.contains_key("ethereum"));
        assert!(ethereum_crash.topic_sentiments["ethereum"] < 0.0);
    }
2416
2417    #[test]
2418    fn test_analyze_sentiment_edge_cases() {
2419        // Test empty content
2420        let empty_sentiment = analyze_sentiment("", &None, &None);
2421        assert_eq!(empty_sentiment.overall_score, 0.0);
2422        assert_eq!(empty_sentiment.classification, "Neutral");
2423
2424        // Test content with no sentiment words
2425        let neutral_words = analyze_sentiment(
2426            "The weather is nice today",
2427            &Some("Random content".to_string()),
2428            &None,
2429        );
2430        assert_eq!(neutral_words.overall_score, 0.0);
2431
2432        // Test very long content
2433        let long_content = "bullish ".repeat(100);
2434        let long_sentiment = analyze_sentiment(&long_content, &None, &None);
2435        assert!(long_sentiment.overall_score > 0.0);
2436        assert!(long_sentiment.confidence > 0.0);
2437    }
2438
    /// Exercises `calculate_market_impact` across categories and emotional
    /// profiles, using a credibility-80 source throughout.
    #[test]
    fn test_calculate_market_impact() {
        let sentiment = NewsSentiment {
            overall_score: 0.5,
            confidence: 0.8,
            classification: "Bullish".to_string(),
            topic_sentiments: HashMap::new(),
            emotions: EmotionalIndicators {
                fear: 0.0,
                greed: 0.0,
                excitement: 0.0,
                uncertainty: 0.0,
                urgency: 0.9, // High urgency
            },
            key_phrases: vec![],
        };

        let source = NewsSource {
            id: "test".to_string(),
            name: "Test Source".to_string(),
            url: "https://test.com".to_string(),
            category: "Mainstream".to_string(),
            credibility_score: 80,
            accuracy_rating: None,
            bias_score: None,
            is_verified: true,
            logo_url: None,
        };

        // Test breaking news category with high urgency: urgency > 0.7 forces
        // the "Immediate" horizon, and the "hack" tag adds a risk factor.
        let breaking_category = NewsCategory {
            primary: "Breaking".to_string(),
            sub_category: None,
            tags: vec!["hack".to_string()],
            geographic_scope: vec!["Global".to_string()],
            target_audience: "Retail".to_string(),
        };

        let impact = calculate_market_impact(&sentiment, &source, &breaking_category);
        assert_eq!(impact.time_horizon, "Immediate");
        assert!(impact.impact_score > 0);
        assert!(impact.risk_factors.len() > 0);

        // Test different categories: the "regulation" tag should surface a
        // regulatory risk factor.
        let regulation_category = NewsCategory {
            primary: "Regulation".to_string(),
            sub_category: None,
            tags: vec!["regulation".to_string()],
            geographic_scope: vec!["US".to_string()],
            target_audience: "Institutional".to_string(),
        };

        let reg_impact = calculate_market_impact(&sentiment, &source, &regulation_category);
        assert!(reg_impact
            .risk_factors
            .contains(&"Regulatory risk".to_string()));

        // Test with different sentiment emotions: fear and uncertainty above
        // the 0.6 cutoff should each add a risk factor.
        let fearful_sentiment = NewsSentiment {
            overall_score: -0.5,
            confidence: 0.8,
            classification: "Bearish".to_string(),
            topic_sentiments: HashMap::new(),
            emotions: EmotionalIndicators {
                fear: 0.8,
                greed: 0.0,
                excitement: 0.0,
                uncertainty: 0.7,
                urgency: 0.0,
            },
            key_phrases: vec![],
        };

        let fear_impact = calculate_market_impact(&fearful_sentiment, &source, &breaking_category);
        assert!(fear_impact
            .risk_factors
            .contains(&"Market fear".to_string()));
        assert!(fear_impact
            .risk_factors
            .contains(&"High uncertainty".to_string()));
    }
2520
2521    #[test]
2522    fn test_calculate_market_impact_simple() {
2523        // Test high impact keywords
2524        let hack_impact = calculate_market_impact_simple("Major hack exploit discovered");
2525        assert_eq!(hack_impact.impact_level, "High");
2526        assert_eq!(hack_impact.impact_score, 70);
2527
2528        let sec_impact = calculate_market_impact_simple("SEC announces new ban");
2529        assert_eq!(sec_impact.impact_level, "High");
2530        assert_eq!(sec_impact.impact_score, 70);
2531
2532        // Test medium impact keywords
2533        let launch_impact = calculate_market_impact_simple("Company announces new launch");
2534        assert_eq!(launch_impact.impact_level, "Medium");
2535        assert_eq!(launch_impact.impact_score, 50);
2536
2537        // Test low impact (no special keywords)
2538        let normal_impact = calculate_market_impact_simple("Regular news update");
2539        assert_eq!(normal_impact.impact_level, "Low");
2540        assert_eq!(normal_impact.impact_score, 30);
2541
2542        // Test case insensitivity
2543        let case_impact = calculate_market_impact_simple("HACK discovered in PROTOCOL");
2544        assert_eq!(case_impact.impact_level, "High");
2545    }
2546
2547    #[test]
2548    fn test_extract_entities_from_text() {
2549        // Test cryptocurrency entities
2550        let crypto_text = "Bitcoin and Ethereum are leading cryptocurrencies";
2551        let entities = extract_entities_from_text(
2552            crypto_text,
2553            &Some("BTC and ETH analysis".to_string()),
2554            &Some("Solana SOL also mentioned".to_string()),
2555            "crypto",
2556        );
2557
2558        let crypto_names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2559        assert!(crypto_names.contains(&"Bitcoin"));
2560        assert!(crypto_names.contains(&"Ethereum"));
2561        assert!(crypto_names.contains(&"Solana"));
2562
2563        // Test company entities
2564        let company_text = "Coinbase and Binance are major exchanges";
2565        let company_entities = extract_entities_from_text(company_text, &None, &None, "exchanges");
2566
2567        let company_names: Vec<&str> = company_entities.iter().map(|e| e.name.as_str()).collect();
2568        assert!(company_names.contains(&"Coinbase"));
2569        assert!(company_names.contains(&"Binance"));
2570
2571        // Test person entities
2572        let person_text = "Vitalik Buterin and CZ discussed the future";
2573        let person_entities = extract_entities_from_text(person_text, &None, &None, "crypto");
2574
2575        let person_names: Vec<&str> = person_entities.iter().map(|e| e.name.as_str()).collect();
2576        assert!(person_names.contains(&"Vitalik Buterin"));
2577        assert!(person_names.contains(&"CZ"));
2578
2579        // Test protocol entities
2580        let protocol_text = "DeFi and NFT protocols are growing";
2581        let protocol_entities =
2582            extract_entities_from_text(protocol_text, &None, &None, "protocols");
2583
2584        let protocol_names: Vec<&str> = protocol_entities.iter().map(|e| e.name.as_str()).collect();
2585        assert!(protocol_names.contains(&"DeFi"));
2586        assert!(protocol_names.contains(&"NFT"));
2587
2588        // Test default topic when no entities found
2589        let no_entities =
2590            extract_entities_from_text("Random news content", &None, &None, "default_topic");
2591        assert_eq!(no_entities.len(), 1);
2592        assert_eq!(no_entities[0].name, "default_topic");
2593        assert_eq!(no_entities[0].entity_type, "Topic");
2594
2595        // Test sorting by relevance
2596        let multi_mention_text = "Bitcoin Bitcoin Bitcoin Ethereum";
2597        let sorted_entities =
2598            extract_entities_from_text(multi_mention_text, &None, &None, "crypto");
2599        // Bitcoin should be first due to higher mention count
2600        assert_eq!(sorted_entities[0].name, "Bitcoin");
2601        assert!(sorted_entities[0].mention_count > sorted_entities[1].mention_count);
2602    }
2603
2604    #[test]
2605    fn test_extract_tags_from_text() {
2606        // Test various tag extractions
2607        let defi_text = "DeFi protocols are revolutionizing finance";
2608        let defi_tags = extract_tags_from_text(defi_text, &None);
2609        assert!(defi_tags.contains(&"defi".to_string()));
2610
2611        let nft_text = "NFT marketplace sees growth";
2612        let nft_tags = extract_tags_from_text(nft_text, &Some("NFT content".to_string()));
2613        assert!(nft_tags.contains(&"nft".to_string()));
2614
2615        let multi_tag_text = "Layer 2 solutions improve smart contract efficiency";
2616        let multi_tags = extract_tags_from_text(multi_tag_text, &None);
2617        assert!(multi_tags.contains(&"layer2".to_string()));
2618        assert!(multi_tags.contains(&"smart-contracts".to_string()));
2619
2620        // Test case insensitivity
2621        let case_text = "DEFI and NFT protocols";
2622        let case_tags = extract_tags_from_text(case_text, &None);
2623        assert!(case_tags.contains(&"defi".to_string()));
2624        assert!(case_tags.contains(&"nft".to_string()));
2625
2626        // Test no tags found
2627        let no_tag_text = "Random content without keywords";
2628        let no_tags = extract_tags_from_text(no_tag_text, &None);
2629        assert!(no_tags.is_empty());
2630
2631        // Test duplicate removal
2632        let duplicate_text = "DeFi DeFi protocols and DeFi systems";
2633        let dup_tags = extract_tags_from_text(duplicate_text, &None);
2634        assert_eq!(dup_tags.iter().filter(|&t| t == "defi").count(), 1);
2635    }
2636
2637    #[test]
2638    fn test_extract_crypto_mentions() {
2639        // Test various crypto mentions
2640        let crypto_text = "Bitcoin BTC Ethereum ETH prices";
2641        let cryptos = extract_crypto_mentions(
2642            crypto_text,
2643            &Some("Solana SOL analysis".to_string()),
2644            &Some("Cardano ADA update".to_string()),
2645        );
2646
2647        assert!(cryptos.contains(&"bitcoin".to_string()));
2648        assert!(cryptos.contains(&"ethereum".to_string()));
2649        assert!(cryptos.contains(&"solana".to_string()));
2650        assert!(cryptos.contains(&"cardano".to_string()));
2651
2652        // Test case insensitivity
2653        let case_text = "BITCOIN and ethereum prices";
2654        let case_cryptos = extract_crypto_mentions(case_text, &None, &None);
2655        assert!(case_cryptos.contains(&"bitcoin".to_string()));
2656        assert!(case_cryptos.contains(&"ethereum".to_string()));
2657
2658        // Test symbol vs name deduplication
2659        let symbol_text = "BTC Bitcoin analysis";
2660        let symbol_cryptos = extract_crypto_mentions(symbol_text, &None, &None);
2661        assert_eq!(symbol_cryptos.iter().filter(|&c| c == "bitcoin").count(), 1);
2662
2663        // Test no crypto mentions
2664        let no_crypto_text = "Weather news today";
2665        let no_cryptos = extract_crypto_mentions(no_crypto_text, &None, &None);
2666        assert!(no_cryptos.is_empty());
2667    }
2668
2669    #[test]
2670    fn test_calculate_quality_metrics() {
2671        // Test high quality metrics
2672        let high_quality = calculate_quality_metrics(
2673            "Comprehensive Analysis of Market Trends",
2674            &Some("Detailed description of market conditions".to_string()),
2675            &Some("a".repeat(2500)), // Long content > 2000 chars
2676            90,                      // High source credibility
2677        );
2678        assert_eq!(high_quality.depth_score, 85);
2679        assert_eq!(high_quality.writing_quality, 75);
2680        assert_eq!(high_quality.factual_accuracy, 90);
2681        assert!(high_quality.overall_score > 70);
2682
2683        // Test medium quality metrics
2684        let medium_quality = calculate_quality_metrics(
2685            "Market Update",
2686            &Some("Brief description".to_string()),
2687            &Some("a".repeat(1500)), // Medium content length
2688            70,
2689        );
2690        assert_eq!(medium_quality.depth_score, 70);
2691
2692        // Test low quality metrics
2693        let low_quality = calculate_quality_metrics("News", &None, &None, 50);
2694        assert_eq!(low_quality.depth_score, 25);
2695        assert_eq!(low_quality.writing_quality, 50);
2696
2697        // Test edge cases
2698        let edge_case = calculate_quality_metrics(
2699            "Short title with exactly five words here",
2700            &Some("Description present".to_string()),
2701            &Some("a".repeat(500)), // Exactly 500 chars
2702            75,
2703        );
2704        assert_eq!(edge_case.depth_score, 55);
2705        assert_eq!(edge_case.writing_quality, 75);
2706    }
2707
2708    #[test]
2709    fn test_calculate_market_impact_from_content() {
2710        // Test critical impact keywords
2711        let critical_content = calculate_market_impact_from_content(
2712            "Major hack exploit discovered",
2713            &Some("Criminal fraud investigation".to_string()),
2714            &Some("SEC enforcement action bankruptcy".to_string()),
2715        );
2716        assert_eq!(critical_content.impact_level, "Critical");
2717        assert_eq!(critical_content.time_horizon, "Immediate");
2718        assert!(critical_content.impact_score >= 85);
2719
2720        // Test high impact keywords
2721        let high_content = calculate_market_impact_from_content(
2722            "Partnership announcement",
2723            &Some("Major adoption integration".to_string()),
2724            &Some("New launch acquisition".to_string()),
2725        );
2726        assert_eq!(high_content.impact_level, "High");
2727
2728        // Test medium impact keywords
2729        let medium_content = calculate_market_impact_from_content(
2730            "Update announcement",
2731            &Some("Upgrade report".to_string()),
2732            &Some("Analysis of trends".to_string()),
2733        );
2734        assert_eq!(medium_content.impact_level, "Medium");
2735
2736        // Test low impact (no keywords)
2737        let low_content = calculate_market_impact_from_content(
2738            "Regular news",
2739            &Some("Standard content".to_string()),
2740            &None,
2741        );
2742        assert_eq!(low_content.impact_level, "Low");
2743
2744        // Test sentiment impact on score
2745        let positive_sentiment_content = calculate_market_impact_from_content(
2746            "Partnership bullish surge rally",
2747            &Some("Strong positive growth".to_string()),
2748            &None,
2749        );
2750        assert!(positive_sentiment_content.impact_score > 70);
2751
2752        // Test potential price impact calculation
2753        assert!(critical_content.potential_price_impact.is_some());
2754        assert!(high_content.potential_price_impact.is_some());
2755        assert!(low_content.potential_price_impact.is_none());
2756    }
2757
2758    #[test]
2759    fn test_extract_affected_sectors() {
2760        // Test single sector
2761        let defi_text = "defi protocols are growing";
2762        let defi_sectors = extract_affected_sectors(defi_text);
2763        assert!(defi_sectors.contains(&"DeFi".to_string()));
2764
2765        // Test multiple sectors
2766        let multi_text = "nft marketplace and exchange listing";
2767        let multi_sectors = extract_affected_sectors(multi_text);
2768        assert!(multi_sectors.contains(&"NFT".to_string()));
2769        assert!(multi_sectors.contains(&"CEX".to_string()));
2770
2771        // Test no sectors (should default to General)
2772        let general_text = "random news content";
2773        let general_sectors = extract_affected_sectors(general_text);
2774        assert_eq!(general_sectors, vec!["General".to_string()]);
2775
2776        // Test deduplication
2777        let dup_text = "defi and defi protocols";
2778        let dup_sectors = extract_affected_sectors(dup_text);
2779        assert_eq!(dup_sectors.iter().filter(|&s| s == "DeFi").count(), 1);
2780    }
2781
2782    #[test]
2783    fn test_extract_risk_factors() {
2784        // Test various risk factors
2785        let risk_text = "regulation SEC hack exploit volatile uncertain lawsuit investigation";
2786        let risks = extract_risk_factors(risk_text);
2787
2788        assert!(risks.contains(&"Regulatory uncertainty".to_string()));
2789        assert!(risks.contains(&"Regulatory action".to_string()));
2790        assert!(risks.contains(&"Security vulnerability".to_string()));
2791        assert!(risks.contains(&"Protocol vulnerability".to_string()));
2792        assert!(risks.contains(&"Market volatility".to_string()));
2793        assert!(risks.contains(&"Market uncertainty".to_string()));
2794        assert!(risks.contains(&"Legal risk".to_string()));
2795        assert!(risks.contains(&"Regulatory investigation".to_string()));
2796
2797        // Test no risk factors
2798        let safe_text = "positive news about growth";
2799        let no_risks = extract_risk_factors(safe_text);
2800        assert!(no_risks.is_empty());
2801
2802        // Test deduplication
2803        let dup_risk_text = "regulation and regulation concerns";
2804        let dup_risks = extract_risk_factors(dup_risk_text);
2805        assert_eq!(
2806            dup_risks
2807                .iter()
2808                .filter(|&r| r == "Regulatory uncertainty")
2809                .count(),
2810            1
2811        );
2812    }
2813
2814    // Helper functions for creating test data
2815    fn create_test_article_with_credibility(credibility: u32) -> NewsArticle {
2816        NewsArticle {
2817            id: "test".to_string(),
2818            title: "Test Article".to_string(),
2819            url: "https://test.com".to_string(),
2820            description: None,
2821            content: None,
2822            published_at: Utc::now(),
2823            source: NewsSource {
2824                id: "test".to_string(),
2825                name: "Test Source".to_string(),
2826                url: "https://test.com".to_string(),
2827                category: "Test".to_string(),
2828                credibility_score: credibility,
2829                accuracy_rating: None,
2830                bias_score: None,
2831                is_verified: true,
2832                logo_url: None,
2833            },
2834            category: NewsCategory {
2835                primary: "Test".to_string(),
2836                sub_category: None,
2837                tags: vec![],
2838                geographic_scope: vec![],
2839                target_audience: "Test".to_string(),
2840            },
2841            sentiment: NewsSentiment {
2842                overall_score: 0.0,
2843                confidence: 0.5,
2844                classification: "Neutral".to_string(),
2845                topic_sentiments: HashMap::new(),
2846                emotions: EmotionalIndicators {
2847                    fear: 0.0,
2848                    greed: 0.0,
2849                    excitement: 0.0,
2850                    uncertainty: 0.0,
2851                    urgency: 0.0,
2852                },
2853                key_phrases: vec![],
2854            },
2855            market_impact: MarketImpact {
2856                impact_level: "Low".to_string(),
2857                impact_score: 30,
2858                time_horizon: "Short-term".to_string(),
2859                affected_sectors: vec![],
2860                potential_price_impact: None,
2861                historical_correlation: None,
2862                risk_factors: vec![],
2863            },
2864            entities: vec![],
2865            related_assets: vec![],
2866            quality_metrics: QualityMetrics {
2867                overall_score: 50,
2868                depth_score: 50,
2869                factual_accuracy: 50,
2870                writing_quality: 50,
2871                citation_quality: 50,
2872                uniqueness_score: 50,
2873                reading_difficulty: 5,
2874            },
2875            social_metrics: None,
2876        }
2877    }
2878
2879    fn create_test_article_with_url(url: &str) -> NewsArticle {
2880        let mut article = create_test_article_with_credibility(70);
2881        article.url = url.to_string();
2882        article
2883    }
2884
2885    fn create_test_article_with_sentiment(sentiment_score: f64) -> NewsArticle {
2886        let mut article = create_test_article_with_credibility(70);
2887        article.sentiment.overall_score = sentiment_score;
2888        article
2889    }
2890}