1use crate::{client::WebClient, error::WebToolError};
7use async_trait::async_trait;
8use chrono::{DateTime, Utc};
9use regex::Regex;
10use riglr_core::provider::ApplicationContext;
11use riglr_core::sentiment::SentimentAnalyzerMarker;
12use riglr_macros::tool;
13use schemars::JsonSchema;
14use serde::{Deserialize, Serialize};
15use std::collections::HashMap;
16use std::hash::{Hash, Hasher};
17use std::sync::Arc;
18use tracing::{debug, info, warn};
19
/// Strategy trait for computing a [`NewsSentiment`] from article text.
///
/// Implementations receive the title plus optional description/content
/// and return the analyzed sentiment, or a [`WebToolError`] on failure.
#[async_trait]
pub trait SentimentAnalyzer: Send + Sync {
    /// Analyze the sentiment of a single article.
    ///
    /// `description` and `content` may be absent; the lexicon
    /// implementation in this module scores from the title alone in
    /// that case.
    async fn analyze(
        &self,
        title: &str,
        description: Option<&str>,
        content: Option<&str>,
    ) -> Result<NewsSentiment, WebToolError>;
}
31
/// Keyword-lexicon sentiment analyzer: scores text by summing fixed
/// per-word weights over occurrences of known positive/negative terms.
pub struct LexiconSentimentAnalyzer {
    // Positive terms with weights in (0.0, 1.0].
    positive_words: Vec<(&'static str, f64)>,
    // Negative terms with weights in [-1.0, 0.0).
    negative_words: Vec<(&'static str, f64)>,
}
39
40impl Default for LexiconSentimentAnalyzer {
41 fn default() -> Self {
42 Self {
43 positive_words: vec![
44 ("bullish", 0.8),
45 ("surge", 0.7),
46 ("rally", 0.7),
47 ("breakthrough", 0.8),
48 ("adoption", 0.6),
49 ("partnership", 0.6),
50 ("growth", 0.5),
51 ("success", 0.6),
52 ("innovative", 0.5),
53 ("leading", 0.4),
54 ("strong", 0.5),
55 ("positive", 0.5),
56 ("gains", 0.6),
57 ("rise", 0.5),
58 ("increase", 0.4),
59 ("improve", 0.5),
60 ("upgrade", 0.6),
61 ("expand", 0.5),
62 ("launch", 0.4),
63 ("milestone", 0.6),
64 ],
65 negative_words: vec![
66 ("bearish", -0.8),
67 ("crash", -0.9),
68 ("plunge", -0.8),
69 ("collapse", -0.9),
70 ("hack", -0.9),
71 ("exploit", -0.9),
72 ("scam", -0.9),
73 ("fraud", -0.9),
74 ("decline", -0.6),
75 ("fall", -0.5),
76 ("drop", -0.5),
77 ("loss", -0.6),
78 ("failure", -0.7),
79 ("risk", -0.4),
80 ("concern", -0.4),
81 ("warning", -0.5),
82 ("threat", -0.6),
83 ("vulnerable", -0.6),
84 ("weak", -0.5),
85 ("crisis", -0.8),
86 ("panic", -0.7),
87 ("fear", -0.6),
88 ("uncertainty", -0.5),
89 ("volatile", -0.4),
90 ("dump", -0.7),
91 ("rug", -0.9),
92 ("regulatory", -0.3),
93 ("lawsuit", -0.6),
94 ("investigation", -0.5),
95 ("ban", -0.7),
96 ],
97 }
98 }
99}
100
#[async_trait]
impl SentimentAnalyzer for LexiconSentimentAnalyzer {
    /// Delegates to the synchronous lexicon scorer; always returns `Ok`
    /// (the lexicon path is infallible).
    async fn analyze(
        &self,
        title: &str,
        description: Option<&str>,
        content: Option<&str>,
    ) -> Result<NewsSentiment, WebToolError> {
        Ok(self.analyze_sentiment_impl(title, description, content))
    }
}
113
// Marker-trait impl from riglr-core so the lexicon analyzer can be used
// wherever a `SentimentAnalyzerMarker` bound is required.
impl SentimentAnalyzerMarker for LexiconSentimentAnalyzer {}
116
117impl LexiconSentimentAnalyzer {
118 fn analyze_sentiment_impl(
120 &self,
121 title: &str,
122 description: Option<&str>,
123 content: Option<&str>,
124 ) -> NewsSentiment {
125 let full_text = format!(
126 "{} {} {}",
127 title,
128 description.unwrap_or(""),
129 content.unwrap_or("")
130 );
131
132 let text_lower = full_text.to_lowercase();
133
134 let mut sentiment_score = 0.0;
136 let mut word_count = 0;
137
138 for (word, weight) in &self.positive_words {
139 let count = text_lower.matches(word).count();
140 sentiment_score += count as f64 * weight;
141 word_count += count;
142 }
143
144 for (word, weight) in &self.negative_words {
145 let count = text_lower.matches(word).count();
146 sentiment_score += count as f64 * weight;
147 word_count += count;
148 }
149
150 let overall_score = if word_count > 0 {
152 (sentiment_score / word_count as f64).clamp(-1.0, 1.0)
153 } else {
154 0.0
155 };
156
157 let confidence = ((word_count as f64 / 10.0).min(1.0) * 0.5
159 + (full_text.len() as f64 / 500.0).min(1.0) * 0.5)
160 .clamp(0.3, 0.95);
161
162 let classification = if overall_score > 0.2 {
164 "Bullish"
165 } else if overall_score < -0.2 {
166 "Bearish"
167 } else {
168 "Neutral"
169 }
170 .to_string();
171
172 let emotions = self.calculate_emotions(&text_lower);
174
175 let key_phrases = self.extract_key_phrases(&full_text);
177
178 let topic_sentiments = self.calculate_topic_sentiments(&text_lower, overall_score);
180
181 NewsSentiment {
182 overall_score,
183 confidence,
184 classification,
185 topic_sentiments,
186 emotions,
187 key_phrases,
188 }
189 }
190
191 fn calculate_emotions(&self, text_lower: &str) -> EmotionalIndicators {
192 let fear_words = ["fear", "panic", "crash", "crisis", "collapse"];
193 let greed_words = ["moon", "rally", "surge", "bullish", "fomo"];
194
195 let fear_count = fear_words
196 .iter()
197 .filter(|w| text_lower.contains(*w))
198 .count();
199 let greed_count = greed_words
200 .iter()
201 .filter(|w| text_lower.contains(*w))
202 .count();
203
204 EmotionalIndicators {
205 fear: (fear_count as f64 / 5.0).min(1.0),
206 greed: (greed_count as f64 / 5.0).min(1.0),
207 excitement: if text_lower.contains("exciting") || text_lower.contains("breakthrough") {
208 0.5
209 } else {
210 0.0
211 },
212 uncertainty: if text_lower.contains("uncertain") || text_lower.contains("volatile") {
213 0.5
214 } else {
215 0.0
216 },
217 urgency: if text_lower.contains("urgent") || text_lower.contains("immediate") {
218 0.5
219 } else {
220 0.0
221 },
222 }
223 }
224
225 fn extract_key_phrases(&self, full_text: &str) -> Vec<SentimentPhrase> {
226 let mut key_phrases = Vec::new();
227
228 let phrase_patterns = [
230 (
231 r"(?i)(bullish|positive|optimistic) (?:on|about|for) (\w+)",
232 0.5,
233 ),
234 (
235 r"(?i)(bearish|negative|pessimistic) (?:on|about|for) (\w+)",
236 -0.5,
237 ),
238 (r"(?i)all.time.high", 0.6),
239 (r"(?i)all.time.low", -0.6),
240 (r"(?i)break(?:ing|s)?\s+(?:through|above)", 0.4),
241 (r"(?i)break(?:ing|s)?\s+(?:below|down)", -0.4),
242 ];
243
244 for (pattern, contribution) in &phrase_patterns {
245 if let Ok(re) = Regex::new(pattern) {
246 for matched in re.find_iter(full_text) {
247 key_phrases.push(SentimentPhrase {
248 phrase: matched.as_str().to_string(),
249 sentiment_contribution: *contribution,
250 confidence: 0.7,
251 });
252 }
253 }
254 }
255
256 key_phrases
257 }
258
259 fn calculate_topic_sentiments(
260 &self,
261 text_lower: &str,
262 overall_score: f64,
263 ) -> HashMap<String, f64> {
264 let mut topic_sentiments = HashMap::new();
265 let topics = [
266 "bitcoin",
267 "ethereum",
268 "defi",
269 "nft",
270 "regulation",
271 "adoption",
272 ];
273
274 for topic in &topics {
275 if text_lower.contains(topic) {
276 let topic_score = if text_lower.contains(&format!("{} surge", topic))
278 || text_lower.contains(&format!("{} rally", topic))
279 {
280 0.5
281 } else if text_lower.contains(&format!("{} crash", topic))
282 || text_lower.contains(&format!("{} plunge", topic))
283 {
284 -0.5
285 } else {
286 overall_score * 0.7 };
288 topic_sentiments.insert(topic.to_string(), topic_score);
289 }
290 }
291
292 topic_sentiments
293 }
294}
295
/// Runtime configuration for the news tools.
#[derive(Debug, Clone)]
pub struct NewsConfig {
    /// NewsAPI.org API key; empty string when not configured.
    pub newsapi_key: String,
    /// CryptoPanic API key; empty string when not configured.
    pub cryptopanic_key: String,
    /// Base URL for NewsAPI requests (default "https://newsapi.org/v2").
    pub base_url: String,
    /// Maximum articles requested per query (sent as NewsAPI `pageSize`).
    pub max_articles: u32,
    /// Default lookback window for article freshness, in hours.
    pub freshness_hours: u32,
    /// Minimum source credibility required to keep an article.
    pub min_credibility_score: u32,
}
312
/// A normalized news article aggregated from any supported provider.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct NewsArticle {
    /// Provider-scoped unique id, e.g. "newsapi_<timestamp>_<urlhash>".
    pub id: String,
    /// Article headline.
    pub title: String,
    /// Canonical article URL (also the deduplication key).
    pub url: String,
    /// Short summary, when the provider supplies one.
    pub description: Option<String>,
    /// Full or truncated body text, when available.
    pub content: Option<String>,
    /// Publication timestamp (UTC).
    pub published_at: DateTime<Utc>,
    /// Publishing source metadata.
    pub source: NewsSource,
    /// Categorization and tags.
    pub category: NewsCategory,
    /// Lexicon-derived sentiment analysis.
    pub sentiment: NewsSentiment,
    /// Estimated market impact of the article.
    pub market_impact: MarketImpact,
    /// Entities detected in the article.
    pub entities: Vec<NewsEntity>,
    /// Asset symbols/topics the article relates to.
    pub related_assets: Vec<String>,
    /// Heuristic content-quality scores.
    pub quality_metrics: QualityMetrics,
    /// Social engagement metrics, when known.
    pub social_metrics: Option<SocialMetrics>,
}
345
/// Metadata about a publishing source.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct NewsSource {
    /// Provider-side source identifier (may be empty).
    pub id: String,
    /// Human-readable source name.
    pub name: String,
    /// Source URL.
    pub url: String,
    /// Source category label (this module uses "Mainstream" / "Crypto").
    pub category: String,
    /// Credibility score; higher is more credible. Compared against
    /// `NewsConfig::min_credibility_score` when filtering.
    pub credibility_score: u32,
    /// Historical accuracy rating, when tracked. TODO confirm scale.
    pub accuracy_rating: Option<f64>,
    /// Editorial bias score, when tracked. TODO confirm scale.
    pub bias_score: Option<f64>,
    /// Whether the source is verified.
    pub is_verified: bool,
    /// Source logo URL, when available.
    pub logo_url: Option<String>,
}
368
/// Article categorization.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct NewsCategory {
    /// Primary category label (e.g. "News", "Trending").
    pub primary: String,
    /// Optional finer-grained category.
    pub sub_category: Option<String>,
    /// Free-form tags; also feed trending-topic extraction.
    pub tags: Vec<String>,
    /// Geographic regions the article covers (e.g. "Global").
    pub geographic_scope: Vec<String>,
    /// Intended audience label (e.g. "Retail", "Crypto").
    pub target_audience: String,
}
383
/// Sentiment-analysis output for a single article.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct NewsSentiment {
    /// Aggregate sentiment in [-1.0, 1.0]; negative means bearish.
    pub overall_score: f64,
    /// Analyzer confidence (lexicon analyzer emits [0.3, 0.95]).
    pub confidence: f64,
    /// Coarse label: "Bullish", "Bearish" or "Neutral".
    pub classification: String,
    /// Sentiment per detected topic (e.g. "bitcoin").
    pub topic_sentiments: HashMap<String, f64>,
    /// Keyword-derived emotional signals.
    pub emotions: EmotionalIndicators,
    /// Phrases that contributed to the score.
    pub key_phrases: Vec<SentimentPhrase>,
}
400
/// Keyword-derived emotional signals, each scaled to [0.0, 1.0].
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct EmotionalIndicators {
    /// Fear level (fear/panic/crash/crisis/collapse vocabulary).
    pub fear: f64,
    /// Greed level (moon/rally/surge/bullish/fomo vocabulary).
    pub greed: f64,
    /// Excitement level.
    pub excitement: f64,
    /// Uncertainty level.
    pub uncertainty: f64,
    /// Urgency level.
    pub urgency: f64,
}
415
/// A phrase that contributed to an article's sentiment score.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct SentimentPhrase {
    /// The matched phrase text.
    pub phrase: String,
    /// Signed contribution to the overall score.
    pub sentiment_contribution: f64,
    /// Confidence in this match (regex matches use a fixed 0.7).
    pub confidence: f64,
}
426
/// Estimated market impact of a news item.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct MarketImpact {
    /// Qualitative impact label. TODO confirm value set with
    /// `calculate_market_impact`.
    pub impact_level: String,
    /// Numeric impact score; breaking-news alerts filter on this
    /// (default threshold 60).
    pub impact_score: u32,
    /// Expected duration/horizon of the impact.
    pub time_horizon: String,
    /// Market sectors likely affected.
    pub affected_sectors: Vec<String>,
    /// Estimated price move, when modeled. TODO confirm units.
    pub potential_price_impact: Option<f64>,
    /// Correlation with similar historical events, when available.
    pub historical_correlation: Option<f64>,
    /// Risk factors identified for the event.
    pub risk_factors: Vec<String>,
}
445
/// An entity detected in an article.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct NewsEntity {
    /// Entity name.
    pub name: String,
    /// Entity kind label.
    pub entity_type: String,
    /// Relevance of the entity to the article.
    pub relevance_score: f64,
    /// Sentiment toward the entity, when computed.
    pub sentiment: Option<f64>,
    /// Number of mentions within the article.
    pub mention_count: u32,
    /// Text snippets where the entity appears.
    pub contexts: Vec<String>,
}
462
/// Heuristic content-quality scores for an article.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct QualityMetrics {
    /// Composite quality score.
    pub overall_score: u32,
    /// Depth/thoroughness of coverage.
    pub depth_score: u32,
    /// Estimated factual accuracy.
    pub factual_accuracy: u32,
    /// Prose quality.
    pub writing_quality: u32,
    /// Quality of citations/sourcing.
    pub citation_quality: u32,
    /// How original the content is.
    pub uniqueness_score: u32,
    /// Reading difficulty. TODO confirm scale (this module assigns 5).
    pub reading_difficulty: u32,
}
481
/// Social engagement metrics for an article.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct SocialMetrics {
    /// Total shares across platforms.
    pub total_shares: u32,
    /// Shares on Twitter/X.
    pub twitter_shares: u32,
    /// Mentions on Reddit.
    pub reddit_mentions: u32,
    /// Shares on LinkedIn.
    pub linkedin_shares: u32,
    /// Aggregate social sentiment score.
    pub social_sentiment: f64,
    /// Virality score.
    pub viral_score: u32,
    /// Mentions by influencer accounts.
    pub influencer_mentions: u32,
}
500
/// Result of a news aggregation query.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct NewsAggregationResult {
    /// The topic that was queried (or "Trending").
    pub topic: String,
    /// The aggregated, filtered, deduplicated articles.
    pub articles: Vec<NewsArticle>,
    /// Bookkeeping about how the aggregation was performed.
    pub metadata: AggregationMetadata,
    /// Collection-level analysis.
    pub insights: NewsInsights,
    /// Topics trending within the result set.
    pub trending_topics: Vec<TrendingTopic>,
    /// When the aggregation completed (UTC).
    pub aggregated_at: DateTime<Utc>,
}
517
/// Bookkeeping about how an aggregation was performed.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct AggregationMetadata {
    /// Total articles found.
    pub total_articles: u32,
    /// Articles included in the result.
    pub returned_articles: u32,
    /// Provider names that were queried.
    pub sources_queried: Vec<String>,
    /// Mean credibility score of the returned articles' sources.
    pub avg_credibility: f64,
    /// Lookback window, in hours.
    pub time_range_hours: u32,
    /// Number of duplicate articles dropped during deduplication.
    pub duplicates_removed: u32,
}
534
/// Collection-level analysis derived from a set of articles.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct NewsInsights {
    /// Mean sentiment score across the collection.
    pub overall_sentiment: f64,
    /// Trend label: "Improving", "Declining" or "Stable"
    /// ("Unknown" when analysis is skipped).
    pub sentiment_trend: String,
    /// Most-mentioned entities with aggregate sentiment.
    pub top_entities: Vec<EntityMention>,
    /// Recurring tags/themes across the collection.
    pub dominant_themes: Vec<String>,
    /// Article count per geographic region.
    pub geographic_distribution: HashMap<String, u32>,
    /// Diversity statistics over the contributing sources.
    pub source_diversity: SourceDiversity,
    /// Article count per impact level.
    pub impact_distribution: HashMap<String, u32>,
}
553
/// Aggregated mention statistics for a single entity.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct EntityMention {
    /// Entity name.
    pub name: String,
    /// Total mentions across the collection.
    pub mention_count: u32,
    /// Average sentiment across mentions.
    pub avg_sentiment: f64,
    /// Entity kind label (currently always "Unknown" in this module).
    pub entity_type: String,
    /// True when mentions exceed the trending threshold (> 5).
    pub is_trending: bool,
}
568
/// Diversity statistics over contributing news sources.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct SourceDiversity {
    /// Number of distinct source names.
    pub unique_sources: u32,
    /// Source count per source-type label.
    pub source_types: HashMap<String, u32>,
    /// Source count per geographic region.
    pub geographic_sources: HashMap<String, u32>,
    /// Source count per credibility bucket.
    pub credibility_distribution: HashMap<String, u32>,
}
581
/// A topic trending across the aggregated articles.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct TrendingTopic {
    /// Topic/tag name.
    pub topic: String,
    /// Number of articles mentioning the topic.
    pub article_count: u32,
    /// Articles per hour (count over an assumed 24h window).
    pub velocity: f64,
    /// Mean sentiment across the topic's articles.
    pub sentiment: f64,
    /// Keywords frequently co-occurring with the topic.
    pub related_keywords: Vec<String>,
    /// Regions where coverage concentrates.
    pub geographic_focus: Vec<String>,
}
598
/// An alert produced by breaking-news monitoring.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct BreakingNewsAlert {
    /// Unique alert id.
    pub id: String,
    /// Severity label; compared against the caller's threshold
    /// (default "Medium").
    pub severity: String,
    /// Alert headline.
    pub title: String,
    /// Alert summary.
    pub description: String,
    /// Articles backing this alert.
    pub articles: Vec<NewsArticle>,
    /// Estimated market impact; `impact_score` is threshold-filtered.
    pub estimated_impact: MarketImpact,
    /// When the alert was generated (UTC).
    pub created_at: DateTime<Utc>,
    /// Optional expiry after which the alert is stale.
    pub expires_at: Option<DateTime<Utc>>,
}
619
620impl Default for NewsConfig {
621 fn default() -> Self {
622 Self {
623 newsapi_key: String::default(),
624 cryptopanic_key: String::default(),
625 base_url: "https://newsapi.org/v2".to_string(),
626 max_articles: 50,
627 freshness_hours: 24,
628 min_credibility_score: 60,
629 }
630 }
631}
632
633impl NewsConfig {
634 fn from_context(context: &ApplicationContext) -> Self {
636 Self {
637 newsapi_key: context
638 .config
639 .providers
640 .newsapi_key
641 .clone()
642 .unwrap_or_default(),
643 cryptopanic_key: context
644 .config
645 .providers
646 .cryptopanic_key
647 .clone()
648 .unwrap_or_default(),
649 base_url: "https://newsapi.org/v2".to_string(),
650 max_articles: 50,
651 freshness_hours: 24,
652 min_credibility_score: 60,
653 }
654 }
655}
656
657#[tool]
662pub async fn get_crypto_news(
663 context: &riglr_core::provider::ApplicationContext,
664 topic: String,
665 time_window: Option<String>, source_types: Option<Vec<String>>, min_credibility: Option<u32>,
668 include_analysis: Option<bool>,
669) -> crate::error::Result<NewsAggregationResult> {
670 debug!(
671 "Aggregating crypto news for topic: '{}' within {}",
672 topic,
673 time_window.as_deref().unwrap_or("24h")
674 );
675
676 let config = NewsConfig::from_context(context);
677 if config.newsapi_key.is_empty() && config.cryptopanic_key.is_empty() {
678 return Err(WebToolError::Auth(
679 "No news API keys configured".to_string(),
680 ));
681 }
682
683 let client = WebClient::default();
684
685 let mut all_articles = Vec::new();
687 let mut sources_queried = Vec::new();
688
689 if !config.newsapi_key.is_empty() {
691 match query_newsapi(&client, &config, &topic, &time_window).await {
692 Ok(mut articles) => {
693 all_articles.append(&mut articles);
694 sources_queried.push("NewsAPI".to_string());
695 }
696 Err(e) => warn!("Failed to query NewsAPI: {}", e),
697 }
698 }
699
700 if !config.cryptopanic_key.is_empty() {
702 match query_cryptopanic(&client, &config, &topic, &time_window).await {
703 Ok(mut articles) => {
704 all_articles.append(&mut articles);
705 sources_queried.push("CryptoPanic".to_string());
706 }
707 Err(e) => warn!("Failed to query CryptoPanic: {}", e),
708 }
709 }
710
711 if let Some(types) = source_types {
713 all_articles.retain(|article| types.contains(&article.source.category.to_lowercase()));
714 }
715
716 let min_cred = min_credibility.unwrap_or(config.min_credibility_score);
718 all_articles.retain(|article| article.source.credibility_score >= min_cred);
719
720 let articles = deduplicate_articles(all_articles);
722
723 let insights = if include_analysis.unwrap_or(true) {
725 analyze_news_collection(&articles).await?
726 } else {
727 NewsInsights {
728 overall_sentiment: 0.0,
729 sentiment_trend: "Unknown".to_string(),
730 top_entities: vec![],
731 dominant_themes: vec![],
732 geographic_distribution: HashMap::new(),
733 source_diversity: SourceDiversity {
734 unique_sources: 0,
735 source_types: HashMap::new(),
736 geographic_sources: HashMap::new(),
737 credibility_distribution: HashMap::new(),
738 },
739 impact_distribution: HashMap::new(),
740 }
741 };
742
743 let trending_topics = extract_trending_topics(&articles).await?;
745
746 let result = NewsAggregationResult {
747 topic: topic.clone(),
748 articles: articles.clone(),
749 metadata: AggregationMetadata {
750 total_articles: articles.len() as u32,
751 returned_articles: articles.len() as u32,
752 sources_queried,
753 avg_credibility: calculate_avg_credibility(&articles),
754 time_range_hours: parse_time_window(&time_window.unwrap_or_else(|| "24h".to_string())),
755 duplicates_removed: 0, },
757 insights,
758 trending_topics,
759 aggregated_at: Utc::now(),
760 };
761
762 info!(
763 "Crypto news aggregation completed: {} articles for '{}'",
764 result.articles.len(),
765 topic
766 );
767
768 Ok(result)
769}
770
771#[tool]
776pub async fn get_trending_news(
777 context: &riglr_core::provider::ApplicationContext,
778 time_window: Option<String>, categories: Option<Vec<String>>, min_impact_score: Option<u32>,
781 limit: Option<u32>,
782) -> crate::error::Result<NewsAggregationResult> {
783 debug!(
784 "Fetching trending crypto news within {}",
785 time_window.as_deref().unwrap_or("6h")
786 );
787
788 let config = NewsConfig::from_context(context);
789 let client = WebClient::default();
790
791 let trending_articles = fetch_trending_articles(
793 &client,
794 &config,
795 &time_window,
796 &categories,
797 min_impact_score.unwrap_or(60),
798 )
799 .await?;
800
801 let articles: Vec<NewsArticle> = trending_articles
802 .into_iter()
803 .take(limit.unwrap_or(30) as usize)
804 .collect();
805
806 let insights = analyze_trending_patterns(&articles).await?;
808 let trending_topics = extract_trending_topics(&articles).await?;
809
810 let result = NewsAggregationResult {
811 topic: "Trending".to_string(),
812 articles: articles.clone(),
813 metadata: AggregationMetadata {
814 total_articles: articles.len() as u32,
815 returned_articles: articles.len() as u32,
816 sources_queried: vec!["Multiple".to_string()],
817 avg_credibility: calculate_avg_credibility(&articles),
818 time_range_hours: parse_time_window(&time_window.unwrap_or_else(|| "6h".to_string())),
819 duplicates_removed: 0,
820 },
821 insights,
822 trending_topics,
823 aggregated_at: Utc::now(),
824 };
825
826 info!(
827 "Trending news aggregation completed: {} trending articles",
828 result.articles.len()
829 );
830
831 Ok(result)
832}
833
834#[tool]
839pub async fn monitor_breaking_news(
840 context: &riglr_core::provider::ApplicationContext,
841 keywords: Vec<String>,
842 severity_threshold: Option<String>, impact_threshold: Option<u32>, _alert_channels: Option<Vec<String>>, ) -> crate::error::Result<Vec<BreakingNewsAlert>> {
846 debug!("Monitoring breaking news for keywords: {:?}", keywords);
847
848 let config = NewsConfig::from_context(context);
849 let client = WebClient::default();
850
851 let mut alerts = Vec::new();
852
853 for keyword in keywords {
855 match detect_breaking_news(&client, &config, &keyword).await {
856 Ok(mut keyword_alerts) => {
857 alerts.append(&mut keyword_alerts);
858 }
859 Err(e) => {
860 warn!("Failed to check breaking news for '{}': {}", keyword, e);
861 }
862 }
863 }
864
865 let severity_level = severity_threshold.unwrap_or_else(|| "Medium".to_string());
867 let impact_level = impact_threshold.unwrap_or(60);
868
869 alerts.retain(|alert| {
870 is_above_severity_threshold(&alert.severity, &severity_level)
871 && alert.estimated_impact.impact_score >= impact_level
872 });
873
874 info!(
875 "Breaking news monitoring completed: {} alerts generated",
876 alerts.len()
877 );
878
879 Ok(alerts)
880}
881
882#[tool]
887pub async fn analyze_market_sentiment(
888 context: &riglr_core::provider::ApplicationContext,
889 time_window: Option<String>, asset_filter: Option<Vec<String>>, _source_weights: Option<HashMap<String, f64>>, _include_social: Option<bool>,
893) -> crate::error::Result<NewsInsights> {
894 debug!(
895 "Analyzing market sentiment from news over {}",
896 time_window.as_deref().unwrap_or("24h")
897 );
898
899 let _config = NewsConfig::from_context(context);
900 let _client = WebClient::default();
901
902 let recent_news = if let Some(assets) = &asset_filter {
904 let mut all_news = Vec::new();
905 for asset in assets {
906 match get_crypto_news(
907 context,
908 asset.clone(),
909 time_window.clone(),
910 None,
911 Some(70), Some(false), )
914 .await
915 {
916 Ok(result) => all_news.extend(result.articles),
917 Err(e) => warn!("Failed to get news for {}: {}", asset, e),
918 }
919 }
920 all_news
921 } else {
922 match get_trending_news(context, time_window, None, Some(50), Some(100)).await {
924 Ok(result) => result.articles,
925 Err(_) => vec![], }
927 };
928
929 let insights = analyze_news_collection(&recent_news).await?;
931
932 info!(
933 "Market sentiment analysis completed from {} articles",
934 recent_news.len()
935 );
936
937 Ok(insights)
938}
939
/// Query the NewsAPI `/everything` endpoint for `topic` and map each hit
/// into a normalized [`NewsArticle`].
///
/// Entries missing a title or URL are skipped. Credibility and quality
/// scores are fixed heuristics for NewsAPI results.
///
/// # Errors
/// `WebToolError::Api` on transport failure or a non-"ok" NewsAPI
/// status; `WebToolError::Parsing` when the body is not valid JSON.
async fn query_newsapi(
    client: &WebClient,
    config: &NewsConfig,
    topic: &str,
    time_window: &Option<String>,
) -> crate::error::Result<Vec<NewsArticle>> {
    let url = format!("{}/everything", config.base_url);
    // Window defaults to the configured freshness, e.g. "24h".
    let window = time_window
        .clone()
        .unwrap_or_else(|| format!("{}h", config.freshness_hours));
    let hours = parse_time_window(&window) as i64;
    // Lower bound for publication time, RFC 3339 as NewsAPI expects.
    let from = (Utc::now() - chrono::Duration::hours(hours)).to_rfc3339();

    let mut params = std::collections::HashMap::new();
    params.insert("q".to_string(), topic.to_string());
    params.insert("language".to_string(), "en".to_string());
    params.insert("sortBy".to_string(), "publishedAt".to_string());
    params.insert("from".to_string(), from);
    params.insert("pageSize".to_string(), config.max_articles.to_string());

    // NewsAPI authenticates via the X-Api-Key header.
    let mut headers = std::collections::HashMap::new();
    headers.insert("X-Api-Key".to_string(), config.newsapi_key.clone());

    let resp_text = client
        .get_with_params_and_headers(&url, &params, headers)
        .await
        .map_err(|e| WebToolError::Api(format!("NewsAPI request failed: {}", e)))?;

    let json: serde_json::Value = serde_json::from_str(&resp_text)
        .map_err(|e| WebToolError::Parsing(format!("NewsAPI parse error: {}", e)))?;

    // NewsAPI signals application-level errors in-band via `status`.
    if let Some(status) = json.get("status").and_then(|s| s.as_str()) {
        if status != "ok" {
            let msg = json
                .get("message")
                .and_then(|m| m.as_str())
                .unwrap_or("unknown error");
            return Err(WebToolError::Api(format!("NewsAPI error: {}", msg)));
        }
    }

    let mut articles_out: Vec<NewsArticle> = Vec::new();
    if let Some(arr) = json.get("articles").and_then(|a| a.as_array()) {
        for a in arr {
            let title = a
                .get("title")
                .and_then(|v| v.as_str())
                .unwrap_or("")
                .to_string();
            let url = a
                .get("url")
                .and_then(|v| v.as_str())
                .unwrap_or("")
                .to_string();
            // Skip entries that cannot be identified or linked.
            if url.is_empty() || title.is_empty() {
                continue;
            }
            // Fall back to "now" when the timestamp is absent or malformed.
            let published_at = a
                .get("publishedAt")
                .and_then(|v| v.as_str())
                .and_then(|s| DateTime::parse_from_rfc3339(s).ok())
                .map_or_else(Utc::now, |dt| dt.with_timezone(&Utc));
            let description = a
                .get("description")
                .and_then(|v| v.as_str())
                .map(|s| s.to_string());
            let content = a
                .get("content")
                .and_then(|v| v.as_str())
                .map(|s| s.to_string());
            let source_obj = a.get("source").cloned().unwrap_or_default();
            let source = NewsSource {
                id: source_obj
                    .get("id")
                    .and_then(|v| v.as_str())
                    .unwrap_or("")
                    .to_string(),
                name: source_obj
                    .get("name")
                    .and_then(|v| v.as_str())
                    .unwrap_or("NewsAPI")
                    .to_string(),
                url: url.clone(),
                category: "Mainstream".to_string(),
                // Fixed heuristic credibility for NewsAPI sources.
                credibility_score: 75,
                accuracy_rating: None,
                bias_score: None,
                is_verified: true,
                logo_url: None,
            };
            let category = NewsCategory {
                primary: "News".to_string(),
                sub_category: None,
                tags: vec![topic.to_lowercase()],
                geographic_scope: vec!["Global".to_string()],
                target_audience: "Retail".to_string(),
            };
            let sentiment = analyze_sentiment(&title, &description, &content);

            let market_impact = calculate_market_impact(&sentiment, &source, &category);

            let entities = extract_entities_from_text(&title, &description, &content, topic);
            let article = NewsArticle {
                // Stable id: provider + publish timestamp + URL hash.
                id: format!("newsapi_{}_{}", published_at.timestamp(), hash64(&url)),
                title,
                url,
                description,
                content,
                published_at,
                source,
                category,
                sentiment,
                market_impact,
                entities,
                related_assets: vec![topic.to_lowercase()],
                // Fixed heuristic quality profile for NewsAPI articles.
                quality_metrics: QualityMetrics {
                    overall_score: 70,
                    depth_score: 60,
                    factual_accuracy: 75,
                    writing_quality: 70,
                    citation_quality: 60,
                    uniqueness_score: 50,
                    reading_difficulty: 5,
                },
                social_metrics: None,
            };
            articles_out.push(article);
        }
    }
    Ok(articles_out)
}
1079
/// Query the CryptoPanic posts API for `topic` and map each post into a
/// normalized [`NewsArticle`].
///
/// CryptoPanic posts carry no body text here, so description/content are
/// `None` and sentiment is computed from the title alone.
///
/// # Errors
/// `WebToolError::Api` on transport failure; `WebToolError::Parsing`
/// when the body is not valid JSON.
async fn query_cryptopanic(
    client: &WebClient,
    config: &NewsConfig,
    topic: &str,
    time_window: &Option<String>,
) -> crate::error::Result<Vec<NewsArticle>> {
    let base = "https://cryptopanic.com/api/v1/posts";
    let window = time_window.clone().unwrap_or_else(|| "24h".to_string());
    // Parsed but currently unused: the request relies on the API's
    // `filter=rising` rather than an explicit time range.
    let _hours = parse_time_window(&window);

    let mut params = std::collections::HashMap::new();
    params.insert("auth_token".to_string(), config.cryptopanic_key.clone());
    params.insert("kind".to_string(), "news".to_string());
    params.insert("currencies".to_string(), topic.to_string());
    params.insert("public".to_string(), "true".to_string());
    params.insert("filter".to_string(), "rising".to_string());

    let resp_text = client
        .get_with_params(base, &params)
        .await
        .map_err(|e| WebToolError::Api(format!("CryptoPanic request failed: {}", e)))?;

    let json: serde_json::Value = serde_json::from_str(&resp_text)
        .map_err(|e| WebToolError::Parsing(format!("CryptoPanic parse error: {}", e)))?;

    let mut articles_out = Vec::new();
    if let Some(results) = json.get("results").and_then(|v| v.as_array()) {
        for item in results {
            let title = item
                .get("title")
                .and_then(|v| v.as_str())
                .unwrap_or("")
                .to_string();
            let url = item
                .get("url")
                .and_then(|v| v.as_str())
                .unwrap_or("")
                .to_string();
            // Skip entries that cannot be identified or linked.
            if url.is_empty() || title.is_empty() {
                continue;
            }
            // Fall back to "now" when the timestamp is absent or malformed.
            let published_at = item
                .get("published_at")
                .and_then(|v| v.as_str())
                .and_then(|s| DateTime::parse_from_rfc3339(s).ok())
                .map_or_else(Utc::now, |dt| dt.with_timezone(&Utc));
            let domain = item.get("domain").and_then(|v| v.as_str()).unwrap_or("");
            let source_obj = item.get("source").cloned().unwrap_or_default();
            let source = NewsSource {
                // Prefer the nested source domain; fall back to the
                // top-level `domain` field.
                id: source_obj
                    .get("domain")
                    .and_then(|v| v.as_str())
                    .unwrap_or(domain)
                    .to_string(),
                name: source_obj
                    .get("title")
                    .and_then(|v| v.as_str())
                    .unwrap_or("CryptoPanic")
                    .to_string(),
                url: url.clone(),
                category: "Crypto".to_string(),
                // Fixed heuristic credibility for CryptoPanic sources.
                credibility_score: 70,
                accuracy_rating: None,
                bias_score: None,
                is_verified: true,
                logo_url: None,
            };
            let category = NewsCategory {
                primary: "News".to_string(),
                sub_category: None,
                tags: vec![topic.to_lowercase()],
                geographic_scope: vec!["Global".to_string()],
                target_audience: "Crypto".to_string(),
            };
            let article = NewsArticle {
                // Stable id: provider + publish timestamp + URL hash.
                id: format!("cryptopanic_{}_{}", published_at.timestamp(), hash64(&url)),
                title: title.clone(),
                url,
                description: None,
                content: None,
                published_at,
                source,
                category,
                sentiment: analyze_sentiment(&title, &None, &None),
                market_impact: calculate_market_impact_simple(&title),
                entities: extract_entities_from_text(&title, &None, &None, topic),
                related_assets: vec![topic.to_lowercase()],
                // Fixed heuristic quality profile for CryptoPanic articles.
                quality_metrics: QualityMetrics {
                    overall_score: 68,
                    depth_score: 55,
                    factual_accuracy: 70,
                    writing_quality: 65,
                    citation_quality: 55,
                    uniqueness_score: 50,
                    reading_difficulty: 5,
                },
                social_metrics: None,
            };
            articles_out.push(article);
        }
    }
    Ok(articles_out)
}
1184
1185fn deduplicate_articles(articles: Vec<NewsArticle>) -> Vec<NewsArticle> {
1187 let mut seen_urls = std::collections::HashSet::new();
1190 articles
1191 .into_iter()
1192 .filter(|article| seen_urls.insert(article.url.clone()))
1193 .collect()
1194}
1195
1196async fn analyze_news_collection(articles: &[NewsArticle]) -> crate::error::Result<NewsInsights> {
1198 let overall_sentiment = articles
1199 .iter()
1200 .map(|a| a.sentiment.overall_score)
1201 .sum::<f64>()
1202 / articles.len() as f64;
1203
1204 let mut entity_mentions: HashMap<String, (u32, f64)> = HashMap::new();
1205 let mut themes = Vec::new();
1206 let mut geo_distribution = HashMap::new();
1207
1208 for article in articles {
1209 for entity in &article.entities {
1211 let entry = entity_mentions
1212 .entry(entity.name.clone())
1213 .or_insert((0, 0.0));
1214 entry.0 += entity.mention_count;
1215 entry.1 += entity.sentiment.unwrap_or(0.0);
1216 }
1217
1218 themes.extend(article.category.tags.clone());
1220
1221 for geo in &article.category.geographic_scope {
1223 *geo_distribution.entry(geo.clone()).or_insert(0) += 1;
1224 }
1225 }
1226
1227 let top_entities: Vec<EntityMention> = entity_mentions
1228 .into_iter()
1229 .map(|(name, (count, sentiment))| EntityMention {
1230 name: name.clone(),
1231 mention_count: count,
1232 avg_sentiment: sentiment / count as f64,
1233 entity_type: "Unknown".to_string(), is_trending: count > 5, })
1236 .collect();
1237
1238 let unique_sources = articles
1240 .iter()
1241 .map(|a| &a.source.name)
1242 .collect::<std::collections::HashSet<_>>()
1243 .len() as u32;
1244
1245 let source_diversity = SourceDiversity {
1246 unique_sources,
1247 source_types: HashMap::new(), geographic_sources: HashMap::new(),
1249 credibility_distribution: HashMap::new(),
1250 };
1251
1252 Ok(NewsInsights {
1253 overall_sentiment,
1254 sentiment_trend: determine_sentiment_trend(articles),
1255 top_entities,
1256 dominant_themes: themes,
1257 geographic_distribution: geo_distribution,
1258 source_diversity,
1259 impact_distribution: HashMap::new(), })
1261}
1262
1263async fn extract_trending_topics(
1265 articles: &[NewsArticle],
1266) -> crate::error::Result<Vec<TrendingTopic>> {
1267 let mut topic_counts: HashMap<String, u32> = HashMap::new();
1268 let mut topic_sentiments: HashMap<String, f64> = HashMap::new();
1269
1270 for article in articles {
1271 for tag in &article.category.tags {
1272 *topic_counts.entry(tag.clone()).or_insert(0) += 1;
1273 *topic_sentiments.entry(tag.clone()).or_insert(0.0) += article.sentiment.overall_score;
1274 }
1275 }
1276
1277 let trending_topics: Vec<TrendingTopic> = topic_counts
1278 .into_iter()
1279 .filter(|(_, count)| *count >= 3) .map(|(topic, count)| TrendingTopic {
1281 topic: topic.clone(),
1282 article_count: count,
1283 velocity: count as f64 / 24.0, sentiment: topic_sentiments.get(&topic).unwrap_or(&0.0) / count as f64,
1285 related_keywords: vec![], geographic_focus: vec!["Global".to_string()],
1287 })
1288 .collect();
1289
1290 Ok(trending_topics)
1291}
1292
1293fn calculate_avg_credibility(articles: &[NewsArticle]) -> f64 {
1295 if articles.is_empty() {
1296 return 0.0;
1297 }
1298 articles
1299 .iter()
1300 .map(|a| a.source.credibility_score as f64)
1301 .sum::<f64>()
1302 / articles.len() as f64
1303}
1304
/// Convert a time-window label into hours.
///
/// Recognizes the legacy fixed labels ("1h", "6h", "24h", "week") and,
/// more generally, any "<n>h" (hours) or "<n>d" (days) string — the old
/// version silently mapped inputs like "12h" to 24. Unrecognized input
/// falls back to 24 hours.
fn parse_time_window(window: &str) -> u32 {
    let w = window.trim();
    if w.eq_ignore_ascii_case("week") {
        return 168;
    }
    if let Some(hours) = w.strip_suffix('h').and_then(|n| n.parse::<u32>().ok()) {
        return hours;
    }
    if let Some(days) = w.strip_suffix('d').and_then(|n| n.parse::<u32>().ok()) {
        // Saturate rather than overflow on absurd inputs.
        return days.saturating_mul(24);
    }
    24
}
1314
1315fn determine_sentiment_trend(articles: &[NewsArticle]) -> String {
1316 let avg_sentiment = articles
1318 .iter()
1319 .map(|a| a.sentiment.overall_score)
1320 .sum::<f64>()
1321 / articles.len() as f64;
1322
1323 if avg_sentiment > 0.1 {
1324 "Improving".to_string()
1325 } else if avg_sentiment < -0.1 {
1326 "Declining".to_string()
1327 } else {
1328 "Stable".to_string()
1329 }
1330}
1331
1332async fn fetch_trending_articles(
1333 client: &WebClient,
1334 config: &NewsConfig,
1335 time_window: &Option<String>,
1336 _categories: &Option<Vec<String>>,
1337 _min_impact_score: u32,
1338) -> crate::error::Result<Vec<NewsArticle>> {
1339 let mut out: Vec<NewsArticle> = Vec::new();
1341
1342 if !config.cryptopanic_key.is_empty() {
1343 let mut params = std::collections::HashMap::new();
1344 params.insert("auth_token".to_string(), config.cryptopanic_key.clone());
1345 params.insert("filter".to_string(), "rising".to_string());
1346 params.insert("kind".to_string(), "news".to_string());
1347 params.insert("public".to_string(), "true".to_string());
1348 if let Some(window) = time_window.as_ref() {
1349 let _ = window; }
1351 if let Ok(resp) = client
1352 .get_with_params("https://cryptopanic.com/api/v1/posts", ¶ms)
1353 .await
1354 {
1355 if let Ok(json) = serde_json::from_str::<serde_json::Value>(&resp) {
1356 if let Some(results) = json.get("results").and_then(|v| v.as_array()) {
1357 for item in results {
1358 let title = item
1359 .get("title")
1360 .and_then(|v| v.as_str())
1361 .unwrap_or("")
1362 .to_string();
1363 let url = item
1364 .get("url")
1365 .and_then(|v| v.as_str())
1366 .unwrap_or("")
1367 .to_string();
1368 if title.is_empty() || url.is_empty() {
1369 continue;
1370 }
1371 let published_at = item
1372 .get("published_at")
1373 .and_then(|v| v.as_str())
1374 .and_then(|s| DateTime::parse_from_rfc3339(s).ok())
1375 .map_or_else(Utc::now, |dt| dt.with_timezone(&Utc));
1376 out.push(NewsArticle {
1377 id: format!(
1378 "cp_trending_{}_{}",
1379 published_at.timestamp(),
1380 hash64(&url)
1381 ),
1382 title: title.clone(),
1383 url: url.clone(),
1384 description: None,
1385 content: None,
1386 published_at,
1387 source: NewsSource {
1388 id: "cryptopanic".to_string(),
1389 name: "CryptoPanic".to_string(),
1390 url,
1391 category: "Crypto".to_string(),
1392 credibility_score: 70,
1393 accuracy_rating: None,
1394 bias_score: None,
1395 is_verified: true,
1396 logo_url: None,
1397 },
1398 category: NewsCategory {
1399 primary: "Trending".to_string(),
1400 sub_category: None,
1401 tags: vec![],
1402 geographic_scope: vec!["Global".to_string()],
1403 target_audience: "Crypto".to_string(),
1404 },
1405 sentiment: analyze_sentiment(&title, &None, &None),
1406 market_impact: calculate_market_impact_simple(&title),
1407 entities: vec![],
1408 related_assets: vec![],
1409 quality_metrics: QualityMetrics {
1410 overall_score: 65,
1411 depth_score: 55,
1412 factual_accuracy: 70,
1413 writing_quality: 65,
1414 citation_quality: 55,
1415 uniqueness_score: 50,
1416 reading_difficulty: 5,
1417 },
1418 social_metrics: None,
1419 });
1420 }
1421 }
1422 }
1423 }
1424 }
1425
1426 if out.is_empty() && !config.newsapi_key.is_empty() {
1428 let url = format!("{}/top-headlines", config.base_url);
1429 let mut params = std::collections::HashMap::new();
1430 params.insert("q".to_string(), "crypto OR bitcoin OR ethereum".to_string());
1431 params.insert("language".to_string(), "en".to_string());
1432 params.insert("pageSize".to_string(), "20".to_string());
1433 let mut headers = std::collections::HashMap::new();
1434 headers.insert("X-Api-Key".to_string(), config.newsapi_key.clone());
1435 if let Ok(resp) = client
1436 .get_with_params_and_headers(&url, ¶ms, headers)
1437 .await
1438 {
1439 if let Ok(json) = serde_json::from_str::<serde_json::Value>(&resp) {
1440 if let Some(arts) = json.get("articles").and_then(|v| v.as_array()) {
1441 for a in arts {
1442 let title = a
1443 .get("title")
1444 .and_then(|v| v.as_str())
1445 .unwrap_or("")
1446 .to_string();
1447 let url = a
1448 .get("url")
1449 .and_then(|v| v.as_str())
1450 .unwrap_or("")
1451 .to_string();
1452 if title.is_empty() || url.is_empty() {
1453 continue;
1454 }
1455 let published_at = a
1456 .get("publishedAt")
1457 .and_then(|v| v.as_str())
1458 .and_then(|s| DateTime::parse_from_rfc3339(s).ok())
1459 .map_or_else(Utc::now, |dt| dt.with_timezone(&Utc));
1460 let description = a
1462 .get("description")
1463 .and_then(|v| v.as_str())
1464 .map(|s| s.to_string());
1465 let content = a
1466 .get("content")
1467 .and_then(|v| v.as_str())
1468 .map(|s| s.to_string());
1469 let source_name = a
1470 .get("source")
1471 .and_then(|o| o.get("name"))
1472 .and_then(|v| v.as_str())
1473 .unwrap_or("NewsAPI");
1474
1475 let source = NewsSource {
1476 id: format!("newsapi_{}", hash64(&url)),
1477 name: source_name.to_string(),
1478 url: url.clone(),
1479 category: "Mainstream".to_string(),
1480 credibility_score: 75,
1481 accuracy_rating: None,
1482 bias_score: None,
1483 is_verified: true,
1484 logo_url: None,
1485 };
1486
1487 let category = NewsCategory {
1488 primary: "Trending".to_string(),
1489 sub_category: None,
1490 tags: extract_tags_from_text(&title, &description),
1491 geographic_scope: vec!["Global".to_string()],
1492 target_audience: "Retail".to_string(),
1493 };
1494
1495 out.push(NewsArticle {
1496 id: format!(
1497 "newsapi_trending_{}_{}",
1498 published_at.timestamp(),
1499 hash64(&url)
1500 ),
1501 title: title.clone(),
1502 url,
1503 description: description.clone(),
1504 content: content.clone(),
1505 published_at,
1506 source,
1507 category,
1508 sentiment: analyze_sentiment(&title, &description, &content),
1509 market_impact: calculate_market_impact_from_content(
1510 &title,
1511 &description,
1512 &content,
1513 ),
1514 entities: extract_entities_from_text(
1515 &title,
1516 &description,
1517 &content,
1518 "crypto",
1519 ),
1520 related_assets: extract_crypto_mentions(&title, &description, &content),
1521 quality_metrics: calculate_quality_metrics(
1522 &title,
1523 &description,
1524 &content,
1525 75,
1526 ),
1527 social_metrics: None,
1528 });
1529 }
1530 }
1531 }
1532 }
1533 }
1534
1535 Ok(out)
1536}
1537
/// 64-bit hash of `s`, stable within a single process run; used to build
/// unique article ids. (DefaultHasher's output is not guaranteed stable
/// across Rust releases, so ids should not be persisted long-term.)
fn hash64(s: &str) -> u64 {
    use std::collections::hash_map::DefaultHasher;
    let mut state = DefaultHasher::new();
    Hash::hash(s, &mut state);
    state.finish()
}
1543
1544async fn analyze_trending_patterns(articles: &[NewsArticle]) -> crate::error::Result<NewsInsights> {
1545 analyze_news_collection(articles).await
1547}
1548
1549async fn detect_breaking_news(
1550 client: &WebClient,
1551 config: &NewsConfig,
1552 keyword: &str,
1553) -> crate::error::Result<Vec<BreakingNewsAlert>> {
1554 let mut alerts: Vec<BreakingNewsAlert> = Vec::new();
1556
1557 let mut articles: Vec<NewsArticle> = Vec::new();
1558 if !config.newsapi_key.is_empty() {
1559 if let Ok(mut a) = query_newsapi(client, config, keyword, &Some("1h".to_string())).await {
1560 articles.append(&mut a);
1561 }
1562 }
1563 if !config.cryptopanic_key.is_empty() {
1564 if let Ok(mut a) = query_cryptopanic(client, config, keyword, &Some("1h".to_string())).await
1565 {
1566 articles.append(&mut a);
1567 }
1568 }
1569
1570 let urgent_terms = [
1572 "breaking",
1573 "urgent",
1574 "exploit",
1575 "hack",
1576 "outage",
1577 "halt",
1578 "SEC",
1579 "lawsuit",
1580 "bankrupt",
1581 "halted",
1582 "paused",
1583 "breach",
1584 "attack",
1585 "flash loan",
1586 "rug",
1587 ];
1588 let now = Utc::now();
1589 let mut grouped: Vec<NewsArticle> = Vec::new();
1590 for a in articles.into_iter() {
1591 if (now - a.published_at) <= chrono::Duration::hours(2) {
1592 let hay = format!(
1593 "{} {} {}",
1594 a.title,
1595 a.description.clone().unwrap_or_default(),
1596 a.url
1597 );
1598 if urgent_terms
1599 .iter()
1600 .any(|t| hay.to_lowercase().contains(&t.to_lowercase()))
1601 {
1602 grouped.push(a);
1603 }
1604 }
1605 }
1606
1607 if !grouped.is_empty() {
1608 let est_impact = MarketImpact {
1609 impact_level: "High".to_string(),
1610 impact_score: 80,
1611 time_horizon: "Immediate".to_string(),
1612 affected_sectors: vec!["Crypto".to_string()],
1613 potential_price_impact: Some(5.0),
1614 historical_correlation: None,
1615 risk_factors: vec!["Volatility".to_string()],
1616 };
1617 let alert = BreakingNewsAlert {
1618 id: format!("breaking_{}_{}", keyword.to_lowercase(), now.timestamp()),
1619 severity: "High".to_string(),
1620 title: format!("Breaking: {} - {} items", keyword, grouped.len()),
1621 description: format!("Detected urgent developments related to '{}'.", keyword),
1622 articles: grouped,
1623 estimated_impact: est_impact,
1624 created_at: now,
1625 expires_at: Some(now + chrono::Duration::hours(4)),
1626 };
1627 alerts.push(alert);
1628 }
1629
1630 Ok(alerts)
1631}
1632
/// True when `current_severity` ranks at or above `threshold` on the
/// Low < Medium < High < Critical scale.
///
/// Unrecognized values keep the original fallbacks: an unknown current
/// severity ranks as Low (index 0), an unknown threshold as Medium (index 1).
fn is_above_severity_threshold(current_severity: &str, threshold: &str) -> bool {
    // Rank a severity label, falling back to `fallback` when unrecognized.
    fn rank(level: &str, fallback: usize) -> usize {
        ["Low", "Medium", "High", "Critical"]
            .iter()
            .position(|&s| s == level)
            .unwrap_or(fallback)
    }
    rank(current_severity, 0) >= rank(threshold, 1)
}
1645
1646#[allow(dead_code)]
1648fn get_sentiment_analyzer(_context: &ApplicationContext) -> Arc<LexiconSentimentAnalyzer> {
1649 Arc::new(LexiconSentimentAnalyzer::default())
1652}
1653
1654fn analyze_sentiment(
1656 title: &str,
1657 description: &Option<String>,
1658 content: &Option<String>,
1659) -> NewsSentiment {
1660 let analyzer = LexiconSentimentAnalyzer::default();
1662 analyzer.analyze_sentiment_impl(title, description.as_deref(), content.as_deref())
1663}
1664
1665fn calculate_market_impact(
1670 sentiment: &NewsSentiment,
1671 source: &NewsSource,
1672 category: &NewsCategory,
1673) -> MarketImpact {
1674 let sentiment_impact = (sentiment.overall_score.abs() * 100.0 * sentiment.confidence) as u32;
1676
1677 let credibility_factor = source.credibility_score as f64 / 100.0;
1679 let base_score = (sentiment_impact as f64 * credibility_factor) as u32;
1680
1681 let category_multiplier = match category.primary.as_str() {
1683 "Breaking" => 1.5,
1684 "Regulation" => 1.4,
1685 "Security" => 1.3,
1686 "Analysis" => 1.1,
1687 _ => 1.0,
1688 };
1689
1690 let impact_score = ((base_score as f64 * category_multiplier).min(100.0)) as u32;
1691
1692 let impact_level = match impact_score {
1694 80..=100 => "Critical",
1695 60..=79 => "High",
1696 40..=59 => "Medium",
1697 20..=39 => "Low",
1698 _ => "Negligible",
1699 }
1700 .to_string();
1701
1702 let time_horizon = if sentiment.emotions.urgency > 0.7 || category.primary == "Breaking" {
1704 "Immediate"
1705 } else if impact_score > 60 {
1706 "Short-term"
1707 } else {
1708 "Medium-term"
1709 }
1710 .to_string();
1711
1712 let mut affected_sectors = Vec::new();
1714 if category.tags.iter().any(|t| t.contains("defi")) {
1715 affected_sectors.push("DeFi".to_string());
1716 }
1717 if category.tags.iter().any(|t| t.contains("nft")) {
1718 affected_sectors.push("NFT".to_string());
1719 }
1720 if category.tags.iter().any(|t| t.contains("exchange")) {
1721 affected_sectors.push("CEX".to_string());
1722 }
1723 if category.tags.iter().any(|t| t.contains("regulation")) {
1724 affected_sectors.push("Regulatory".to_string());
1725 }
1726 if affected_sectors.is_empty() {
1727 affected_sectors.push("General".to_string());
1728 }
1729
1730 let potential_price_impact = if impact_score > 70 {
1732 Some((sentiment.overall_score * 10.0).abs())
1733 } else if impact_score > 50 {
1734 Some((sentiment.overall_score * 5.0).abs())
1735 } else {
1736 None
1737 };
1738
1739 let mut risk_factors = Vec::new();
1741 if sentiment.emotions.uncertainty > 0.6 {
1742 risk_factors.push("High uncertainty".to_string());
1743 }
1744 if sentiment.emotions.fear > 0.6 {
1745 risk_factors.push("Market fear".to_string());
1746 }
1747 if category
1748 .tags
1749 .iter()
1750 .any(|t| t.contains("hack") || t.contains("exploit"))
1751 {
1752 risk_factors.push("Security breach".to_string());
1753 }
1754 if category.tags.iter().any(|t| t.contains("regulation")) {
1755 risk_factors.push("Regulatory risk".to_string());
1756 }
1757
1758 MarketImpact {
1759 impact_level,
1760 impact_score,
1761 time_horizon,
1762 affected_sectors,
1763 potential_price_impact,
1764 historical_correlation: None, risk_factors,
1766 }
1767}
1768
1769fn calculate_market_impact_simple(title: &str) -> MarketImpact {
1771 let title_lower = title.to_lowercase();
1772
1773 let high_impact = [
1775 "hack",
1776 "exploit",
1777 "sec",
1778 "ban",
1779 "crash",
1780 "surge",
1781 "partnership",
1782 "adoption",
1783 ];
1784 let medium_impact = ["update", "launch", "announce", "report", "analysis"];
1785
1786 let (impact_level, impact_score) = if high_impact.iter().any(|k| title_lower.contains(k)) {
1787 ("High".to_string(), 70)
1788 } else if medium_impact.iter().any(|k| title_lower.contains(k)) {
1789 ("Medium".to_string(), 50)
1790 } else {
1791 ("Low".to_string(), 30)
1792 };
1793
1794 MarketImpact {
1795 impact_level,
1796 impact_score,
1797 time_horizon: "Short-term".to_string(),
1798 affected_sectors: vec!["General".to_string()],
1799 potential_price_impact: None,
1800 historical_correlation: None,
1801 risk_factors: vec![],
1802 }
1803}
1804
1805fn extract_entities_from_text(
1811 title: &str,
1812 description: &Option<String>,
1813 content: &Option<String>,
1814 default_topic: &str,
1815) -> Vec<NewsEntity> {
1816 let full_text = format!(
1817 "{} {} {}",
1818 title,
1819 description.as_deref().unwrap_or(""),
1820 content.as_deref().unwrap_or("")
1821 );
1822
1823 let mut entities = Vec::new();
1824 let mut entity_map: HashMap<String, (String, u32)> = HashMap::new(); let crypto_pattern = r"\b(Bitcoin|BTC|Ethereum|ETH|Solana|SOL|Cardano|ADA|Polkadot|DOT|Chainlink|LINK|Avalanche|AVAX|Polygon|MATIC|Arbitrum|ARB|Optimism|OP)\b";
1828 if let Ok(re) = Regex::new(crypto_pattern) {
1829 for cap in re.captures_iter(&full_text) {
1830 if let Some(matched) = cap.get(0) {
1831 let name = matched.as_str();
1832 let entry = entity_map
1833 .entry(name.to_string())
1834 .or_insert(("Cryptocurrency".to_string(), 0));
1835 entry.1 += 1;
1836 }
1837 }
1838 }
1839
1840 let company_pattern = r"\b(Coinbase|Binance|Kraken|FTX|OpenSea|Uniswap|Aave|Compound|MakerDAO|Circle|Tether|Block\.one|ConsenSys|Ripple|Grayscale|MicroStrategy|Tesla|Square|PayPal)\b";
1842 if let Ok(re) = Regex::new(company_pattern) {
1843 for cap in re.captures_iter(&full_text) {
1844 if let Some(matched) = cap.get(0) {
1845 let name = matched.as_str();
1846 let entry = entity_map
1847 .entry(name.to_string())
1848 .or_insert(("Company".to_string(), 0));
1849 entry.1 += 1;
1850 }
1851 }
1852 }
1853
1854 let person_pattern = r"\b(Vitalik Buterin|Satoshi Nakamoto|CZ|Changpeng Zhao|Sam Bankman-Fried|SBF|Michael Saylor|Elon Musk|Gary Gensler|Jerome Powell)\b";
1856 if let Ok(re) = Regex::new(person_pattern) {
1857 for cap in re.captures_iter(&full_text) {
1858 if let Some(matched) = cap.get(0) {
1859 let name = matched.as_str();
1860 let entry = entity_map
1861 .entry(name.to_string())
1862 .or_insert(("Person".to_string(), 0));
1863 entry.1 += 1;
1864 }
1865 }
1866 }
1867
1868 let protocol_pattern =
1870 r"\b(DeFi|NFT|DAO|DEX|CEX|Layer 2|L2|zkSync|StarkNet|Lightning Network|Cosmos|IBC)\b";
1871 if let Ok(re) = Regex::new(protocol_pattern) {
1872 for cap in re.captures_iter(&full_text) {
1873 if let Some(matched) = cap.get(0) {
1874 let name = matched.as_str();
1875 let entry = entity_map
1876 .entry(name.to_string())
1877 .or_insert(("Protocol".to_string(), 0));
1878 entry.1 += 1;
1879 }
1880 }
1881 }
1882
1883 for (name, (entity_type, count)) in entity_map {
1885 let relevance_score = (count as f64 / 10.0).min(1.0);
1886 entities.push(NewsEntity {
1887 name: name.clone(),
1888 entity_type,
1889 relevance_score,
1890 sentiment: None, mention_count: count,
1892 contexts: vec![], });
1894 }
1895
1896 if entities.is_empty() {
1898 entities.push(NewsEntity {
1899 name: default_topic.to_string(),
1900 entity_type: "Topic".to_string(),
1901 relevance_score: 0.5,
1902 sentiment: None,
1903 mention_count: 1,
1904 contexts: vec![],
1905 });
1906 }
1907
1908 entities.sort_by(|a, b| {
1910 b.relevance_score
1911 .partial_cmp(&a.relevance_score)
1912 .unwrap_or(std::cmp::Ordering::Equal)
1913 });
1914
1915 entities
1916}
1917
/// Derive normalized topic tags from an article's title and description.
///
/// Matching is case-insensitive substring search; several keywords collapse
/// into the same tag (e.g. "sec" → "regulation", "hack"/"exploit" →
/// "security"). Output is sorted and de-duplicated.
fn extract_tags_from_text(title: &str, description: &Option<String>) -> Vec<String> {
    let haystack = format!(
        "{} {}",
        title.to_lowercase(),
        description.as_deref().unwrap_or("").to_lowercase()
    );

    let tag_keywords = [
        ("defi", "defi"),
        ("nft", "nft"),
        ("metaverse", "metaverse"),
        ("web3", "web3"),
        ("layer 2", "layer2"),
        ("stablecoin", "stablecoin"),
        ("cbdc", "cbdc"),
        ("mining", "mining"),
        ("staking", "staking"),
        ("governance", "governance"),
        ("dao", "dao"),
        ("smart contract", "smart-contracts"),
        ("regulation", "regulation"),
        ("sec", "regulation"),
        ("hack", "security"),
        ("exploit", "security"),
        ("partnership", "partnership"),
        ("integration", "integration"),
        ("upgrade", "upgrade"),
        ("mainnet", "mainnet"),
        ("testnet", "testnet"),
    ];

    let mut tags: Vec<String> = tag_keywords
        .iter()
        .filter(|&&(keyword, _)| haystack.contains(keyword))
        .map(|&(_, tag)| tag.to_string())
        .collect();

    tags.sort();
    tags.dedup();
    tags
}
1965
/// List canonical ids of cryptocurrencies mentioned anywhere in the article
/// text. Matching is case-insensitive substring search; symbols and full
/// names map to the same canonical id, which appears at most once, in the
/// keyword table's order.
fn extract_crypto_mentions(
    title: &str,
    description: &Option<String>,
    content: &Option<String>,
) -> Vec<String> {
    let haystack = format!(
        "{} {} {}",
        title.to_lowercase(),
        description.as_deref().unwrap_or("").to_lowercase(),
        content.as_deref().unwrap_or("").to_lowercase()
    );

    let crypto_list = [
        ("bitcoin", "bitcoin"),
        ("btc", "bitcoin"),
        ("ethereum", "ethereum"),
        ("eth", "ethereum"),
        ("solana", "solana"),
        ("sol", "solana"),
        ("cardano", "cardano"),
        ("ada", "cardano"),
        ("polkadot", "polkadot"),
        ("dot", "polkadot"),
        ("chainlink", "chainlink"),
        ("link", "chainlink"),
        ("avalanche", "avalanche"),
        ("avax", "avalanche"),
        ("polygon", "polygon"),
        ("matic", "polygon"),
        ("arbitrum", "arbitrum"),
        ("optimism", "optimism"),
        ("bnb", "bnb"),
        ("xrp", "xrp"),
        ("doge", "dogecoin"),
        ("shib", "shiba-inu"),
    ];

    let mut mentioned: Vec<String> = Vec::new();
    for &(keyword, canonical) in &crypto_list {
        if haystack.contains(keyword) {
            let canonical = canonical.to_string();
            if !mentioned.contains(&canonical) {
                mentioned.push(canonical);
            }
        }
    }
    mentioned
}
2014
2015fn calculate_quality_metrics(
2017 title: &str,
2018 description: &Option<String>,
2019 content: &Option<String>,
2020 source_credibility: u32,
2021) -> QualityMetrics {
2022 let has_description = description.is_some() && !description.as_ref().unwrap().is_empty();
2023 let _has_content = content.is_some() && !content.as_ref().unwrap().is_empty();
2024
2025 let content_length = content.as_ref().map_or(0, |c| c.len());
2027 let depth_score = if content_length > 2000 {
2028 85
2029 } else if content_length > 1000 {
2030 70
2031 } else if content_length > 500 {
2032 55
2033 } else if has_description {
2034 40
2035 } else {
2036 25
2037 };
2038
2039 let title_words = title.split_whitespace().count();
2041 let writing_quality = if title_words > 5 && title_words < 20 && has_description {
2042 75
2043 } else if title_words > 3 {
2044 65
2045 } else {
2046 50
2047 };
2048
2049 let citation_quality = if content_length > 1000 { 60 } else { 40 };
2051
2052 let overall_score = ((source_credibility as f64 * 0.3)
2054 + (depth_score as f64 * 0.3)
2055 + (writing_quality as f64 * 0.2)
2056 + (citation_quality as f64 * 0.2)) as u32;
2057
2058 QualityMetrics {
2059 overall_score,
2060 depth_score,
2061 factual_accuracy: source_credibility, writing_quality,
2063 citation_quality,
2064 uniqueness_score: 50, reading_difficulty: if content_length > 2000 { 7 } else { 5 },
2066 }
2067}
2068
2069fn calculate_market_impact_from_content(
2071 title: &str,
2072 description: &Option<String>,
2073 content: &Option<String>,
2074) -> MarketImpact {
2075 let sentiment = analyze_sentiment(title, description, content);
2076 let full_text = format!(
2077 "{} {} {}",
2078 title.to_lowercase(),
2079 description.as_deref().unwrap_or("").to_lowercase(),
2080 content.as_deref().unwrap_or("").to_lowercase()
2081 );
2082
2083 let critical_keywords = [
2085 "hack",
2086 "exploit",
2087 "bankrupt",
2088 "sec enforcement",
2089 "criminal",
2090 "fraud",
2091 ];
2092 let high_keywords = [
2093 "partnership",
2094 "adoption",
2095 "integration",
2096 "launch",
2097 "acquisition",
2098 ];
2099 let medium_keywords = ["update", "upgrade", "announce", "report", "analysis"];
2100
2101 let has_critical = critical_keywords.iter().any(|k| full_text.contains(k));
2102 let has_high = high_keywords.iter().any(|k| full_text.contains(k));
2103 let has_medium = medium_keywords.iter().any(|k| full_text.contains(k));
2104
2105 let (impact_level, base_score) = if has_critical {
2106 ("Critical", 85)
2107 } else if has_high {
2108 ("High", 70)
2109 } else if has_medium {
2110 ("Medium", 50)
2111 } else {
2112 ("Low", 30)
2113 };
2114
2115 let impact_score =
2117 ((base_score as f64 * (1.0 + sentiment.overall_score.abs() * 0.3)) as u32).min(100);
2118
2119 MarketImpact {
2120 impact_level: impact_level.to_string(),
2121 impact_score,
2122 time_horizon: if has_critical {
2123 "Immediate"
2124 } else {
2125 "Short-term"
2126 }
2127 .to_string(),
2128 affected_sectors: extract_affected_sectors(&full_text),
2129 potential_price_impact: if impact_score > 70 {
2130 Some((sentiment.overall_score * 7.5).abs())
2131 } else if impact_score > 50 {
2132 Some((sentiment.overall_score * 4.0).abs())
2133 } else {
2134 None
2135 },
2136 historical_correlation: None,
2137 risk_factors: extract_risk_factors(&full_text),
2138 }
2139}
2140
/// Map sector keywords found in lowercase `text` to display sector names,
/// preserving the keyword table's order. Returns ["General"] when nothing
/// matches. (Also restores the `&` references that had been corrupted into
/// HTML entities, which broke compilation.)
fn extract_affected_sectors(text: &str) -> Vec<String> {
    let mut sectors = Vec::new();

    let sector_keywords = [
        ("defi", "DeFi"),
        ("nft", "NFT"),
        ("exchange", "CEX"),
        ("dex", "DEX"),
        ("stablecoin", "Stablecoins"),
        ("mining", "Mining"),
        ("layer 2", "Layer2"),
        ("lending", "Lending"),
        ("derivatives", "Derivatives"),
        ("gamefi", "GameFi"),
        ("metaverse", "Metaverse"),
    ];

    for (keyword, sector) in &sector_keywords {
        if text.contains(keyword) && !sectors.contains(&sector.to_string()) {
            sectors.push(sector.to_string());
        }
    }

    if sectors.is_empty() {
        sectors.push("General".to_string());
    }

    sectors
}
2171
/// Translate risk-related keywords in lowercase `text` into human-readable
/// risk-factor labels, in the keyword table's order. May be empty.
fn extract_risk_factors(text: &str) -> Vec<String> {
    let risk_keywords = [
        ("regulation", "Regulatory uncertainty"),
        ("sec", "Regulatory action"),
        ("hack", "Security vulnerability"),
        ("exploit", "Protocol vulnerability"),
        ("volatile", "Market volatility"),
        ("uncertain", "Market uncertainty"),
        ("lawsuit", "Legal risk"),
        ("investigation", "Regulatory investigation"),
        ("liquidity", "Liquidity risk"),
        ("contagion", "Contagion risk"),
    ];

    let mut risks: Vec<String> = Vec::new();
    for &(keyword, label) in &risk_keywords {
        let label = label.to_string();
        if text.contains(keyword) && !risks.contains(&label) {
            risks.push(label);
        }
    }
    risks
}
2197
2198#[cfg(test)]
2199mod tests {
2200 use super::*;
2201
    #[test]
    // Default config must target NewsAPI v2 and cap results at 50 articles.
    fn test_news_config_default() {
        let config = NewsConfig::default();
        assert_eq!(config.base_url, "https://newsapi.org/v2");
        assert_eq!(config.max_articles, 50);
    }
2208
2209 #[test]
2210 fn test_basic_news_functionality() {
2211 let simple_title = "Bitcoin News Test".to_string();
2213 assert!(simple_title.contains("Bitcoin"));
2214
2215 let config = NewsConfig::default();
2217 assert_eq!(config.base_url, "https://newsapi.org/v2");
2218 assert_eq!(config.max_articles, 50);
2219 }
2220
2221 #[test]
2222 fn test_parse_time_window() {
2223 assert_eq!(parse_time_window("1h"), 1);
2224 assert_eq!(parse_time_window("24h"), 24);
2225 assert_eq!(parse_time_window("week"), 168);
2226 }
2227
2228 #[test]
2229 fn test_severity_threshold() {
2230 assert!(is_above_severity_threshold("High", "Medium"));
2231 assert!(!is_above_severity_threshold("Medium", "High"));
2232 assert!(is_above_severity_threshold("Critical", "High"));
2233 }
2234
2235 #[test]
2238 fn test_parse_time_window_all_cases() {
2239 assert_eq!(parse_time_window("6h"), 6);
2240 assert_eq!(parse_time_window("invalid"), 24); assert_eq!(parse_time_window(""), 24); assert_eq!(parse_time_window("random_text"), 24);
2243 }
2244
2245 #[test]
2246 fn test_hash64_function() {
2247 let test_string = "test_string";
2248 let hash1 = hash64(test_string);
2249 let hash2 = hash64(test_string);
2250 assert_eq!(hash1, hash2); let different_hash = hash64("different_string");
2253 assert_ne!(hash1, different_hash); let empty_hash = hash64("");
2256 assert_ne!(hash1, empty_hash); }
2258
2259 #[test]
2260 fn test_severity_threshold_edge_cases() {
2261 assert!(is_above_severity_threshold("Critical", "Critical"));
2263 assert!(is_above_severity_threshold("High", "High"));
2264 assert!(is_above_severity_threshold("Medium", "Medium"));
2265 assert!(is_above_severity_threshold("Low", "Low"));
2266
2267 assert!(!is_above_severity_threshold("Invalid", "Medium"));
2269 assert!(is_above_severity_threshold("Medium", "Invalid"));
2270 assert!(is_above_severity_threshold("Invalid", "Invalid"));
2271 }
2272
    #[test]
    // Average credibility: empty input yields 0.0; otherwise the arithmetic
    // mean of each article's source credibility score.
    fn test_calculate_avg_credibility() {
        let empty_articles: Vec<NewsArticle> = vec![];
        assert_eq!(calculate_avg_credibility(&empty_articles), 0.0);

        let single_article = vec![create_test_article_with_credibility(75)];
        assert_eq!(calculate_avg_credibility(&single_article), 75.0);

        // (80 + 60 + 70) / 3 == 70.
        let multiple_articles = vec![
            create_test_article_with_credibility(80),
            create_test_article_with_credibility(60),
            create_test_article_with_credibility(70),
        ];
        assert_eq!(calculate_avg_credibility(&multiple_articles), 70.0);
    }
2291
    #[test]
    // Deduplication keeps one article per duplicate group (presumably keyed
    // on URL — `deduplicate_articles` is defined elsewhere; confirm there).
    fn test_deduplicate_articles() {
        let empty_articles: Vec<NewsArticle> = vec![];
        let result = deduplicate_articles(empty_articles);
        assert!(result.is_empty());

        // Distinct URLs: nothing is removed.
        let unique_articles = vec![
            create_test_article_with_url("https://example1.com"),
            create_test_article_with_url("https://example2.com"),
        ];
        let result = deduplicate_articles(unique_articles);
        assert_eq!(result.len(), 2);

        // Two identical URLs collapse to one entry.
        let duplicate_articles = vec![
            create_test_article_with_url("https://example.com"),
            create_test_article_with_url("https://example.com"),
            create_test_article_with_url("https://different.com"),
        ];
        let result = deduplicate_articles(duplicate_articles);
        assert_eq!(result.len(), 2);
    }
2316
    #[test]
    // Trend classification thresholds: mean > 0.1 → Improving,
    // mean < -0.1 → Declining, otherwise Stable (boundaries are exclusive).
    fn test_determine_sentiment_trend() {
        let improving_articles = vec![
            create_test_article_with_sentiment(0.5),
            create_test_article_with_sentiment(0.6),
        ];
        assert_eq!(determine_sentiment_trend(&improving_articles), "Improving");

        let declining_articles = vec![
            create_test_article_with_sentiment(-0.5),
            create_test_article_with_sentiment(-0.6),
        ];
        assert_eq!(determine_sentiment_trend(&declining_articles), "Declining");

        // Mixed small scores average to ~0 → Stable.
        let stable_articles = vec![
            create_test_article_with_sentiment(0.05),
            create_test_article_with_sentiment(-0.05),
        ];
        assert_eq!(determine_sentiment_trend(&stable_articles), "Stable");

        // Exactly ±0.1 is NOT past the threshold (comparisons are strict).
        let edge_positive = vec![create_test_article_with_sentiment(0.1)];
        assert_eq!(determine_sentiment_trend(&edge_positive), "Stable");

        let edge_negative = vec![create_test_article_with_sentiment(-0.1)];
        assert_eq!(determine_sentiment_trend(&edge_negative), "Stable");

        let just_above_positive = vec![create_test_article_with_sentiment(0.11)];
        assert_eq!(determine_sentiment_trend(&just_above_positive), "Improving");

        let just_below_negative = vec![create_test_article_with_sentiment(-0.11)];
        assert_eq!(determine_sentiment_trend(&just_below_negative), "Declining");
    }
2353
    #[test]
    // End-to-end checks of the lexicon analyzer: classification, emotional
    // indicators, and per-topic sentiment buckets.
    fn test_analyze_sentiment_comprehensive() {
        // Heavily positive vocabulary → Bullish with positive score.
        let positive_sentiment = analyze_sentiment(
            "Bitcoin surge brings bullish sentiment to crypto markets",
            &Some("Strong gains and positive developments".to_string()),
            &Some("The rally continues with strong adoption and growth".to_string()),
        );
        assert!(positive_sentiment.overall_score > 0.0);
        assert_eq!(positive_sentiment.classification, "Bullish");
        assert!(positive_sentiment.confidence > 0.0);

        // Heavily negative vocabulary → Bearish with negative score.
        let negative_sentiment = analyze_sentiment(
            "Bitcoin crash brings bearish sentiment and market fears",
            &Some("Major decline and concerns about future".to_string()),
            &Some("The drop causes risk and threats to vulnerable markets".to_string()),
        );
        assert!(negative_sentiment.overall_score < 0.0);
        assert_eq!(negative_sentiment.classification, "Bearish");

        // No lexicon hits → Neutral.
        let neutral_sentiment = analyze_sentiment(
            "Bitcoin price analysis report",
            &Some("Regular market update".to_string()),
            &None,
        );
        assert_eq!(neutral_sentiment.classification, "Neutral");

        let slightly_bullish =
            analyze_sentiment("Bitcoin shows mild growth and positive signs", &None, &None);
        assert!(slightly_bullish.classification.contains("Bullish"));

        // Emotional indicators react to their respective vocabularies.
        let fear_content =
            analyze_sentiment("Market crash panic fear worried investors", &None, &None);
        assert!(fear_content.emotions.fear > 0.0);

        let greed_content = analyze_sentiment(
            "Moon lambo rich massive explosive gains profit",
            &None,
            &None,
        );
        assert!(greed_content.emotions.greed > 0.0);

        let uncertainty_content = analyze_sentiment(
            "Maybe perhaps unclear uncertain volatile unpredictable",
            &None,
            &None,
        );
        assert!(uncertainty_content.emotions.uncertainty > 0.0);

        // Topic-level sentiment is tracked per mentioned asset.
        let bitcoin_surge = analyze_sentiment("Bitcoin surge hits new highs", &None, &None);
        assert!(bitcoin_surge.topic_sentiments.contains_key("bitcoin"));
        assert!(bitcoin_surge.topic_sentiments["bitcoin"] > 0.0);

        let ethereum_crash = analyze_sentiment("Ethereum crash causes major losses", &None, &None);
        assert!(ethereum_crash.topic_sentiments.contains_key("ethereum"));
        assert!(ethereum_crash.topic_sentiments["ethereum"] < 0.0);
    }
2416
    #[test]
    // Degenerate inputs: empty text, lexicon-free text, and very repetitive
    // text must all produce well-defined results.
    fn test_analyze_sentiment_edge_cases() {
        let empty_sentiment = analyze_sentiment("", &None, &None);
        assert_eq!(empty_sentiment.overall_score, 0.0);
        assert_eq!(empty_sentiment.classification, "Neutral");

        // Words absent from the lexicon contribute nothing.
        let neutral_words = analyze_sentiment(
            "The weather is nice today",
            &Some("Random content".to_string()),
            &None,
        );
        assert_eq!(neutral_words.overall_score, 0.0);

        // Long, uniformly positive text stays positive with some confidence.
        let long_content = "bullish ".repeat(100);
        let long_sentiment = analyze_sentiment(&long_content, &None, &None);
        assert!(long_sentiment.overall_score > 0.0);
        assert!(long_sentiment.confidence > 0.0);
    }
2438
2439 #[test]
2440 fn test_calculate_market_impact() {
2441 let sentiment = NewsSentiment {
2442 overall_score: 0.5,
2443 confidence: 0.8,
2444 classification: "Bullish".to_string(),
2445 topic_sentiments: HashMap::new(),
2446 emotions: EmotionalIndicators {
2447 fear: 0.0,
2448 greed: 0.0,
2449 excitement: 0.0,
2450 uncertainty: 0.0,
2451 urgency: 0.9, },
2453 key_phrases: vec![],
2454 };
2455
2456 let source = NewsSource {
2457 id: "test".to_string(),
2458 name: "Test Source".to_string(),
2459 url: "https://test.com".to_string(),
2460 category: "Mainstream".to_string(),
2461 credibility_score: 80,
2462 accuracy_rating: None,
2463 bias_score: None,
2464 is_verified: true,
2465 logo_url: None,
2466 };
2467
2468 let breaking_category = NewsCategory {
2470 primary: "Breaking".to_string(),
2471 sub_category: None,
2472 tags: vec!["hack".to_string()],
2473 geographic_scope: vec!["Global".to_string()],
2474 target_audience: "Retail".to_string(),
2475 };
2476
2477 let impact = calculate_market_impact(&sentiment, &source, &breaking_category);
2478 assert_eq!(impact.time_horizon, "Immediate");
2479 assert!(impact.impact_score > 0);
2480 assert!(impact.risk_factors.len() > 0);
2481
2482 let regulation_category = NewsCategory {
2484 primary: "Regulation".to_string(),
2485 sub_category: None,
2486 tags: vec!["regulation".to_string()],
2487 geographic_scope: vec!["US".to_string()],
2488 target_audience: "Institutional".to_string(),
2489 };
2490
2491 let reg_impact = calculate_market_impact(&sentiment, &source, ®ulation_category);
2492 assert!(reg_impact
2493 .risk_factors
2494 .contains(&"Regulatory risk".to_string()));
2495
2496 let fearful_sentiment = NewsSentiment {
2498 overall_score: -0.5,
2499 confidence: 0.8,
2500 classification: "Bearish".to_string(),
2501 topic_sentiments: HashMap::new(),
2502 emotions: EmotionalIndicators {
2503 fear: 0.8,
2504 greed: 0.0,
2505 excitement: 0.0,
2506 uncertainty: 0.7,
2507 urgency: 0.0,
2508 },
2509 key_phrases: vec![],
2510 };
2511
2512 let fear_impact = calculate_market_impact(&fearful_sentiment, &source, &breaking_category);
2513 assert!(fear_impact
2514 .risk_factors
2515 .contains(&"Market fear".to_string()));
2516 assert!(fear_impact
2517 .risk_factors
2518 .contains(&"High uncertainty".to_string()));
2519 }
2520
2521 #[test]
2522 fn test_calculate_market_impact_simple() {
2523 let hack_impact = calculate_market_impact_simple("Major hack exploit discovered");
2525 assert_eq!(hack_impact.impact_level, "High");
2526 assert_eq!(hack_impact.impact_score, 70);
2527
2528 let sec_impact = calculate_market_impact_simple("SEC announces new ban");
2529 assert_eq!(sec_impact.impact_level, "High");
2530 assert_eq!(sec_impact.impact_score, 70);
2531
2532 let launch_impact = calculate_market_impact_simple("Company announces new launch");
2534 assert_eq!(launch_impact.impact_level, "Medium");
2535 assert_eq!(launch_impact.impact_score, 50);
2536
2537 let normal_impact = calculate_market_impact_simple("Regular news update");
2539 assert_eq!(normal_impact.impact_level, "Low");
2540 assert_eq!(normal_impact.impact_score, 30);
2541
2542 let case_impact = calculate_market_impact_simple("HACK discovered in PROTOCOL");
2544 assert_eq!(case_impact.impact_level, "High");
2545 }
2546
2547 #[test]
2548 fn test_extract_entities_from_text() {
2549 let crypto_text = "Bitcoin and Ethereum are leading cryptocurrencies";
2551 let entities = extract_entities_from_text(
2552 crypto_text,
2553 &Some("BTC and ETH analysis".to_string()),
2554 &Some("Solana SOL also mentioned".to_string()),
2555 "crypto",
2556 );
2557
2558 let crypto_names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2559 assert!(crypto_names.contains(&"Bitcoin"));
2560 assert!(crypto_names.contains(&"Ethereum"));
2561 assert!(crypto_names.contains(&"Solana"));
2562
2563 let company_text = "Coinbase and Binance are major exchanges";
2565 let company_entities = extract_entities_from_text(company_text, &None, &None, "exchanges");
2566
2567 let company_names: Vec<&str> = company_entities.iter().map(|e| e.name.as_str()).collect();
2568 assert!(company_names.contains(&"Coinbase"));
2569 assert!(company_names.contains(&"Binance"));
2570
2571 let person_text = "Vitalik Buterin and CZ discussed the future";
2573 let person_entities = extract_entities_from_text(person_text, &None, &None, "crypto");
2574
2575 let person_names: Vec<&str> = person_entities.iter().map(|e| e.name.as_str()).collect();
2576 assert!(person_names.contains(&"Vitalik Buterin"));
2577 assert!(person_names.contains(&"CZ"));
2578
2579 let protocol_text = "DeFi and NFT protocols are growing";
2581 let protocol_entities =
2582 extract_entities_from_text(protocol_text, &None, &None, "protocols");
2583
2584 let protocol_names: Vec<&str> = protocol_entities.iter().map(|e| e.name.as_str()).collect();
2585 assert!(protocol_names.contains(&"DeFi"));
2586 assert!(protocol_names.contains(&"NFT"));
2587
2588 let no_entities =
2590 extract_entities_from_text("Random news content", &None, &None, "default_topic");
2591 assert_eq!(no_entities.len(), 1);
2592 assert_eq!(no_entities[0].name, "default_topic");
2593 assert_eq!(no_entities[0].entity_type, "Topic");
2594
2595 let multi_mention_text = "Bitcoin Bitcoin Bitcoin Ethereum";
2597 let sorted_entities =
2598 extract_entities_from_text(multi_mention_text, &None, &None, "crypto");
2599 assert_eq!(sorted_entities[0].name, "Bitcoin");
2601 assert!(sorted_entities[0].mention_count > sorted_entities[1].mention_count);
2602 }
2603
2604 #[test]
2605 fn test_extract_tags_from_text() {
2606 let defi_text = "DeFi protocols are revolutionizing finance";
2608 let defi_tags = extract_tags_from_text(defi_text, &None);
2609 assert!(defi_tags.contains(&"defi".to_string()));
2610
2611 let nft_text = "NFT marketplace sees growth";
2612 let nft_tags = extract_tags_from_text(nft_text, &Some("NFT content".to_string()));
2613 assert!(nft_tags.contains(&"nft".to_string()));
2614
2615 let multi_tag_text = "Layer 2 solutions improve smart contract efficiency";
2616 let multi_tags = extract_tags_from_text(multi_tag_text, &None);
2617 assert!(multi_tags.contains(&"layer2".to_string()));
2618 assert!(multi_tags.contains(&"smart-contracts".to_string()));
2619
2620 let case_text = "DEFI and NFT protocols";
2622 let case_tags = extract_tags_from_text(case_text, &None);
2623 assert!(case_tags.contains(&"defi".to_string()));
2624 assert!(case_tags.contains(&"nft".to_string()));
2625
2626 let no_tag_text = "Random content without keywords";
2628 let no_tags = extract_tags_from_text(no_tag_text, &None);
2629 assert!(no_tags.is_empty());
2630
2631 let duplicate_text = "DeFi DeFi protocols and DeFi systems";
2633 let dup_tags = extract_tags_from_text(duplicate_text, &None);
2634 assert_eq!(dup_tags.iter().filter(|&t| t == "defi").count(), 1);
2635 }
2636
2637 #[test]
2638 fn test_extract_crypto_mentions() {
2639 let crypto_text = "Bitcoin BTC Ethereum ETH prices";
2641 let cryptos = extract_crypto_mentions(
2642 crypto_text,
2643 &Some("Solana SOL analysis".to_string()),
2644 &Some("Cardano ADA update".to_string()),
2645 );
2646
2647 assert!(cryptos.contains(&"bitcoin".to_string()));
2648 assert!(cryptos.contains(&"ethereum".to_string()));
2649 assert!(cryptos.contains(&"solana".to_string()));
2650 assert!(cryptos.contains(&"cardano".to_string()));
2651
2652 let case_text = "BITCOIN and ethereum prices";
2654 let case_cryptos = extract_crypto_mentions(case_text, &None, &None);
2655 assert!(case_cryptos.contains(&"bitcoin".to_string()));
2656 assert!(case_cryptos.contains(&"ethereum".to_string()));
2657
2658 let symbol_text = "BTC Bitcoin analysis";
2660 let symbol_cryptos = extract_crypto_mentions(symbol_text, &None, &None);
2661 assert_eq!(symbol_cryptos.iter().filter(|&c| c == "bitcoin").count(), 1);
2662
2663 let no_crypto_text = "Weather news today";
2665 let no_cryptos = extract_crypto_mentions(no_crypto_text, &None, &None);
2666 assert!(no_cryptos.is_empty());
2667 }
2668
2669 #[test]
2670 fn test_calculate_quality_metrics() {
2671 let high_quality = calculate_quality_metrics(
2673 "Comprehensive Analysis of Market Trends",
2674 &Some("Detailed description of market conditions".to_string()),
2675 &Some("a".repeat(2500)), 90, );
2678 assert_eq!(high_quality.depth_score, 85);
2679 assert_eq!(high_quality.writing_quality, 75);
2680 assert_eq!(high_quality.factual_accuracy, 90);
2681 assert!(high_quality.overall_score > 70);
2682
2683 let medium_quality = calculate_quality_metrics(
2685 "Market Update",
2686 &Some("Brief description".to_string()),
2687 &Some("a".repeat(1500)), 70,
2689 );
2690 assert_eq!(medium_quality.depth_score, 70);
2691
2692 let low_quality = calculate_quality_metrics("News", &None, &None, 50);
2694 assert_eq!(low_quality.depth_score, 25);
2695 assert_eq!(low_quality.writing_quality, 50);
2696
2697 let edge_case = calculate_quality_metrics(
2699 "Short title with exactly five words here",
2700 &Some("Description present".to_string()),
2701 &Some("a".repeat(500)), 75,
2703 );
2704 assert_eq!(edge_case.depth_score, 55);
2705 assert_eq!(edge_case.writing_quality, 75);
2706 }
2707
2708 #[test]
2709 fn test_calculate_market_impact_from_content() {
2710 let critical_content = calculate_market_impact_from_content(
2712 "Major hack exploit discovered",
2713 &Some("Criminal fraud investigation".to_string()),
2714 &Some("SEC enforcement action bankruptcy".to_string()),
2715 );
2716 assert_eq!(critical_content.impact_level, "Critical");
2717 assert_eq!(critical_content.time_horizon, "Immediate");
2718 assert!(critical_content.impact_score >= 85);
2719
2720 let high_content = calculate_market_impact_from_content(
2722 "Partnership announcement",
2723 &Some("Major adoption integration".to_string()),
2724 &Some("New launch acquisition".to_string()),
2725 );
2726 assert_eq!(high_content.impact_level, "High");
2727
2728 let medium_content = calculate_market_impact_from_content(
2730 "Update announcement",
2731 &Some("Upgrade report".to_string()),
2732 &Some("Analysis of trends".to_string()),
2733 );
2734 assert_eq!(medium_content.impact_level, "Medium");
2735
2736 let low_content = calculate_market_impact_from_content(
2738 "Regular news",
2739 &Some("Standard content".to_string()),
2740 &None,
2741 );
2742 assert_eq!(low_content.impact_level, "Low");
2743
2744 let positive_sentiment_content = calculate_market_impact_from_content(
2746 "Partnership bullish surge rally",
2747 &Some("Strong positive growth".to_string()),
2748 &None,
2749 );
2750 assert!(positive_sentiment_content.impact_score > 70);
2751
2752 assert!(critical_content.potential_price_impact.is_some());
2754 assert!(high_content.potential_price_impact.is_some());
2755 assert!(low_content.potential_price_impact.is_none());
2756 }
2757
2758 #[test]
2759 fn test_extract_affected_sectors() {
2760 let defi_text = "defi protocols are growing";
2762 let defi_sectors = extract_affected_sectors(defi_text);
2763 assert!(defi_sectors.contains(&"DeFi".to_string()));
2764
2765 let multi_text = "nft marketplace and exchange listing";
2767 let multi_sectors = extract_affected_sectors(multi_text);
2768 assert!(multi_sectors.contains(&"NFT".to_string()));
2769 assert!(multi_sectors.contains(&"CEX".to_string()));
2770
2771 let general_text = "random news content";
2773 let general_sectors = extract_affected_sectors(general_text);
2774 assert_eq!(general_sectors, vec!["General".to_string()]);
2775
2776 let dup_text = "defi and defi protocols";
2778 let dup_sectors = extract_affected_sectors(dup_text);
2779 assert_eq!(dup_sectors.iter().filter(|&s| s == "DeFi").count(), 1);
2780 }
2781
2782 #[test]
2783 fn test_extract_risk_factors() {
2784 let risk_text = "regulation SEC hack exploit volatile uncertain lawsuit investigation";
2786 let risks = extract_risk_factors(risk_text);
2787
2788 assert!(risks.contains(&"Regulatory uncertainty".to_string()));
2789 assert!(risks.contains(&"Regulatory action".to_string()));
2790 assert!(risks.contains(&"Security vulnerability".to_string()));
2791 assert!(risks.contains(&"Protocol vulnerability".to_string()));
2792 assert!(risks.contains(&"Market volatility".to_string()));
2793 assert!(risks.contains(&"Market uncertainty".to_string()));
2794 assert!(risks.contains(&"Legal risk".to_string()));
2795 assert!(risks.contains(&"Regulatory investigation".to_string()));
2796
2797 let safe_text = "positive news about growth";
2799 let no_risks = extract_risk_factors(safe_text);
2800 assert!(no_risks.is_empty());
2801
2802 let dup_risk_text = "regulation and regulation concerns";
2804 let dup_risks = extract_risk_factors(dup_risk_text);
2805 assert_eq!(
2806 dup_risks
2807 .iter()
2808 .filter(|&r| r == "Regulatory uncertainty")
2809 .count(),
2810 1
2811 );
2812 }
2813
2814 fn create_test_article_with_credibility(credibility: u32) -> NewsArticle {
2816 NewsArticle {
2817 id: "test".to_string(),
2818 title: "Test Article".to_string(),
2819 url: "https://test.com".to_string(),
2820 description: None,
2821 content: None,
2822 published_at: Utc::now(),
2823 source: NewsSource {
2824 id: "test".to_string(),
2825 name: "Test Source".to_string(),
2826 url: "https://test.com".to_string(),
2827 category: "Test".to_string(),
2828 credibility_score: credibility,
2829 accuracy_rating: None,
2830 bias_score: None,
2831 is_verified: true,
2832 logo_url: None,
2833 },
2834 category: NewsCategory {
2835 primary: "Test".to_string(),
2836 sub_category: None,
2837 tags: vec![],
2838 geographic_scope: vec![],
2839 target_audience: "Test".to_string(),
2840 },
2841 sentiment: NewsSentiment {
2842 overall_score: 0.0,
2843 confidence: 0.5,
2844 classification: "Neutral".to_string(),
2845 topic_sentiments: HashMap::new(),
2846 emotions: EmotionalIndicators {
2847 fear: 0.0,
2848 greed: 0.0,
2849 excitement: 0.0,
2850 uncertainty: 0.0,
2851 urgency: 0.0,
2852 },
2853 key_phrases: vec![],
2854 },
2855 market_impact: MarketImpact {
2856 impact_level: "Low".to_string(),
2857 impact_score: 30,
2858 time_horizon: "Short-term".to_string(),
2859 affected_sectors: vec![],
2860 potential_price_impact: None,
2861 historical_correlation: None,
2862 risk_factors: vec![],
2863 },
2864 entities: vec![],
2865 related_assets: vec![],
2866 quality_metrics: QualityMetrics {
2867 overall_score: 50,
2868 depth_score: 50,
2869 factual_accuracy: 50,
2870 writing_quality: 50,
2871 citation_quality: 50,
2872 uniqueness_score: 50,
2873 reading_difficulty: 5,
2874 },
2875 social_metrics: None,
2876 }
2877 }
2878
2879 fn create_test_article_with_url(url: &str) -> NewsArticle {
2880 let mut article = create_test_article_with_credibility(70);
2881 article.url = url.to_string();
2882 article
2883 }
2884
2885 fn create_test_article_with_sentiment(sentiment_score: f64) -> NewsArticle {
2886 let mut article = create_test_article_with_credibility(70);
2887 article.sentiment.overall_score = sentiment_score;
2888 article
2889 }
2890}