1use crate::Vector;
7use anyhow::Result;
8use chrono::{DateTime, Utc};
9use scirs2_core::random::{Random, Rng};
10use serde::{Deserialize, Serialize};
11use std::collections::{HashMap, HashSet};
12use std::sync::{Arc, RwLock};
13use tokio::task::JoinHandle;
14use tracing::{debug, info};
15
/// Analyzes academic research networks: author/publication embeddings,
/// citation and collaboration graphs, and topic models, refreshed by
/// background tokio tasks.
pub struct ResearchNetworkAnalyzer {
    /// Cached author embeddings keyed by author id.
    author_embeddings: Arc<RwLock<HashMap<String, AuthorEmbedding>>>,
    /// Cached publication embeddings keyed by publication id.
    publication_embeddings: Arc<RwLock<HashMap<String, PublicationEmbedding>>>,
    /// Citation graph plus derived co-citation / coupling / temporal data.
    citation_network: Arc<RwLock<CitationNetwork>>,
    /// Author collaboration graph and detected research communities.
    collaboration_network: Arc<RwLock<CollaborationNetwork>>,
    /// Topic models keyed by topic id.
    topic_models: Arc<RwLock<HashMap<String, TopicModel>>>,
    /// Static configuration for intervals, limits and dimensions.
    config: ResearchNetworkConfig,
    /// Handles of the spawned background loops; aborted by `stop`.
    analysis_tasks: Vec<JoinHandle<()>>,
}
33
/// Tuning knobs for the research-network analysis system.
#[derive(Debug, Clone)]
pub struct ResearchNetworkConfig {
    /// Upper bound on tracked authors (not enforced in this file).
    pub max_authors: usize,
    /// Upper bound on tracked publications (not enforced in this file).
    pub max_publications: usize,
    /// Period of the citation-analysis background loop, in hours.
    pub citation_update_interval_hours: u64,
    /// Period of the collaboration-analysis background loop, in hours.
    pub collaboration_analysis_interval_hours: u64,
    /// Period of the impact-prediction refresh loop, in hours.
    pub impact_prediction_refresh_hours: u64,
    /// Whether citations should be tracked as they arrive
    /// (not consumed in this file yet).
    pub enable_real_time_citation_tracking: bool,
    /// Minimum citation count threshold (not consumed in this file yet).
    pub min_citation_threshold: u32,
    /// Parameters for topic modeling.
    pub topic_config: TopicModelingConfig,
    /// Dimensionality of author and publication embedding vectors.
    pub embedding_dimension: usize,
}
56
impl Default for ResearchNetworkConfig {
    /// Defaults sized for a large corpus (100k authors, 1M publications)
    /// with daily citation refresh and 512-dimensional embeddings.
    fn default() -> Self {
        Self {
            max_authors: 100_000,
            max_publications: 1_000_000,
            citation_update_interval_hours: 24,
            collaboration_analysis_interval_hours: 12,
            impact_prediction_refresh_hours: 48,
            enable_real_time_citation_tracking: true,
            min_citation_threshold: 5,
            topic_config: TopicModelingConfig::default(),
            embedding_dimension: 512,
        }
    }
}
72
/// Parameters for LDA-style topic modeling.
#[derive(Debug, Clone)]
pub struct TopicModelingConfig {
    /// Number of topics to fit.
    pub num_topics: usize,
    /// Words occurring fewer than this many times are ignored.
    pub min_word_freq: u32,
    /// Words appearing in more than this fraction of documents are ignored.
    pub max_doc_freq_ratio: f64,
    /// Number of LDA training iterations.
    pub lda_iterations: u32,
    /// Minimum topic coherence (not enforced in this file yet).
    pub coherence_threshold: f64,
}
87
impl Default for TopicModelingConfig {
    /// Conventional LDA defaults: 50 topics, 1000 iterations.
    fn default() -> Self {
        Self {
            num_topics: 50,
            min_word_freq: 5,
            max_doc_freq_ratio: 0.8,
            lda_iterations: 1000,
            coherence_threshold: 0.4,
        }
    }
}
99
/// Embedding and derived metrics for a single author.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AuthorEmbedding {
    /// Unique author identifier.
    pub author_id: String,
    /// Display name (currently synthesized as `Author_{id}`).
    pub name: String,
    /// Institutional affiliations.
    pub affiliations: Vec<String>,
    /// Topic labels extracted from the author's publications.
    pub research_topics: Vec<String>,
    /// h-index computed over the author's publications.
    pub h_index: f64,
    /// Total citations across the author's publications.
    pub citation_count: u64,
    /// Number of publications by this author.
    pub publication_count: u64,
    /// Dense embedding vector (length = configured embedding dimension).
    pub embedding: Vector,
    /// Mean collaboration strength (see `calculate_collaboration_score`).
    pub collaboration_score: f64,
    /// Overall impact score in [0, 1].
    pub impact_score: f64,
    /// Coarse career-stage classification.
    pub career_stage: CareerStage,
    /// When this embedding was last recomputed.
    pub last_updated: DateTime<Utc>,
}
128
/// Embedding and metadata for a single publication.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PublicationEmbedding {
    /// Unique publication identifier.
    pub publication_id: String,
    /// Title of the publication.
    pub title: String,
    /// Abstract text.
    pub abstract_text: String,
    /// Author identifiers.
    pub authors: Vec<String>,
    /// Venue (journal/conference) name.
    pub venue: String,
    /// Publication year.
    pub year: u32,
    /// Current citation count.
    pub citation_count: u64,
    /// Per-topic weight vector (length = configured number of topics).
    pub topic_distribution: Vec<f64>,
    /// Dense embedding vector (length = configured embedding dimension).
    pub embedding: Vector,
    /// Predicted impact score in [0, 1].
    pub predicted_impact: f64,
    /// Kind of publication (journal article, preprint, ...).
    pub publication_type: PublicationType,
    /// DOI, if known.
    pub doi: Option<String>,
    /// When this embedding was last recomputed.
    pub last_updated: DateTime<Utc>,
}
159
/// Coarse career stage, bucketed from citation/publication/h-index
/// thresholds in `classify_career_stage`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum CareerStage {
    EarlyCareer,
    MidCareer,
    SeniorCareer,
    Emeritus,
    Unknown,
}
169
/// Kind of scholarly output.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum PublicationType {
    JournalArticle,
    ConferencePaper,
    BookChapter,
    Book,
    Preprint,
    Thesis,
    TechnicalReport,
    Other,
}
182
/// Citation graph plus derived relationship data.
#[derive(Debug, Clone)]
pub struct CitationNetwork {
    /// Outgoing citations keyed by citing paper id (see `add_citation`).
    pub citations: HashMap<String, Vec<Citation>>,
    /// Co-citation pairs keyed by paper id.
    pub co_citations: HashMap<String, Vec<CoCitation>>,
    /// Bibliographic-coupling pairs keyed by paper id.
    pub bibliographic_coupling: HashMap<String, Vec<BibliographicCoupling>>,
    /// Citation time series keyed by paper id.
    pub temporal_patterns: HashMap<String, Vec<TemporalCitation>>,
}
195
/// A single citation edge from one paper to another.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Citation {
    /// Id of the paper making the citation.
    pub citing_paper: String,
    /// Id of the paper being cited.
    pub cited_paper: String,
    /// Surrounding sentence/context of the citation.
    pub context: String,
    /// Rhetorical function of the citation.
    pub citation_type: CitationType,
    /// Section of the citing paper where the citation occurs.
    pub section: PaperSection,
    /// When the citation was recorded.
    pub timestamp: DateTime<Utc>,
}
212
/// Rhetorical function of a citation within the citing paper.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum CitationType {
    Supportive,
    Contrasting,
    Neutral,
    Background,
    Methodological,
}
222
/// Section of a paper in which a citation appears.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum PaperSection {
    Introduction,
    RelatedWork,
    Methods,
    Results,
    Discussion,
    Conclusion,
    Other,
}
234
/// Two papers cited together by the same citing papers.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CoCitation {
    pub paper1: String,
    pub paper2: String,
    /// How many papers cite both.
    pub co_citation_count: u32,
    /// Similarity derived from the co-citation count.
    pub similarity_score: f64,
}
247
/// Two papers that cite overlapping sets of references.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BibliographicCoupling {
    pub paper1: String,
    pub paper2: String,
    /// Number of references the two papers have in common.
    pub shared_references: u32,
    /// Coupling strength derived from the shared references.
    pub coupling_strength: f64,
}
260
/// A point in a paper's citation time series.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TemporalCitation {
    pub paper_id: String,
    /// Sample time of this observation.
    pub timestamp: DateTime<Utc>,
    /// Cumulative citation count at `timestamp`.
    pub citation_count: u64,
    /// Rate of citation accrual at `timestamp`.
    pub citation_velocity: f64,
}
273
/// Author collaboration graph and derived community structure.
#[derive(Debug, Clone)]
pub struct CollaborationNetwork {
    /// Collaboration edges keyed by author id.
    pub collaborations: HashMap<String, Vec<Collaboration>>,
    /// Detected research communities.
    pub research_communities: Vec<ResearchCommunity>,
    /// Collaboration-activity time series keyed by author id.
    pub temporal_collaborations: HashMap<String, Vec<TemporalCollaboration>>,
}
284
/// A collaboration edge between two authors.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Collaboration {
    pub author1: String,
    pub author2: String,
    /// Number of co-authored publications.
    pub joint_publications: u32,
    /// Edge weight; averaged by `calculate_collaboration_score`.
    pub strength: f64,
    /// Topics the two authors have published on together.
    pub shared_topics: Vec<String>,
    /// Date of the first joint publication.
    pub first_collaboration: DateTime<Utc>,
    /// Date of the most recent joint publication.
    pub last_collaboration: DateTime<Utc>,
}
303
/// A detected community of collaborating authors.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ResearchCommunity {
    /// Unique community identifier.
    pub community_id: String,
    /// Author ids belonging to the community.
    pub members: Vec<String>,
    /// Dominant research topics of the community.
    pub topics: Vec<String>,
    /// Most central members of the community.
    pub central_members: Vec<String>,
    /// How tightly knit the community is.
    pub coherence_score: f64,
    /// Number of members (expected to equal `members.len()`).
    pub size: usize,
}
320
/// A point in an author's collaboration-activity time series.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TemporalCollaboration {
    pub author_id: String,
    /// Sample time of this observation.
    pub timestamp: DateTime<Utc>,
    /// Collaborations active at `timestamp`.
    pub active_collaborations: u32,
    /// Collaborations newly started at `timestamp`.
    pub new_collaborations: u32,
}
333
/// A fitted topic with its word distribution and popularity history.
#[derive(Debug, Clone)]
pub struct TopicModel {
    /// Unique topic identifier.
    pub topic_id: String,
    /// Human-readable topic label.
    pub topic_name: String,
    /// Top words with their weights, strongest first.
    pub topic_words: Vec<(String, f64)>,
    /// Per-document topic weight, keyed by document id.
    pub document_topics: HashMap<String, f64>,
    /// Topic coherence metric.
    pub coherence_score: f64,
    /// Popularity history, oldest first.
    pub temporal_trend: Vec<TopicTrend>,
}
350
/// A point in a topic's popularity time series.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TopicTrend {
    /// Sample time of this observation.
    pub timestamp: DateTime<Utc>,
    /// Relative popularity of the topic at `timestamp`.
    pub popularity: f64,
    /// Publications on the topic at `timestamp`.
    pub publication_count: u64,
    /// Rate of growth in popularity.
    pub growth_rate: f64,
}
363
/// State of an impact-prediction model.
/// NOTE(review): declared but not yet used anywhere in this file.
#[derive(Debug, Clone)]
pub struct ImpactPredictor {
    /// Learned weight per feature name.
    pub feature_weights: HashMap<String, f64>,
    /// Evaluation metrics from the last training run.
    pub performance_metrics: PredictionMetrics,
    /// When the model was last retrained.
    pub last_update: DateTime<Utc>,
}
374
/// Evaluation metrics for an impact-prediction model.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PredictionMetrics {
    /// Mean absolute error.
    pub mae: f64,
    /// Root mean squared error.
    pub rmse: f64,
    /// Coefficient of determination.
    pub r2_score: f64,
    /// Precision at the top-k predictions, keyed by k.
    pub precision_at_k: HashMap<u32, f64>,
}
387
388impl ResearchNetworkAnalyzer {
389 pub fn new(config: ResearchNetworkConfig) -> Self {
391 Self {
392 author_embeddings: Arc::new(RwLock::new(HashMap::new())),
393 publication_embeddings: Arc::new(RwLock::new(HashMap::new())),
394 citation_network: Arc::new(RwLock::new(CitationNetwork {
395 citations: HashMap::new(),
396 co_citations: HashMap::new(),
397 bibliographic_coupling: HashMap::new(),
398 temporal_patterns: HashMap::new(),
399 })),
400 collaboration_network: Arc::new(RwLock::new(CollaborationNetwork {
401 collaborations: HashMap::new(),
402 research_communities: Vec::new(),
403 temporal_collaborations: HashMap::new(),
404 })),
405 topic_models: Arc::new(RwLock::new(HashMap::new())),
406 config,
407 analysis_tasks: Vec::new(),
408 }
409 }
410
411 pub async fn start(&mut self) -> Result<()> {
413 info!("Starting research network analysis system");
414
415 let citation_task = self.start_citation_analysis().await;
417 self.analysis_tasks.push(citation_task);
418
419 let collaboration_task = self.start_collaboration_analysis().await;
421 self.analysis_tasks.push(collaboration_task);
422
423 let impact_task = self.start_impact_prediction().await;
425 self.analysis_tasks.push(impact_task);
426
427 let topic_task = self.start_topic_modeling().await;
429 self.analysis_tasks.push(topic_task);
430
431 info!("Research network analysis system started successfully");
432 Ok(())
433 }
434
435 pub async fn stop(&mut self) {
437 info!("Stopping research network analysis system");
438
439 for task in self.analysis_tasks.drain(..) {
440 task.abort();
441 }
442
443 info!("Research network analysis system stopped");
444 }
445
    /// Returns the embedding for `author_id`, computing and caching it on
    /// first request.
    ///
    /// Lock guards are scoped in their own blocks so they are dropped
    /// before any `.await` (a `std::sync::RwLock` guard must not be held
    /// across awaits).
    pub async fn generate_author_embedding(&self, author_id: &str) -> Result<AuthorEmbedding> {
        // Fast path: serve from cache.
        {
            let embeddings = self.author_embeddings.read().unwrap();
            if let Some(existing) = embeddings.get(author_id) {
                return Ok(existing.clone());
            }
        }

        info!("Generating author embedding for: {}", author_id);

        // Gather raw inputs (backed by placeholder data sources for now).
        let author_publications = self.get_author_publications(author_id).await?;

        let collaborations = self.get_author_collaborations(author_id).await?;

        let research_topics = self
            .extract_author_topics(author_id, &author_publications)
            .await?;

        // Derived scalar metrics.
        let h_index = self.calculate_h_index(&author_publications).await?;
        let citation_count = author_publications.iter().map(|p| p.citation_count).sum();
        let collaboration_score = self.calculate_collaboration_score(&collaborations).await?;
        let impact_score = self.calculate_author_impact_score(author_id).await?;

        let embedding = self
            .compute_author_embedding_vector(
                &author_publications,
                &collaborations,
                &research_topics,
            )
            .await?;

        let career_stage = self
            .classify_career_stage(citation_count, author_publications.len() as u64, h_index)
            .await?;

        let author_embedding = AuthorEmbedding {
            author_id: author_id.to_string(),
            // Placeholder identity data until a real author store is wired in.
            name: format!("Author_{author_id}"),
            affiliations: vec!["Unknown".to_string()],
            research_topics,
            h_index,
            citation_count,
            publication_count: author_publications.len() as u64,
            embedding,
            collaboration_score,
            impact_score,
            career_stage,
            last_updated: Utc::now(),
        };

        // Publish to the cache. A concurrent caller may have inserted first;
        // this simply overwrites with an equivalent value.
        {
            let mut embeddings = self.author_embeddings.write().unwrap();
            embeddings.insert(author_id.to_string(), author_embedding.clone());
        }

        info!(
            "Generated author embedding for {} with h-index: {:.2}",
            author_id, h_index
        );
        Ok(author_embedding)
    }
516
    /// Returns the embedding for `publication_id`, computing and caching
    /// it on first request.
    ///
    /// Metadata (title, abstract, authors, venue, year, DOI) is currently
    /// synthesized placeholder data; only the citation count, topic
    /// distribution, embedding vector and impact prediction run through
    /// the computation helpers.
    pub async fn generate_publication_embedding(
        &self,
        publication_id: &str,
    ) -> Result<PublicationEmbedding> {
        // Fast path: serve from cache. Guard is scoped so it is dropped
        // before the awaits below.
        {
            let embeddings = self.publication_embeddings.read().unwrap();
            if let Some(existing) = embeddings.get(publication_id) {
                return Ok(existing.clone());
            }
        }

        info!("Generating publication embedding for: {}", publication_id);

        // Placeholder metadata until a real publication store is wired in.
        let title = format!("Publication_{publication_id}");
        let abstract_text = format!("Abstract for publication {publication_id}");
        let authors = vec![format!("author_{}", publication_id)];
        let venue = "Unknown Venue".to_string();
        let year = 2023; // placeholder year
        let doi = Some(format!("10.1000/{publication_id}"));

        let citation_count = self.get_publication_citation_count(publication_id).await?;

        let topic_distribution = self
            .extract_publication_topics(publication_id, &abstract_text)
            .await?;

        let embedding = self
            .compute_publication_embedding_vector(&title, &abstract_text, &topic_distribution)
            .await?;

        let predicted_impact = self
            .predict_publication_impact(citation_count, &topic_distribution, &embedding)
            .await?;

        let publication_embedding = PublicationEmbedding {
            publication_id: publication_id.to_string(),
            title,
            abstract_text,
            authors,
            venue,
            year,
            citation_count,
            topic_distribution,
            embedding,
            predicted_impact,
            publication_type: PublicationType::JournalArticle, // assumed default type
            doi,
            last_updated: Utc::now(),
        };

        // Publish to the cache for subsequent calls.
        {
            let mut embeddings = self.publication_embeddings.write().unwrap();
            embeddings.insert(publication_id.to_string(), publication_embedding.clone());
        }

        info!(
            "Generated publication embedding for {} with predicted impact: {:.3}",
            publication_id, predicted_impact
        );
        Ok(publication_embedding)
    }
586
587 pub async fn analyze_citation_patterns(&self, publication_id: &str) -> Result<Vec<Citation>> {
589 let network = self.citation_network.read().unwrap();
590
591 if let Some(citations) = network.citations.get(publication_id) {
592 Ok(citations.clone())
593 } else {
594 Ok(Vec::new())
595 }
596 }
597
598 pub async fn find_similar_authors(
600 &self,
601 author_id: &str,
602 k: usize,
603 ) -> Result<Vec<(String, f64)>> {
604 let target_embedding = self.generate_author_embedding(author_id).await?;
605 let embeddings_data: Vec<(String, AuthorEmbedding)> = {
606 let embeddings = self.author_embeddings.read().unwrap();
607 embeddings
608 .iter()
609 .filter(|(other_id, _)| *other_id != author_id)
610 .map(|(id, emb)| (id.clone(), emb.clone()))
611 .collect()
612 };
613
614 let mut similarities = Vec::new();
615
616 for (other_id, other_embedding) in embeddings_data {
617 let similarity = self
618 .calculate_author_similarity(&target_embedding, &other_embedding)
619 .await?;
620 similarities.push((other_id, similarity));
621 }
622
623 similarities.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
625 similarities.truncate(k);
626
627 Ok(similarities)
628 }
629
630 pub async fn predict_research_impact(&self, publication_id: &str) -> Result<f64> {
632 let publication = self.generate_publication_embedding(publication_id).await?;
633 Ok(publication.predicted_impact)
634 }
635
636 pub async fn analyze_research_trends(
638 &self,
639 topic: &str,
640 years: u32,
641 ) -> Result<Vec<TopicTrend>> {
642 let topics = self.topic_models.read().unwrap();
643
644 if let Some(topic_model) = topics.get(topic) {
645 let cutoff_date = Utc::now() - chrono::Duration::days((years * 365) as i64);
647 let recent_trends: Vec<TopicTrend> = topic_model
648 .temporal_trend
649 .iter()
650 .filter(|trend| trend.timestamp > cutoff_date)
651 .cloned()
652 .collect();
653
654 Ok(recent_trends)
655 } else {
656 Ok(Vec::new())
657 }
658 }
659
660 pub async fn get_research_communities(&self) -> Result<Vec<ResearchCommunity>> {
662 let network = self.collaboration_network.read().unwrap();
663 Ok(network.research_communities.clone())
664 }
665
666 pub async fn add_citation(&self, citation: Citation) -> Result<()> {
668 let mut network = self.citation_network.write().unwrap();
669
670 network
671 .citations
672 .entry(citation.citing_paper.clone())
673 .or_default()
674 .push(citation);
675
676 info!("Added new citation to network");
677 Ok(())
678 }
679
680 async fn get_author_publications(&self, _author_id: &str) -> Result<Vec<PublicationEmbedding>> {
683 Ok(Vec::new())
685 }
686
687 async fn get_author_collaborations(&self, _author_id: &str) -> Result<Vec<Collaboration>> {
688 Ok(Vec::new())
690 }
691
692 async fn extract_author_topics(
693 &self,
694 _author_id: &str,
695 _publications: &[PublicationEmbedding],
696 ) -> Result<Vec<String>> {
697 Ok(vec![
699 "machine_learning".to_string(),
700 "natural_language_processing".to_string(),
701 ])
702 }
703
704 async fn calculate_h_index(&self, publications: &[PublicationEmbedding]) -> Result<f64> {
705 let mut citation_counts: Vec<u64> = publications.iter().map(|p| p.citation_count).collect();
706
707 citation_counts.sort_by(|a, b| b.cmp(a));
708
709 let mut h_index = 0;
710 for (i, &citations) in citation_counts.iter().enumerate() {
711 if citations >= (i + 1) as u64 {
712 h_index = i + 1;
713 } else {
714 break;
715 }
716 }
717
718 Ok(h_index as f64)
719 }
720
721 async fn calculate_collaboration_score(&self, collaborations: &[Collaboration]) -> Result<f64> {
722 if collaborations.is_empty() {
723 return Ok(0.0);
724 }
725
726 let total_strength: f64 = collaborations.iter().map(|c| c.strength).sum();
727 Ok(total_strength / collaborations.len() as f64)
728 }
729
730 async fn calculate_author_impact_score(&self, _author_id: &str) -> Result<f64> {
731 Ok(0.75)
733 }
734
735 async fn compute_author_embedding_vector(
736 &self,
737 _publications: &[PublicationEmbedding],
738 _collaborations: &[Collaboration],
739 _topics: &[String],
740 ) -> Result<Vector> {
741 let values = (0..self.config.embedding_dimension)
743 .map(|_| {
744 let mut random = Random::default();
745 random.random::<f32>()
746 })
747 .collect();
748 Ok(Vector::new(values))
749 }
750
    /// Buckets an author into a career stage from citation count,
    /// publication count and h-index thresholds.
    ///
    /// NOTE(review): the final `Unknown` arm is reachable only when
    /// `h_index` is NaN — any finite input that fails the first two tests
    /// necessarily satisfies the third. With values produced by
    /// `calculate_h_index` (always a finite count) it never fires.
    async fn classify_career_stage(
        &self,
        citation_count: u64,
        publication_count: u64,
        h_index: f64,
    ) -> Result<CareerStage> {
        if citation_count < 100 && publication_count < 10 && h_index < 5.0 {
            Ok(CareerStage::EarlyCareer)
        } else if citation_count < 1000 && publication_count < 50 && h_index < 20.0 {
            Ok(CareerStage::MidCareer)
        } else if citation_count >= 1000 || publication_count >= 50 || h_index >= 20.0 {
            Ok(CareerStage::SeniorCareer)
        } else {
            Ok(CareerStage::Unknown)
        }
    }
767
768 async fn get_publication_citation_count(&self, _publication_id: &str) -> Result<u64> {
769 let mut random = Random::default();
771 Ok(random.random::<u64>() % 100)
772 }
773
774 async fn extract_publication_topics(
775 &self,
776 _publication_id: &str,
777 _abstract_text: &str,
778 ) -> Result<Vec<f64>> {
779 let num_topics = self.config.topic_config.num_topics;
781 let mut distribution = vec![0.0; num_topics];
782
783 let total: f64 = (0..num_topics)
785 .map(|_| {
786 let mut random = Random::default();
787 random.random::<f64>()
788 })
789 .sum();
790 for item in distribution.iter_mut().take(num_topics) {
791 let mut random = Random::default();
792 *item = random.random::<f64>() / total;
793 }
794
795 Ok(distribution)
796 }
797
798 async fn compute_publication_embedding_vector(
799 &self,
800 _title: &str,
801 _abstract_text: &str,
802 _topic_distribution: &[f64],
803 ) -> Result<Vector> {
804 let values = (0..self.config.embedding_dimension)
806 .map(|_| {
807 let mut random = Random::default();
808 random.random::<f32>()
809 })
810 .collect();
811 Ok(Vector::new(values))
812 }
813
814 async fn predict_publication_impact(
815 &self,
816 citation_count: u64,
817 _topic_distribution: &[f64],
818 _embedding: &Vector,
819 ) -> Result<f64> {
820 let base_impact = (citation_count as f64).ln() / 10.0;
822 Ok(base_impact.clamp(0.0, 1.0))
823 }
824
825 async fn calculate_author_similarity(
826 &self,
827 author1: &AuthorEmbedding,
828 author2: &AuthorEmbedding,
829 ) -> Result<f64> {
830 let embedding1 = &author1.embedding.values;
832 let embedding2 = &author2.embedding.values;
833
834 let dot_product: f32 = embedding1
835 .iter()
836 .zip(embedding2.iter())
837 .map(|(a, b)| a * b)
838 .sum();
839 let norm1: f32 = embedding1.iter().map(|x| x * x).sum::<f32>().sqrt();
840 let norm2: f32 = embedding2.iter().map(|x| x * x).sum::<f32>().sqrt();
841
842 let cosine_similarity = if norm1 > 0.0 && norm2 > 0.0 {
843 dot_product / (norm1 * norm2)
844 } else {
845 0.0
846 };
847
848 let topic_similarity = self
850 .calculate_topic_similarity(&author1.research_topics, &author2.research_topics)
851 .await?;
852
853 let final_similarity = 0.7 * cosine_similarity as f64 + 0.3 * topic_similarity;
855
856 Ok(final_similarity)
857 }
858
859 async fn calculate_topic_similarity(
860 &self,
861 topics1: &[String],
862 topics2: &[String],
863 ) -> Result<f64> {
864 let set1: HashSet<_> = topics1.iter().collect();
865 let set2: HashSet<_> = topics2.iter().collect();
866
867 let intersection = set1.intersection(&set2).count();
868 let union = set1.union(&set2).count();
869
870 if union > 0 {
871 Ok(intersection as f64 / union as f64)
872 } else {
873 Ok(0.0)
874 }
875 }
876
877 async fn start_citation_analysis(&self) -> JoinHandle<()> {
880 let _citation_network = Arc::clone(&self.citation_network);
881 let interval =
882 std::time::Duration::from_secs(self.config.citation_update_interval_hours * 3600);
883
884 tokio::spawn(async move {
885 let mut interval_timer = tokio::time::interval(interval);
886
887 loop {
888 interval_timer.tick().await;
889
890 info!("Performing citation network analysis");
892
893 debug!("Citation network analysis completed");
897 }
898 })
899 }
900
901 async fn start_collaboration_analysis(&self) -> JoinHandle<()> {
902 let _collaboration_network = Arc::clone(&self.collaboration_network);
903 let interval = std::time::Duration::from_secs(
904 self.config.collaboration_analysis_interval_hours * 3600,
905 );
906
907 tokio::spawn(async move {
908 let mut interval_timer = tokio::time::interval(interval);
909
910 loop {
911 interval_timer.tick().await;
912
913 info!("Performing collaboration network analysis");
915
916 debug!("Collaboration network analysis completed");
920 }
921 })
922 }
923
924 async fn start_impact_prediction(&self) -> JoinHandle<()> {
925 let interval =
926 std::time::Duration::from_secs(self.config.impact_prediction_refresh_hours * 3600);
927
928 tokio::spawn(async move {
929 let mut interval_timer = tokio::time::interval(interval);
930
931 loop {
932 interval_timer.tick().await;
933
934 info!("Refreshing impact prediction models");
936
937 debug!("Impact prediction models refreshed");
941 }
942 })
943 }
944
    /// Spawns the periodic topic-model refresh loop (fixed 24 h period,
    /// not yet driven by `config`).
    ///
    /// Currently installs a single hard-coded "machine_learning" model as
    /// placeholder output on every tick.
    async fn start_topic_modeling(&self) -> JoinHandle<()> {
        let topic_models = Arc::clone(&self.topic_models);
        let _config = self.config.clone();
        // Daily refresh; replace with a configured interval when available.
        let interval = std::time::Duration::from_secs(24 * 3600);

        tokio::spawn(async move {
            let mut interval_timer = tokio::time::interval(interval);

            loop {
                // First tick fires immediately, then every 24 h.
                interval_timer.tick().await;

                info!("Updating topic models");

                // Placeholder model: static word weights plus a two-point
                // upward popularity trend (one year ago -> now).
                let topic_model = TopicModel {
                    topic_id: "machine_learning".to_string(),
                    topic_name: "Machine Learning".to_string(),
                    topic_words: vec![
                        ("neural".to_string(), 0.1),
                        ("network".to_string(), 0.09),
                        ("learning".to_string(), 0.08),
                        ("algorithm".to_string(), 0.07),
                        ("model".to_string(), 0.06),
                    ],
                    document_topics: HashMap::new(),
                    coherence_score: 0.75,
                    temporal_trend: vec![
                        TopicTrend {
                            timestamp: Utc::now() - chrono::Duration::days(365),
                            popularity: 0.6,
                            publication_count: 1000,
                            growth_rate: 0.15,
                        },
                        TopicTrend {
                            timestamp: Utc::now(),
                            popularity: 0.8,
                            publication_count: 1500,
                            growth_rate: 0.25,
                        },
                    ],
                };

                // Scope the write guard so it is dropped before sleeping.
                {
                    let mut models = topic_models.write().unwrap();
                    models.insert("machine_learning".to_string(), topic_model);
                }

                debug!("Topic models updated");
            }
        })
    }
997}
998
/// Summary statistics over the analyzer's cached network state
/// (see `get_network_metrics`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NetworkMetrics {
    /// Number of cached author embeddings.
    pub total_authors: usize,
    /// Number of cached publication embeddings.
    pub total_publications: usize,
    /// Sum of citation counts across cached publications.
    pub total_citations: u64,
    /// `total_citations / total_publications` (0.0 when empty).
    pub avg_citations_per_paper: f64,
    /// Graph density (currently a placeholder constant).
    pub network_density: f64,
    /// Clustering coefficient (currently a placeholder constant).
    pub clustering_coefficient: f64,
    /// Average shortest-path length (currently a placeholder constant).
    pub average_path_length: f64,
    /// Up to 10 author ids with the highest impact scores.
    pub top_authors: Vec<String>,
    /// Trending topic labels (currently hard-coded).
    pub trending_topics: Vec<String>,
}
1021
1022impl ResearchNetworkAnalyzer {
1023 pub async fn get_network_metrics(&self) -> Result<NetworkMetrics> {
1025 let author_embeddings = self.author_embeddings.read().unwrap();
1026 let publication_embeddings = self.publication_embeddings.read().unwrap();
1027
1028 let total_authors = author_embeddings.len();
1029 let total_publications = publication_embeddings.len();
1030 let total_citations = publication_embeddings
1031 .values()
1032 .map(|p| p.citation_count)
1033 .sum();
1034
1035 let avg_citations_per_paper = if total_publications > 0 {
1036 total_citations as f64 / total_publications as f64
1037 } else {
1038 0.0
1039 };
1040
1041 let mut author_scores: Vec<_> = author_embeddings
1043 .iter()
1044 .map(|(id, embedding)| (id.clone(), embedding.impact_score))
1045 .collect();
1046 author_scores.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
1047 let top_authors: Vec<String> = author_scores
1048 .into_iter()
1049 .take(10)
1050 .map(|(id, _)| id)
1051 .collect();
1052
1053 Ok(NetworkMetrics {
1054 total_authors,
1055 total_publications,
1056 total_citations,
1057 avg_citations_per_paper,
1058 network_density: 0.1, clustering_coefficient: 0.3, average_path_length: 4.5, top_authors,
1062 trending_topics: vec!["machine_learning".to_string(), "deep_learning".to_string()],
1063 })
1064 }
1065}
1066
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly constructed analyzer starts with empty embedding caches.
    #[tokio::test]
    async fn test_research_network_analyzer_creation() {
        let config = ResearchNetworkConfig::default();
        let analyzer = ResearchNetworkAnalyzer::new(config);

        assert_eq!(analyzer.author_embeddings.read().unwrap().len(), 0);
        assert_eq!(analyzer.publication_embeddings.read().unwrap().len(), 0);
    }

    /// Generating an author embedding fills in the id, a non-negative
    /// h-index, and a vector of the configured dimensionality.
    #[tokio::test]
    async fn test_author_embedding_generation() {
        let config = ResearchNetworkConfig::default();
        let analyzer = ResearchNetworkAnalyzer::new(config);

        let result = analyzer.generate_author_embedding("test_author").await;
        assert!(result.is_ok());

        let embedding = result.unwrap();
        assert_eq!(embedding.author_id, "test_author");
        assert!(embedding.h_index >= 0.0);
        // Default `embedding_dimension` is 512.
        assert_eq!(embedding.embedding.values.len(), 512);
    }

    /// Publication embeddings carry the id and a predicted impact clamped
    /// to [0, 1].
    #[tokio::test]
    async fn test_publication_embedding_generation() {
        let config = ResearchNetworkConfig::default();
        let analyzer = ResearchNetworkAnalyzer::new(config);

        let result = analyzer
            .generate_publication_embedding("test_publication")
            .await;
        assert!(result.is_ok());

        let embedding = result.unwrap();
        assert_eq!(embedding.publication_id, "test_publication");
        assert!(embedding.predicted_impact >= 0.0);
        assert!(embedding.predicted_impact <= 1.0);
    }

    /// Citation counts [10, 5] yield an h-index of 2 (two papers with at
    /// least 2 citations each).
    #[tokio::test]
    async fn test_h_index_calculation() {
        let config = ResearchNetworkConfig::default();
        let analyzer = ResearchNetworkAnalyzer::new(config);

        let publications = vec![
            PublicationEmbedding {
                publication_id: "p1".to_string(),
                title: "Test 1".to_string(),
                abstract_text: "Abstract 1".to_string(),
                authors: vec!["author1".to_string()],
                venue: "Venue 1".to_string(),
                year: 2023,
                citation_count: 10,
                topic_distribution: vec![],
                embedding: Vector::new(vec![]),
                predicted_impact: 0.5,
                publication_type: PublicationType::JournalArticle,
                doi: None,
                last_updated: Utc::now(),
            },
            PublicationEmbedding {
                publication_id: "p2".to_string(),
                title: "Test 2".to_string(),
                abstract_text: "Abstract 2".to_string(),
                authors: vec!["author1".to_string()],
                venue: "Venue 2".to_string(),
                year: 2023,
                citation_count: 5,
                topic_distribution: vec![],
                embedding: Vector::new(vec![]),
                predicted_impact: 0.3,
                publication_type: PublicationType::JournalArticle,
                doi: None,
                last_updated: Utc::now(),
            },
        ];

        let h_index = analyzer.calculate_h_index(&publications).await.unwrap();
        assert_eq!(h_index, 2.0);
    }

    /// Low metrics classify as EarlyCareer; high metrics as SeniorCareer.
    /// Uses a manual runtime since this is a plain `#[test]`.
    #[test]
    fn test_career_stage_classification() {
        let rt = tokio::runtime::Runtime::new().unwrap();
        let config = ResearchNetworkConfig::default();
        let analyzer = ResearchNetworkAnalyzer::new(config);

        let stage = rt
            .block_on(analyzer.classify_career_stage(50, 5, 3.0))
            .unwrap();
        assert!(matches!(stage, CareerStage::EarlyCareer));

        let stage = rt
            .block_on(analyzer.classify_career_stage(2000, 100, 25.0))
            .unwrap();
        assert!(matches!(stage, CareerStage::SeniorCareer));
    }

    /// Metrics reflect the one author and one publication generated above.
    #[tokio::test]
    async fn test_network_metrics() {
        let config = ResearchNetworkConfig::default();
        let analyzer = ResearchNetworkAnalyzer::new(config);

        let _author_embedding = analyzer
            .generate_author_embedding("test_author")
            .await
            .unwrap();
        let _publication_embedding = analyzer
            .generate_publication_embedding("test_publication")
            .await
            .unwrap();

        let metrics = analyzer.get_network_metrics().await.unwrap();
        assert_eq!(metrics.total_authors, 1);
        assert_eq!(metrics.total_publications, 1);
    }
}