1use crate::Vector;
7use anyhow::Result;
8use chrono::{DateTime, Utc};
9use scirs2_core::random::{Random, Rng};
10use serde::{Deserialize, Serialize};
11use std::collections::{HashMap, HashSet};
12use std::sync::{Arc, RwLock};
13use tokio::task::JoinHandle;
14use tracing::{debug, info};
15
/// Central coordinator for analyzing an academic research network.
///
/// Holds shared, lock-protected caches of author/publication embeddings and
/// the citation/collaboration graphs, plus handles to the background
/// analysis tasks spawned by `start`.
pub struct ResearchNetworkAnalyzer {
    // Cache of computed author embeddings, keyed by author id.
    author_embeddings: Arc<RwLock<HashMap<String, AuthorEmbedding>>>,
    // Cache of computed publication embeddings, keyed by publication id.
    publication_embeddings: Arc<RwLock<HashMap<String, PublicationEmbedding>>>,
    // Directed citation graph plus derived co-citation/coupling data.
    citation_network: Arc<RwLock<CitationNetwork>>,
    // Author collaboration graph and detected research communities.
    collaboration_network: Arc<RwLock<CollaborationNetwork>>,
    // Topic models keyed by topic id.
    topic_models: Arc<RwLock<HashMap<String, TopicModel>>>,
    // Tuning knobs for the analyzer.
    config: ResearchNetworkConfig,
    // Handles of the periodic background tasks; aborted by `stop`.
    analysis_tasks: Vec<JoinHandle<()>>,
}
33
/// Tuning parameters for [`ResearchNetworkAnalyzer`].
///
/// NOTE(review): only the interval fields, `topic_config.num_topics` and
/// `embedding_dimension` are read in this file; the remaining knobs appear
/// to be reserved for callers elsewhere — confirm they are consumed.
#[derive(Debug, Clone)]
pub struct ResearchNetworkConfig {
    /// Upper bound on tracked authors (not enforced in this file).
    pub max_authors: usize,
    /// Upper bound on tracked publications (not enforced in this file).
    pub max_publications: usize,
    /// Cadence of the background citation-analysis task, in hours.
    pub citation_update_interval_hours: u64,
    /// Cadence of the background collaboration-analysis task, in hours.
    pub collaboration_analysis_interval_hours: u64,
    /// Cadence of the background impact-prediction refresh, in hours.
    pub impact_prediction_refresh_hours: u64,
    /// Whether citations should be tracked as they arrive.
    pub enable_real_time_citation_tracking: bool,
    /// Minimum citations before a paper participates in analysis.
    pub min_citation_threshold: u32,
    /// Settings for the topic-modeling pass.
    pub topic_config: TopicModelingConfig,
    /// Dimensionality of author/publication embedding vectors.
    pub embedding_dimension: usize,
}
56
57impl Default for ResearchNetworkConfig {
58 fn default() -> Self {
59 Self {
60 max_authors: 100_000,
61 max_publications: 1_000_000,
62 citation_update_interval_hours: 24,
63 collaboration_analysis_interval_hours: 12,
64 impact_prediction_refresh_hours: 48,
65 enable_real_time_citation_tracking: true,
66 min_citation_threshold: 5,
67 topic_config: TopicModelingConfig::default(),
68 embedding_dimension: 512,
69 }
70 }
71}
72
/// Settings for the topic-modeling (LDA-style) pipeline.
///
/// NOTE(review): only `num_topics` is consumed in this file (as the length
/// of topic distributions); the remaining knobs await the full pipeline.
#[derive(Debug, Clone)]
pub struct TopicModelingConfig {
    /// Number of topics to fit.
    pub num_topics: usize,
    /// Words rarer than this frequency are ignored.
    pub min_word_freq: u32,
    /// Words appearing in more than this fraction of documents are ignored.
    pub max_doc_freq_ratio: f64,
    /// Iteration budget for LDA fitting.
    pub lda_iterations: u32,
    /// Minimum coherence for a topic to be kept.
    pub coherence_threshold: f64,
}
87
88impl Default for TopicModelingConfig {
89 fn default() -> Self {
90 Self {
91 num_topics: 50,
92 min_word_freq: 5,
93 max_doc_freq_ratio: 0.8,
94 lda_iterations: 1000,
95 coherence_threshold: 0.4,
96 }
97 }
98}
99
/// Derived embedding and scalar metrics for a single author.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AuthorEmbedding {
    /// Unique author identifier.
    pub author_id: String,
    /// Display name (currently a placeholder derived from the id).
    pub name: String,
    /// Institutional affiliations (currently a placeholder list).
    pub affiliations: Vec<String>,
    /// Topic ids this author publishes in.
    pub research_topics: Vec<String>,
    /// h-index computed from the author's publication citation counts.
    pub h_index: f64,
    /// Total citations across all of the author's publications.
    pub citation_count: u64,
    /// Number of publications attributed to the author.
    pub publication_count: u64,
    /// Dense vector representation of the author.
    pub embedding: Vector,
    /// Mean collaboration strength across the author's collaborations.
    pub collaboration_score: f64,
    /// Overall impact score (currently a placeholder constant).
    pub impact_score: f64,
    /// Career stage bucket derived from the metrics above.
    pub career_stage: CareerStage,
    /// When this record was (re)computed.
    pub last_updated: DateTime<Utc>,
}
128
/// Derived embedding, metadata, and predicted impact for one publication.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PublicationEmbedding {
    /// Unique publication identifier.
    pub publication_id: String,
    /// Title (currently a placeholder derived from the id).
    pub title: String,
    /// Abstract text (currently a placeholder derived from the id).
    pub abstract_text: String,
    /// Author ids of the publication's authors.
    pub authors: Vec<String>,
    /// Publishing venue name.
    pub venue: String,
    /// Publication year.
    pub year: u32,
    /// Number of times this publication has been cited.
    pub citation_count: u64,
    /// Topic-probability weights (length = configured `num_topics`).
    pub topic_distribution: Vec<f64>,
    /// Dense vector representation of the publication.
    pub embedding: Vector,
    /// Predicted impact score in `[0.0, 1.0]`.
    pub predicted_impact: f64,
    /// Kind of publication (journal article, preprint, ...).
    pub publication_type: PublicationType,
    /// Digital Object Identifier, when known.
    pub doi: Option<String>,
    /// When this record was (re)computed.
    pub last_updated: DateTime<Utc>,
}
159
/// Coarse career stage of an author, assigned by `classify_career_stage`
/// from citation count, publication count, and h-index.
///
/// NOTE(review): `Emeritus` is never produced in this file — confirm it is
/// assigned elsewhere.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum CareerStage {
    EarlyCareer,
    MidCareer,
    SeniorCareer,
    Emeritus,
    Unknown,
}
169
/// Kind of scholarly output a publication record represents.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum PublicationType {
    JournalArticle,
    ConferencePaper,
    BookChapter,
    Book,
    Preprint,
    Thesis,
    TechnicalReport,
    Other,
}
182
/// Citation graph plus structures derived from it, keyed by paper id.
#[derive(Debug, Clone)]
pub struct CitationNetwork {
    /// Outgoing citations, keyed by the citing paper's id.
    pub citations: HashMap<String, Vec<Citation>>,
    /// Co-citation pairs involving each paper.
    pub co_citations: HashMap<String, Vec<CoCitation>>,
    /// Bibliographic-coupling pairs involving each paper.
    pub bibliographic_coupling: HashMap<String, Vec<BibliographicCoupling>>,
    /// Citation counts over time for each paper.
    pub temporal_patterns: HashMap<String, Vec<TemporalCitation>>,
}
195
/// A single directed citation from one paper to another.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Citation {
    /// Id of the paper making the citation.
    pub citing_paper: String,
    /// Id of the paper being cited.
    pub cited_paper: String,
    /// Text surrounding the citation in the citing paper.
    pub context: String,
    /// Rhetorical role of the citation.
    pub citation_type: CitationType,
    /// Section of the citing paper where the citation appears.
    pub section: PaperSection,
    /// When the citation was recorded.
    pub timestamp: DateTime<Utc>,
}
212
/// Rhetorical function a citation plays in the citing paper.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum CitationType {
    Supportive,
    Contrasting,
    Neutral,
    Background,
    Methodological,
}
222
/// Section of a paper in which a citation appears.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum PaperSection {
    Introduction,
    RelatedWork,
    Methods,
    Results,
    Discussion,
    Conclusion,
    Other,
}
234
/// Two papers that are cited together by the same citing papers.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CoCitation {
    pub paper1: String,
    pub paper2: String,
    /// Number of papers that cite both `paper1` and `paper2`.
    pub co_citation_count: u32,
    /// Similarity implied by the co-citation pattern.
    pub similarity_score: f64,
}
247
/// Two papers coupled by citing the same references.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BibliographicCoupling {
    pub paper1: String,
    pub paper2: String,
    /// Number of references the two papers have in common.
    pub shared_references: u32,
    /// Strength implied by the shared references.
    pub coupling_strength: f64,
}
260
/// A point-in-time snapshot of a paper's citation activity.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TemporalCitation {
    pub paper_id: String,
    /// When the snapshot was taken.
    pub timestamp: DateTime<Utc>,
    /// Cumulative citations at that time.
    pub citation_count: u64,
    /// Rate at which citations are accruing.
    pub citation_velocity: f64,
}
273
/// Author collaboration graph and structures derived from it.
#[derive(Debug, Clone)]
pub struct CollaborationNetwork {
    /// Collaborations involving each author, keyed by author id.
    pub collaborations: HashMap<String, Vec<Collaboration>>,
    /// Communities detected in the collaboration graph.
    pub research_communities: Vec<ResearchCommunity>,
    /// Collaboration activity over time, keyed by author id.
    pub temporal_collaborations: HashMap<String, Vec<TemporalCollaboration>>,
}
284
/// A pairwise collaboration relationship between two authors.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Collaboration {
    pub author1: String,
    pub author2: String,
    /// Number of papers the pair co-authored.
    pub joint_publications: u32,
    /// Strength of the relationship; averaged by `calculate_collaboration_score`.
    pub strength: f64,
    /// Topic ids both authors work on.
    pub shared_topics: Vec<String>,
    /// When the pair first collaborated.
    pub first_collaboration: DateTime<Utc>,
    /// When the pair most recently collaborated.
    pub last_collaboration: DateTime<Utc>,
}
303
/// A detected community of collaborating researchers.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ResearchCommunity {
    pub community_id: String,
    /// Author ids belonging to the community.
    pub members: Vec<String>,
    /// Topic ids the community works on.
    pub topics: Vec<String>,
    /// Author ids of the community's most central members.
    pub central_members: Vec<String>,
    /// How tightly knit the community is.
    pub coherence_score: f64,
    /// Member count.
    pub size: usize,
}
320
/// A point-in-time snapshot of one author's collaboration activity.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TemporalCollaboration {
    pub author_id: String,
    /// When the snapshot was taken.
    pub timestamp: DateTime<Utc>,
    /// Collaborations active at that time.
    pub active_collaborations: u32,
    /// Collaborations newly started in that period.
    pub new_collaborations: u32,
}
333
/// A single fitted topic: its word distribution, document weights, and history.
#[derive(Debug, Clone)]
pub struct TopicModel {
    pub topic_id: String,
    /// Human-readable topic label.
    pub topic_name: String,
    /// Top words with their weights, strongest first.
    pub topic_words: Vec<(String, f64)>,
    /// Per-document weight of this topic, keyed by document id.
    pub document_topics: HashMap<String, f64>,
    /// Topic coherence (higher is better).
    pub coherence_score: f64,
    /// Popularity of the topic over time.
    pub temporal_trend: Vec<TopicTrend>,
}
350
/// A point on a topic's popularity timeline.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TopicTrend {
    /// When the sample was taken.
    pub timestamp: DateTime<Utc>,
    /// Relative popularity of the topic at that time.
    pub popularity: f64,
    /// Publications attributed to the topic in that period.
    pub publication_count: u64,
    /// Rate of change in popularity.
    pub growth_rate: f64,
}
363
/// A simple weighted-feature impact-prediction model.
///
/// NOTE(review): declared but not referenced anywhere in this file —
/// confirm it is consumed elsewhere before relying on or removing it.
#[derive(Debug, Clone)]
pub struct ImpactPredictor {
    /// Weight assigned to each named feature.
    pub feature_weights: HashMap<String, f64>,
    /// Quality metrics from the model's last evaluation.
    pub performance_metrics: PredictionMetrics,
    /// When the model was last updated.
    pub last_update: DateTime<Utc>,
}
374
/// Evaluation metrics for an impact-prediction model.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PredictionMetrics {
    /// Mean absolute error.
    pub mae: f64,
    /// Root mean squared error.
    pub rmse: f64,
    /// Coefficient of determination (R²).
    pub r2_score: f64,
    /// Precision@k for each evaluated k.
    pub precision_at_k: HashMap<u32, f64>,
}
387
388impl ResearchNetworkAnalyzer {
389 pub fn new(config: ResearchNetworkConfig) -> Self {
391 Self {
392 author_embeddings: Arc::new(RwLock::new(HashMap::new())),
393 publication_embeddings: Arc::new(RwLock::new(HashMap::new())),
394 citation_network: Arc::new(RwLock::new(CitationNetwork {
395 citations: HashMap::new(),
396 co_citations: HashMap::new(),
397 bibliographic_coupling: HashMap::new(),
398 temporal_patterns: HashMap::new(),
399 })),
400 collaboration_network: Arc::new(RwLock::new(CollaborationNetwork {
401 collaborations: HashMap::new(),
402 research_communities: Vec::new(),
403 temporal_collaborations: HashMap::new(),
404 })),
405 topic_models: Arc::new(RwLock::new(HashMap::new())),
406 config,
407 analysis_tasks: Vec::new(),
408 }
409 }
410
411 pub async fn start(&mut self) -> Result<()> {
413 info!("Starting research network analysis system");
414
415 let citation_task = self.start_citation_analysis().await;
417 self.analysis_tasks.push(citation_task);
418
419 let collaboration_task = self.start_collaboration_analysis().await;
421 self.analysis_tasks.push(collaboration_task);
422
423 let impact_task = self.start_impact_prediction().await;
425 self.analysis_tasks.push(impact_task);
426
427 let topic_task = self.start_topic_modeling().await;
429 self.analysis_tasks.push(topic_task);
430
431 info!("Research network analysis system started successfully");
432 Ok(())
433 }
434
435 pub async fn stop(&mut self) {
437 info!("Stopping research network analysis system");
438
439 for task in self.analysis_tasks.drain(..) {
440 task.abort();
441 }
442
443 info!("Research network analysis system stopped");
444 }
445
    /// Returns the cached embedding for `author_id`, or computes, caches,
    /// and returns a new one.
    ///
    /// NOTE(review): two concurrent calls for the same uncached author may
    /// both compute an embedding; the later cache insert silently wins.
    /// Confirm last-write-wins is acceptable here.
    pub async fn generate_author_embedding(&self, author_id: &str) -> Result<AuthorEmbedding> {
        // Fast path: serve from cache. Scoped so the read guard is dropped
        // before any `.await` below.
        {
            let embeddings = self
                .author_embeddings
                .read()
                .expect("rwlock should not be poisoned");
            if let Some(existing) = embeddings.get(author_id) {
                return Ok(existing.clone());
            }
        }

        info!("Generating author embedding for: {}", author_id);

        // Gather the raw signals the embedding is derived from.
        let author_publications = self.get_author_publications(author_id).await?;

        let collaborations = self.get_author_collaborations(author_id).await?;

        let research_topics = self
            .extract_author_topics(author_id, &author_publications)
            .await?;

        // Scalar metrics derived from the publications/collaborations.
        let h_index = self.calculate_h_index(&author_publications).await?;
        let citation_count = author_publications.iter().map(|p| p.citation_count).sum();
        let collaboration_score = self.calculate_collaboration_score(&collaborations).await?;
        let impact_score = self.calculate_author_impact_score(author_id).await?;

        // Dense vector representation.
        let embedding = self
            .compute_author_embedding_vector(
                &author_publications,
                &collaborations,
                &research_topics,
            )
            .await?;

        let career_stage = self
            .classify_career_stage(citation_count, author_publications.len() as u64, h_index)
            .await?;

        // Name and affiliations are placeholders until a metadata source
        // is wired in.
        let author_embedding = AuthorEmbedding {
            author_id: author_id.to_string(),
            name: format!("Author_{author_id}"),
            affiliations: vec!["Unknown".to_string()],
            research_topics,
            h_index,
            citation_count,
            publication_count: author_publications.len() as u64,
            embedding,
            collaboration_score,
            impact_score,
            career_stage,
            last_updated: Utc::now(),
        };

        // Publish to the cache; scoped so the write guard drops immediately.
        {
            let mut embeddings = self
                .author_embeddings
                .write()
                .expect("rwlock should not be poisoned");
            embeddings.insert(author_id.to_string(), author_embedding.clone());
        }

        info!(
            "Generated author embedding for {} with h-index: {:.2}",
            author_id, h_index
        );
        Ok(author_embedding)
    }
522
    /// Returns the cached embedding for `publication_id`, or computes,
    /// caches, and returns a new one.
    ///
    /// Title, abstract, authors, venue, year, and DOI are placeholder
    /// values derived from the id until a metadata source is wired in.
    pub async fn generate_publication_embedding(
        &self,
        publication_id: &str,
    ) -> Result<PublicationEmbedding> {
        // Fast path: serve from cache. Scoped so the read guard is dropped
        // before any `.await` below.
        {
            let embeddings = self
                .publication_embeddings
                .read()
                .expect("rwlock should not be poisoned");
            if let Some(existing) = embeddings.get(publication_id) {
                return Ok(existing.clone());
            }
        }

        info!("Generating publication embedding for: {}", publication_id);

        // Placeholder metadata.
        let title = format!("Publication_{publication_id}");
        let abstract_text = format!("Abstract for publication {publication_id}");
        let authors = vec![format!("author_{}", publication_id)];
        let venue = "Unknown Venue".to_string();
        let year = 2023; // placeholder year
        let doi = Some(format!("10.1000/{publication_id}"));

        let citation_count = self.get_publication_citation_count(publication_id).await?;

        let topic_distribution = self
            .extract_publication_topics(publication_id, &abstract_text)
            .await?;

        let embedding = self
            .compute_publication_embedding_vector(&title, &abstract_text, &topic_distribution)
            .await?;

        let predicted_impact = self
            .predict_publication_impact(citation_count, &topic_distribution, &embedding)
            .await?;

        let publication_embedding = PublicationEmbedding {
            publication_id: publication_id.to_string(),
            title,
            abstract_text,
            authors,
            venue,
            year,
            citation_count,
            topic_distribution,
            embedding,
            predicted_impact,
            publication_type: PublicationType::JournalArticle, // assumed default type
            doi,
            last_updated: Utc::now(),
        };

        // Publish to the cache; scoped so the write guard drops immediately.
        {
            let mut embeddings = self
                .publication_embeddings
                .write()
                .expect("rwlock should not be poisoned");
            embeddings.insert(publication_id.to_string(), publication_embedding.clone());
        }

        info!(
            "Generated publication embedding for {} with predicted impact: {:.3}",
            publication_id, predicted_impact
        );
        Ok(publication_embedding)
    }
598
599 pub async fn analyze_citation_patterns(&self, publication_id: &str) -> Result<Vec<Citation>> {
601 let network = self
602 .citation_network
603 .read()
604 .expect("rwlock should not be poisoned");
605
606 if let Some(citations) = network.citations.get(publication_id) {
607 Ok(citations.clone())
608 } else {
609 Ok(Vec::new())
610 }
611 }
612
613 pub async fn find_similar_authors(
615 &self,
616 author_id: &str,
617 k: usize,
618 ) -> Result<Vec<(String, f64)>> {
619 let target_embedding = self.generate_author_embedding(author_id).await?;
620 let embeddings_data: Vec<(String, AuthorEmbedding)> = {
621 let embeddings = self
622 .author_embeddings
623 .read()
624 .expect("rwlock should not be poisoned");
625 embeddings
626 .iter()
627 .filter(|(other_id, _)| *other_id != author_id)
628 .map(|(id, emb)| (id.clone(), emb.clone()))
629 .collect()
630 };
631
632 let mut similarities = Vec::new();
633
634 for (other_id, other_embedding) in embeddings_data {
635 let similarity = self
636 .calculate_author_similarity(&target_embedding, &other_embedding)
637 .await?;
638 similarities.push((other_id, similarity));
639 }
640
641 similarities.sort_by(|a, b| {
643 b.1.partial_cmp(&a.1)
644 .expect("similarity scores should be comparable")
645 });
646 similarities.truncate(k);
647
648 Ok(similarities)
649 }
650
651 pub async fn predict_research_impact(&self, publication_id: &str) -> Result<f64> {
653 let publication = self.generate_publication_embedding(publication_id).await?;
654 Ok(publication.predicted_impact)
655 }
656
657 pub async fn analyze_research_trends(
659 &self,
660 topic: &str,
661 years: u32,
662 ) -> Result<Vec<TopicTrend>> {
663 let topics = self
664 .topic_models
665 .read()
666 .expect("rwlock should not be poisoned");
667
668 if let Some(topic_model) = topics.get(topic) {
669 let cutoff_date = Utc::now() - chrono::Duration::days((years * 365) as i64);
671 let recent_trends: Vec<TopicTrend> = topic_model
672 .temporal_trend
673 .iter()
674 .filter(|trend| trend.timestamp > cutoff_date)
675 .cloned()
676 .collect();
677
678 Ok(recent_trends)
679 } else {
680 Ok(Vec::new())
681 }
682 }
683
684 pub async fn get_research_communities(&self) -> Result<Vec<ResearchCommunity>> {
686 let network = self
687 .collaboration_network
688 .read()
689 .expect("rwlock should not be poisoned");
690 Ok(network.research_communities.clone())
691 }
692
693 pub async fn add_citation(&self, citation: Citation) -> Result<()> {
695 let mut network = self
696 .citation_network
697 .write()
698 .expect("rwlock should not be poisoned");
699
700 network
701 .citations
702 .entry(citation.citing_paper.clone())
703 .or_default()
704 .push(citation);
705
706 info!("Added new citation to network");
707 Ok(())
708 }
709
    /// Fetches all publications authored by `_author_id`.
    ///
    /// Stub: returns an empty list until a publication store is wired in.
    async fn get_author_publications(&self, _author_id: &str) -> Result<Vec<PublicationEmbedding>> {
        Ok(Vec::new())
    }
716
    /// Fetches all collaborations involving `_author_id`.
    ///
    /// Stub: returns an empty list until the collaboration graph is queried.
    async fn get_author_collaborations(&self, _author_id: &str) -> Result<Vec<Collaboration>> {
        Ok(Vec::new())
    }
721
722 async fn extract_author_topics(
723 &self,
724 _author_id: &str,
725 _publications: &[PublicationEmbedding],
726 ) -> Result<Vec<String>> {
727 Ok(vec![
729 "machine_learning".to_string(),
730 "natural_language_processing".to_string(),
731 ])
732 }
733
734 async fn calculate_h_index(&self, publications: &[PublicationEmbedding]) -> Result<f64> {
735 let mut citation_counts: Vec<u64> = publications.iter().map(|p| p.citation_count).collect();
736
737 citation_counts.sort_by(|a, b| b.cmp(a));
738
739 let mut h_index = 0;
740 for (i, &citations) in citation_counts.iter().enumerate() {
741 if citations >= (i + 1) as u64 {
742 h_index = i + 1;
743 } else {
744 break;
745 }
746 }
747
748 Ok(h_index as f64)
749 }
750
751 async fn calculate_collaboration_score(&self, collaborations: &[Collaboration]) -> Result<f64> {
752 if collaborations.is_empty() {
753 return Ok(0.0);
754 }
755
756 let total_strength: f64 = collaborations.iter().map(|c| c.strength).sum();
757 Ok(total_strength / collaborations.len() as f64)
758 }
759
    /// Overall impact score for an author.
    ///
    /// Stub: returns a fixed 0.75 until a real scoring model exists.
    async fn calculate_author_impact_score(&self, _author_id: &str) -> Result<f64> {
        Ok(0.75)
    }
764
765 async fn compute_author_embedding_vector(
766 &self,
767 _publications: &[PublicationEmbedding],
768 _collaborations: &[Collaboration],
769 _topics: &[String],
770 ) -> Result<Vector> {
771 let values = (0..self.config.embedding_dimension)
773 .map(|_| {
774 let mut random = Random::default();
775 random.random::<f32>()
776 })
777 .collect();
778 Ok(Vector::new(values))
779 }
780
    /// Buckets an author into a career stage from citation count,
    /// publication count, and h-index thresholds.
    async fn classify_career_stage(
        &self,
        citation_count: u64,
        publication_count: u64,
        h_index: f64,
    ) -> Result<CareerStage> {
        if citation_count < 100 && publication_count < 10 && h_index < 5.0 {
            Ok(CareerStage::EarlyCareer)
        } else if citation_count < 1000 && publication_count < 50 && h_index < 20.0 {
            Ok(CareerStage::MidCareer)
        } else if citation_count >= 1000 || publication_count >= 50 || h_index >= 20.0 {
            Ok(CareerStage::SeniorCareer)
        } else {
            // Only reachable when h_index is NaN: every float comparison
            // above is then false. For any non-NaN h_index, the third
            // condition is the exact negation of the second, so this arm
            // is dead code in practice.
            Ok(CareerStage::Unknown)
        }
    }
797
798 async fn get_publication_citation_count(&self, _publication_id: &str) -> Result<u64> {
799 let mut random = Random::default();
801 Ok(random.random::<u64>() % 100)
802 }
803
804 async fn extract_publication_topics(
805 &self,
806 _publication_id: &str,
807 _abstract_text: &str,
808 ) -> Result<Vec<f64>> {
809 let num_topics = self.config.topic_config.num_topics;
811 let mut distribution = vec![0.0; num_topics];
812
813 let total: f64 = (0..num_topics)
815 .map(|_| {
816 let mut random = Random::default();
817 random.random::<f64>()
818 })
819 .sum();
820 for item in distribution.iter_mut().take(num_topics) {
821 let mut random = Random::default();
822 *item = random.random::<f64>() / total;
823 }
824
825 Ok(distribution)
826 }
827
828 async fn compute_publication_embedding_vector(
829 &self,
830 _title: &str,
831 _abstract_text: &str,
832 _topic_distribution: &[f64],
833 ) -> Result<Vector> {
834 let values = (0..self.config.embedding_dimension)
836 .map(|_| {
837 let mut random = Random::default();
838 random.random::<f32>()
839 })
840 .collect();
841 Ok(Vector::new(values))
842 }
843
844 async fn predict_publication_impact(
845 &self,
846 citation_count: u64,
847 _topic_distribution: &[f64],
848 _embedding: &Vector,
849 ) -> Result<f64> {
850 let base_impact = (citation_count as f64).ln() / 10.0;
852 Ok(base_impact.clamp(0.0, 1.0))
853 }
854
855 async fn calculate_author_similarity(
856 &self,
857 author1: &AuthorEmbedding,
858 author2: &AuthorEmbedding,
859 ) -> Result<f64> {
860 let embedding1 = &author1.embedding.values;
862 let embedding2 = &author2.embedding.values;
863
864 let dot_product: f32 = embedding1
865 .iter()
866 .zip(embedding2.iter())
867 .map(|(a, b)| a * b)
868 .sum();
869 let norm1: f32 = embedding1.iter().map(|x| x * x).sum::<f32>().sqrt();
870 let norm2: f32 = embedding2.iter().map(|x| x * x).sum::<f32>().sqrt();
871
872 let cosine_similarity = if norm1 > 0.0 && norm2 > 0.0 {
873 dot_product / (norm1 * norm2)
874 } else {
875 0.0
876 };
877
878 let topic_similarity = self
880 .calculate_topic_similarity(&author1.research_topics, &author2.research_topics)
881 .await?;
882
883 let final_similarity = 0.7 * cosine_similarity as f64 + 0.3 * topic_similarity;
885
886 Ok(final_similarity)
887 }
888
889 async fn calculate_topic_similarity(
890 &self,
891 topics1: &[String],
892 topics2: &[String],
893 ) -> Result<f64> {
894 let set1: HashSet<_> = topics1.iter().collect();
895 let set2: HashSet<_> = topics2.iter().collect();
896
897 let intersection = set1.intersection(&set2).count();
898 let union = set1.union(&set2).count();
899
900 if union > 0 {
901 Ok(intersection as f64 / union as f64)
902 } else {
903 Ok(0.0)
904 }
905 }
906
    /// Spawns the periodic citation-analysis task and returns its handle.
    ///
    /// The loop currently only logs; the cloned network handle is kept so
    /// a real analysis pass can be added without changing the signature.
    async fn start_citation_analysis(&self) -> JoinHandle<()> {
        let _citation_network = Arc::clone(&self.citation_network);
        let interval =
            std::time::Duration::from_secs(self.config.citation_update_interval_hours * 3600);

        tokio::spawn(async move {
            // Note: the first tick of a tokio interval fires immediately.
            let mut interval_timer = tokio::time::interval(interval);

            loop {
                interval_timer.tick().await;

                info!("Performing citation network analysis");
                // TODO: co-citation / bibliographic-coupling computation goes here.
                debug!("Citation network analysis completed");
            }
        })
    }
930
    /// Spawns the periodic collaboration-analysis task and returns its
    /// handle.
    ///
    /// The loop currently only logs; the cloned network handle is kept so
    /// a real analysis pass can be added without changing the signature.
    async fn start_collaboration_analysis(&self) -> JoinHandle<()> {
        let _collaboration_network = Arc::clone(&self.collaboration_network);
        let interval = std::time::Duration::from_secs(
            self.config.collaboration_analysis_interval_hours * 3600,
        );

        tokio::spawn(async move {
            // Note: the first tick of a tokio interval fires immediately.
            let mut interval_timer = tokio::time::interval(interval);

            loop {
                interval_timer.tick().await;

                info!("Performing collaboration network analysis");
                // TODO: community detection / collaboration scoring goes here.
                debug!("Collaboration network analysis completed");
            }
        })
    }
953
    /// Spawns the periodic impact-prediction refresh task and returns its
    /// handle. The loop currently only logs.
    async fn start_impact_prediction(&self) -> JoinHandle<()> {
        let interval =
            std::time::Duration::from_secs(self.config.impact_prediction_refresh_hours * 3600);

        tokio::spawn(async move {
            // Note: the first tick of a tokio interval fires immediately.
            let mut interval_timer = tokio::time::interval(interval);

            loop {
                interval_timer.tick().await;

                info!("Refreshing impact prediction models");
                // TODO: model retraining / weight refresh goes here.
                debug!("Impact prediction models refreshed");
            }
        })
    }
974
    /// Spawns the periodic topic-modeling task and returns its handle.
    ///
    /// Currently installs a single hard-coded "machine_learning" topic
    /// model each cycle as placeholder output; `_config` is captured so a
    /// real implementation can honor `TopicModelingConfig`.
    async fn start_topic_modeling(&self) -> JoinHandle<()> {
        let topic_models = Arc::clone(&self.topic_models);
        let _config = self.config.clone();
        // Fixed daily cadence (not currently configurable).
        let interval = std::time::Duration::from_secs(24 * 3600);

        tokio::spawn(async move {
            // Note: the first tick of a tokio interval fires immediately.
            let mut interval_timer = tokio::time::interval(interval);

            loop {
                interval_timer.tick().await;

                info!("Updating topic models");

                // Placeholder model with a fabricated two-point trend.
                let topic_model = TopicModel {
                    topic_id: "machine_learning".to_string(),
                    topic_name: "Machine Learning".to_string(),
                    topic_words: vec![
                        ("neural".to_string(), 0.1),
                        ("network".to_string(), 0.09),
                        ("learning".to_string(), 0.08),
                        ("algorithm".to_string(), 0.07),
                        ("model".to_string(), 0.06),
                    ],
                    document_topics: HashMap::new(),
                    coherence_score: 0.75,
                    temporal_trend: vec![
                        TopicTrend {
                            timestamp: Utc::now() - chrono::Duration::days(365),
                            popularity: 0.6,
                            publication_count: 1000,
                            growth_rate: 0.15,
                        },
                        TopicTrend {
                            timestamp: Utc::now(),
                            popularity: 0.8,
                            publication_count: 1500,
                            growth_rate: 0.25,
                        },
                    ],
                };

                // Scoped so the write guard drops before the next await.
                {
                    let mut models = topic_models.write().expect("rwlock should not be poisoned");
                    models.insert("machine_learning".to_string(), topic_model);
                }

                debug!("Topic models updated");
            }
        })
    }
1027}
1028
/// Aggregate statistics describing the current state of the network.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NetworkMetrics {
    /// Number of authors with cached embeddings.
    pub total_authors: usize,
    /// Number of publications with cached embeddings.
    pub total_publications: usize,
    /// Sum of citation counts over all cached publications.
    pub total_citations: u64,
    /// `total_citations / total_publications`, or 0.0 when empty.
    pub avg_citations_per_paper: f64,
    /// Graph density (currently a placeholder constant).
    pub network_density: f64,
    /// Clustering coefficient (currently a placeholder constant).
    pub clustering_coefficient: f64,
    /// Average shortest-path length (currently a placeholder constant).
    pub average_path_length: f64,
    /// Ids of the highest-impact authors, best first (up to 10).
    pub top_authors: Vec<String>,
    /// Trending topic ids (currently a placeholder list).
    pub trending_topics: Vec<String>,
}
1051
1052impl ResearchNetworkAnalyzer {
1053 pub async fn get_network_metrics(&self) -> Result<NetworkMetrics> {
1055 let author_embeddings = self
1056 .author_embeddings
1057 .read()
1058 .expect("rwlock should not be poisoned");
1059 let publication_embeddings = self
1060 .publication_embeddings
1061 .read()
1062 .expect("rwlock should not be poisoned");
1063
1064 let total_authors = author_embeddings.len();
1065 let total_publications = publication_embeddings.len();
1066 let total_citations = publication_embeddings
1067 .values()
1068 .map(|p| p.citation_count)
1069 .sum();
1070
1071 let avg_citations_per_paper = if total_publications > 0 {
1072 total_citations as f64 / total_publications as f64
1073 } else {
1074 0.0
1075 };
1076
1077 let mut author_scores: Vec<_> = author_embeddings
1079 .iter()
1080 .map(|(id, embedding)| (id.clone(), embedding.impact_score))
1081 .collect();
1082 author_scores.sort_by(|a, b| {
1083 b.1.partial_cmp(&a.1)
1084 .expect("similarity scores should be comparable")
1085 });
1086 let top_authors: Vec<String> = author_scores
1087 .into_iter()
1088 .take(10)
1089 .map(|(id, _)| id)
1090 .collect();
1091
1092 Ok(NetworkMetrics {
1093 total_authors,
1094 total_publications,
1095 total_citations,
1096 avg_citations_per_paper,
1097 network_density: 0.1, clustering_coefficient: 0.3, average_path_length: 4.5, top_authors,
1101 trending_topics: vec!["machine_learning".to_string(), "deep_learning".to_string()],
1102 })
1103 }
1104}
1105
#[cfg(test)]
mod tests {
    use super::*;

    /// A fresh analyzer starts with empty author/publication caches.
    #[tokio::test]
    async fn test_research_network_analyzer_creation() {
        let config = ResearchNetworkConfig::default();
        let analyzer = ResearchNetworkAnalyzer::new(config);

        assert_eq!(
            analyzer
                .author_embeddings
                .read()
                .expect("rwlock should not be poisoned")
                .len(),
            0
        );
        assert_eq!(
            analyzer
                .publication_embeddings
                .read()
                .expect("rwlock should not be poisoned")
                .len(),
            0
        );
    }

    /// Generating an author embedding yields sane metrics and the
    /// configured embedding dimension.
    #[tokio::test]
    async fn test_author_embedding_generation() {
        let config = ResearchNetworkConfig::default();
        let analyzer = ResearchNetworkAnalyzer::new(config);

        let result = analyzer.generate_author_embedding("test_author").await;
        assert!(result.is_ok());

        let embedding = result.unwrap();
        assert_eq!(embedding.author_id, "test_author");
        assert!(embedding.h_index >= 0.0);
        // Must match ResearchNetworkConfig::default().embedding_dimension.
        assert_eq!(embedding.embedding.values.len(), 512);
    }

    /// Predicted impact is always clamped to [0, 1].
    #[tokio::test]
    async fn test_publication_embedding_generation() {
        let config = ResearchNetworkConfig::default();
        let analyzer = ResearchNetworkAnalyzer::new(config);

        let result = analyzer
            .generate_publication_embedding("test_publication")
            .await;
        assert!(result.is_ok());

        let embedding = result.unwrap();
        assert_eq!(embedding.publication_id, "test_publication");
        assert!(embedding.predicted_impact >= 0.0);
        assert!(embedding.predicted_impact <= 1.0);
    }

    /// Two papers with 10 and 5 citations give an h-index of exactly 2.
    #[tokio::test]
    async fn test_h_index_calculation() {
        let config = ResearchNetworkConfig::default();
        let analyzer = ResearchNetworkAnalyzer::new(config);

        let publications = vec![
            PublicationEmbedding {
                publication_id: "p1".to_string(),
                title: "Test 1".to_string(),
                abstract_text: "Abstract 1".to_string(),
                authors: vec!["author1".to_string()],
                venue: "Venue 1".to_string(),
                year: 2023,
                citation_count: 10,
                topic_distribution: vec![],
                embedding: Vector::new(vec![]),
                predicted_impact: 0.5,
                publication_type: PublicationType::JournalArticle,
                doi: None,
                last_updated: Utc::now(),
            },
            PublicationEmbedding {
                publication_id: "p2".to_string(),
                title: "Test 2".to_string(),
                abstract_text: "Abstract 2".to_string(),
                authors: vec!["author1".to_string()],
                venue: "Venue 2".to_string(),
                year: 2023,
                citation_count: 5,
                topic_distribution: vec![],
                embedding: Vector::new(vec![]),
                predicted_impact: 0.3,
                publication_type: PublicationType::JournalArticle,
                doi: None,
                last_updated: Utc::now(),
            },
        ];

        let h_index = analyzer.calculate_h_index(&publications).await.unwrap();
        assert_eq!(h_index, 2.0);
    }

    /// Career-stage thresholds distinguish early from senior researchers.
    ///
    /// Rewritten as `#[tokio::test]` for consistency with the other async
    /// tests instead of hand-building a tokio runtime.
    #[tokio::test]
    async fn test_career_stage_classification() {
        let config = ResearchNetworkConfig::default();
        let analyzer = ResearchNetworkAnalyzer::new(config);

        let stage = analyzer.classify_career_stage(50, 5, 3.0).await.unwrap();
        assert!(matches!(stage, CareerStage::EarlyCareer));

        let stage = analyzer
            .classify_career_stage(2000, 100, 25.0)
            .await
            .unwrap();
        assert!(matches!(stage, CareerStage::SeniorCareer));
    }

    /// Metrics reflect the one author and one publication inserted above.
    #[tokio::test]
    async fn test_network_metrics() {
        let config = ResearchNetworkConfig::default();
        let analyzer = ResearchNetworkAnalyzer::new(config);

        let _author_embedding = analyzer
            .generate_author_embedding("test_author")
            .await
            .unwrap();
        let _publication_embedding = analyzer
            .generate_publication_embedding("test_publication")
            .await
            .unwrap();

        let metrics = analyzer.get_network_metrics().await.unwrap();
        assert_eq!(metrics.total_authors, 1);
        assert_eq!(metrics.total_publications, 1);
    }
}