use crate::Vector;
use anyhow::Result;
use chrono::{DateTime, Utc};
use scirs2_core::random::{Random, RngExt};
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
use std::sync::{Arc, RwLock};
use tokio::task::JoinHandle;
use tracing::{debug, info};
/// Analyzer that holds in-memory author/publication embeddings, citation and
/// collaboration networks, and topic models, plus the handles of the background
/// tasks spawned by `start`.
pub struct ResearchNetworkAnalyzer {
/// Cached author embeddings, keyed by author id.
author_embeddings: Arc<RwLock<HashMap<String, AuthorEmbedding>>>,
/// Cached publication embeddings, keyed by publication id.
publication_embeddings: Arc<RwLock<HashMap<String, PublicationEmbedding>>>,
/// Citation graph; `add_citation` appends to it.
citation_network: Arc<RwLock<CitationNetwork>>,
/// Collaboration graph; `get_research_communities` reads from it.
collaboration_network: Arc<RwLock<CollaborationNetwork>>,
/// Topic models, keyed by topic id (written by the topic-modeling task).
topic_models: Arc<RwLock<HashMap<String, TopicModel>>>,
/// Configuration supplied to `new`.
config: ResearchNetworkConfig,
/// Handles of running background tasks; filled by `start`, aborted by `stop`.
analysis_tasks: Vec<JoinHandle<()>>,
}
/// Configuration for [`ResearchNetworkAnalyzer`].
///
/// In the code visible in this file only the three `*_hours` intervals,
/// `topic_config.num_topics` and `embedding_dimension` are actually read; the
/// remaining fields are stored but not consulted here.
#[derive(Debug, Clone)]
pub struct ResearchNetworkConfig {
pub max_authors: usize,
pub max_publications: usize,
/// Period, in hours, of the background citation-analysis task.
pub citation_update_interval_hours: u64,
/// Period, in hours, of the background collaboration-analysis task.
pub collaboration_analysis_interval_hours: u64,
/// Period, in hours, of the background impact-prediction refresh task.
pub impact_prediction_refresh_hours: u64,
pub enable_real_time_citation_tracking: bool,
pub min_citation_threshold: u32,
/// Topic-modeling settings; `num_topics` sets the length of topic distributions.
pub topic_config: TopicModelingConfig,
/// Number of components in generated author and publication embedding vectors.
pub embedding_dimension: usize,
}
/// Default configuration: 100k authors, 1M publications, 24h/12h/48h task
/// intervals, real-time tracking enabled, citation threshold 5, default topic
/// settings and 512-dimensional embeddings.
impl Default for ResearchNetworkConfig {
fn default() -> Self {
Self {
max_authors: 100_000,
max_publications: 1_000_000,
citation_update_interval_hours: 24,
collaboration_analysis_interval_hours: 12,
impact_prediction_refresh_hours: 48,
enable_real_time_citation_tracking: true,
min_citation_threshold: 5,
topic_config: TopicModelingConfig::default(),
embedding_dimension: 512,
}
}
}
/// Settings for topic modeling.
///
/// Only `num_topics` is read by the code visible in this file (it fixes the
/// length of the per-publication topic distribution).
#[derive(Debug, Clone)]
pub struct TopicModelingConfig {
/// Number of topics; the length of each generated topic distribution.
pub num_topics: usize,
pub min_word_freq: u32,
pub max_doc_freq_ratio: f64,
pub lda_iterations: u32,
pub coherence_threshold: f64,
}
/// Default topic-modeling settings: 50 topics, minimum word frequency 5,
/// maximum document-frequency ratio 0.8, 1000 iterations, coherence threshold 0.4.
impl Default for TopicModelingConfig {
fn default() -> Self {
Self {
num_topics: 50,
min_word_freq: 5,
max_doc_freq_ratio: 0.8,
lda_iterations: 1000,
coherence_threshold: 0.4,
}
}
}
/// Per-author record produced by `generate_author_embedding` and cached by
/// author id.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AuthorEmbedding {
pub author_id: String,
pub name: String,
pub affiliations: Vec<String>,
/// Topic labels; compared set-wise when computing author similarity.
pub research_topics: Vec<String>,
/// h-index, stored as a float although it is computed as an integer count.
pub h_index: f64,
/// Sum of the citation counts of the author's publications.
pub citation_count: u64,
pub publication_count: u64,
/// Embedding vector used for cosine similarity between authors.
pub embedding: Vector,
/// Mean collaboration strength, or 0.0 when the author has no collaborations.
pub collaboration_score: f64,
/// Score used to rank `top_authors` in the network metrics.
pub impact_score: f64,
pub career_stage: CareerStage,
/// Time at which this record was generated.
pub last_updated: DateTime<Utc>,
}
/// Per-publication record produced by `generate_publication_embedding` and
/// cached by publication id.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PublicationEmbedding {
pub publication_id: String,
pub title: String,
pub abstract_text: String,
pub authors: Vec<String>,
pub venue: String,
pub year: u32,
/// Citation count; feeds the h-index and the network-wide citation totals.
pub citation_count: u64,
/// One weight per configured topic.
pub topic_distribution: Vec<f64>,
pub embedding: Vector,
/// Predicted impact; the predictor in this file clamps it to `[0.0, 1.0]`.
pub predicted_impact: f64,
pub publication_type: PublicationType,
pub doi: Option<String>,
/// Time at which this record was generated.
pub last_updated: DateTime<Utc>,
}
/// Career stage assigned to an author.
///
/// `classify_career_stage` in this file only ever returns `EarlyCareer`,
/// `MidCareer`, `SeniorCareer` or `Unknown`; `Emeritus` is never produced by it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum CareerStage {
EarlyCareer,
MidCareer,
SeniorCareer,
Emeritus,
Unknown,
}
/// Kind of publication.
///
/// `generate_publication_embedding` currently always assigns `JournalArticle`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum PublicationType {
JournalArticle,
ConferencePaper,
BookChapter,
Book,
Preprint,
Thesis,
TechnicalReport,
Other,
}
/// Citation graph data held by the analyzer.
///
/// Only `citations` is read or written by the code visible in this file.
#[derive(Debug, Clone)]
pub struct CitationNetwork {
/// Citations grouped by the id of the citing paper (see `add_citation`).
pub citations: HashMap<String, Vec<Citation>>,
pub co_citations: HashMap<String, Vec<CoCitation>>,
pub bibliographic_coupling: HashMap<String, Vec<BibliographicCoupling>>,
pub temporal_patterns: HashMap<String, Vec<TemporalCitation>>,
}
/// A single citation from one paper to another.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Citation {
/// Id of the paper making the citation; used as the key in `CitationNetwork::citations`.
pub citing_paper: String,
/// Id of the paper being cited.
pub cited_paper: String,
pub context: String,
pub citation_type: CitationType,
pub section: PaperSection,
pub timestamp: DateTime<Utc>,
}
/// Classification attached to a [`Citation`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum CitationType {
Supportive,
Contrasting,
Neutral,
Background,
Methodological,
}
/// Section of a paper, recorded on a [`Citation`] via its `section` field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum PaperSection {
Introduction,
RelatedWork,
Methods,
Results,
Discussion,
Conclusion,
Other,
}
/// Co-citation record for a pair of papers.
///
/// Not produced by any code visible in this file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CoCitation {
pub paper1: String,
pub paper2: String,
pub co_citation_count: u32,
pub similarity_score: f64,
}
/// Bibliographic-coupling record for a pair of papers.
///
/// Not produced by any code visible in this file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BibliographicCoupling {
pub paper1: String,
pub paper2: String,
pub shared_references: u32,
pub coupling_strength: f64,
}
/// Timestamped citation statistics for one paper.
///
/// Not produced by any code visible in this file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TemporalCitation {
pub paper_id: String,
pub timestamp: DateTime<Utc>,
pub citation_count: u64,
pub citation_velocity: f64,
}
/// Collaboration graph data held by the analyzer.
///
/// Only `research_communities` is read by the code visible in this file.
#[derive(Debug, Clone)]
pub struct CollaborationNetwork {
pub collaborations: HashMap<String, Vec<Collaboration>>,
/// Communities returned (cloned) by `get_research_communities`.
pub research_communities: Vec<ResearchCommunity>,
pub temporal_collaborations: HashMap<String, Vec<TemporalCollaboration>>,
}
/// Collaboration record between two authors.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Collaboration {
pub author1: String,
pub author2: String,
pub joint_publications: u32,
/// Strength value; averaged across collaborations to form an author's collaboration score.
pub strength: f64,
pub shared_topics: Vec<String>,
pub first_collaboration: DateTime<Utc>,
pub last_collaboration: DateTime<Utc>,
}
/// A research community record, as returned by `get_research_communities`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ResearchCommunity {
pub community_id: String,
pub members: Vec<String>,
pub topics: Vec<String>,
pub central_members: Vec<String>,
pub coherence_score: f64,
// NOTE(review): presumably equal to `members.len()`; nothing visible here enforces that.
pub size: usize,
}
/// Timestamped collaboration statistics for one author.
///
/// Not produced by any code visible in this file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TemporalCollaboration {
pub author_id: String,
pub timestamp: DateTime<Utc>,
pub active_collaborations: u32,
pub new_collaborations: u32,
}
/// A topic together with its weighted words and trend history.
#[derive(Debug, Clone)]
pub struct TopicModel {
pub topic_id: String,
pub topic_name: String,
/// `(word, weight)` pairs.
pub topic_words: Vec<(String, f64)>,
// NOTE(review): presumably document id -> topic weight; confirm against the producer.
pub document_topics: HashMap<String, f64>,
pub coherence_score: f64,
/// Trend points; `analyze_research_trends` filters these by timestamp.
pub temporal_trend: Vec<TopicTrend>,
}
/// One timestamped data point in a topic's trend history.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TopicTrend {
/// Time of the data point; used to select recent trends.
pub timestamp: DateTime<Utc>,
pub popularity: f64,
pub publication_count: u64,
pub growth_rate: f64,
}
/// Impact-prediction model state: named feature weights, evaluation metrics
/// and the time of the last update.
///
/// Not constructed or consulted by any code visible in this file.
#[derive(Debug, Clone)]
pub struct ImpactPredictor {
pub feature_weights: HashMap<String, f64>,
pub performance_metrics: PredictionMetrics,
pub last_update: DateTime<Utc>,
}
/// Evaluation metrics stored on an [`ImpactPredictor`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PredictionMetrics {
pub mae: f64,
pub rmse: f64,
pub r2_score: f64,
/// Precision values keyed by `k`.
pub precision_at_k: HashMap<u32, f64>,
}
impl ResearchNetworkAnalyzer {
/// Creates an analyzer with empty embedding caches, empty citation and
/// collaboration networks, no topic models and no background tasks.
pub fn new(config: ResearchNetworkConfig) -> Self {
    let empty_citations = CitationNetwork {
        citations: HashMap::new(),
        co_citations: HashMap::new(),
        bibliographic_coupling: HashMap::new(),
        temporal_patterns: HashMap::new(),
    };
    let empty_collaborations = CollaborationNetwork {
        collaborations: HashMap::new(),
        research_communities: Vec::new(),
        temporal_collaborations: HashMap::new(),
    };

    Self {
        author_embeddings: Arc::default(),
        publication_embeddings: Arc::default(),
        citation_network: Arc::new(RwLock::new(empty_citations)),
        collaboration_network: Arc::new(RwLock::new(empty_collaborations)),
        topic_models: Arc::default(),
        config,
        analysis_tasks: Vec::new(),
    }
}
/// Spawns the four background tasks (citation analysis, collaboration
/// analysis, impact prediction, topic modeling) and records their handles so
/// that `stop` can abort them. Always returns `Ok(())`.
pub async fn start(&mut self) -> Result<()> {
    info!("Starting research network analysis system");

    // Spawn in the same order as before, then store all handles at once.
    let handles = [
        self.start_citation_analysis().await,
        self.start_collaboration_analysis().await,
        self.start_impact_prediction().await,
        self.start_topic_modeling().await,
    ];
    self.analysis_tasks.extend(handles);

    info!("Research network analysis system started successfully");
    Ok(())
}
/// Aborts every recorded background task and clears the handle list.
pub async fn stop(&mut self) {
    info!("Stopping research network analysis system");
    self.analysis_tasks
        .drain(..)
        .for_each(|handle| handle.abort());
    info!("Research network analysis system stopped");
}
/// Returns the cached embedding for `author_id`, or builds, caches and returns
/// a new one.
///
/// # Errors
/// Propagates any error from the helper computations.
///
/// # Panics
/// Panics if the author-embedding lock is poisoned.
pub async fn generate_author_embedding(&self, author_id: &str) -> Result<AuthorEmbedding> {
    // The read guard is a temporary of this statement, so it is released
    // before any `.await` below.
    let cached = self
        .author_embeddings
        .read()
        .expect("rwlock should not be poisoned")
        .get(author_id)
        .cloned();
    if let Some(hit) = cached {
        return Ok(hit);
    }

    info!("Generating author embedding for: {}", author_id);

    let publications = self.get_author_publications(author_id).await?;
    let collaborations = self.get_author_collaborations(author_id).await?;
    let research_topics = self
        .extract_author_topics(author_id, &publications)
        .await?;
    let h_index = self.calculate_h_index(&publications).await?;
    let citation_count: u64 = publications.iter().map(|p| p.citation_count).sum();
    let collaboration_score = self.calculate_collaboration_score(&collaborations).await?;
    let impact_score = self.calculate_author_impact_score(author_id).await?;
    let embedding = self
        .compute_author_embedding_vector(&publications, &collaborations, &research_topics)
        .await?;
    let publication_count = publications.len() as u64;
    let career_stage = self
        .classify_career_stage(citation_count, publication_count, h_index)
        .await?;

    let author_embedding = AuthorEmbedding {
        author_id: author_id.to_string(),
        // Name and affiliations are placeholders derived from the id.
        name: format!("Author_{author_id}"),
        affiliations: vec!["Unknown".to_string()],
        research_topics,
        h_index,
        citation_count,
        publication_count,
        embedding,
        collaboration_score,
        impact_score,
        career_stage,
        last_updated: Utc::now(),
    };

    self.author_embeddings
        .write()
        .expect("rwlock should not be poisoned")
        .insert(author_id.to_string(), author_embedding.clone());

    info!(
        "Generated author embedding for {} with h-index: {:.2}",
        author_id, h_index
    );
    Ok(author_embedding)
}
/// Returns the cached embedding for `publication_id`, or builds, caches and
/// returns a new one. Title, abstract, authors, venue, year and DOI are
/// placeholders derived from the id.
///
/// # Errors
/// Propagates any error from the helper computations.
///
/// # Panics
/// Panics if the publication-embedding lock is poisoned.
pub async fn generate_publication_embedding(
    &self,
    publication_id: &str,
) -> Result<PublicationEmbedding> {
    // The read guard is a temporary of this statement, so it is released
    // before any `.await` below.
    let cached = self
        .publication_embeddings
        .read()
        .expect("rwlock should not be poisoned")
        .get(publication_id)
        .cloned();
    if let Some(hit) = cached {
        return Ok(hit);
    }

    info!("Generating publication embedding for: {}", publication_id);

    let title = format!("Publication_{publication_id}");
    let abstract_text = format!("Abstract for publication {publication_id}");

    let citation_count = self.get_publication_citation_count(publication_id).await?;
    let topic_distribution = self
        .extract_publication_topics(publication_id, &abstract_text)
        .await?;
    let embedding = self
        .compute_publication_embedding_vector(&title, &abstract_text, &topic_distribution)
        .await?;
    let predicted_impact = self
        .predict_publication_impact(citation_count, &topic_distribution, &embedding)
        .await?;

    let publication_embedding = PublicationEmbedding {
        publication_id: publication_id.to_string(),
        title,
        abstract_text,
        authors: vec![format!("author_{}", publication_id)],
        venue: "Unknown Venue".to_string(),
        year: 2023,
        citation_count,
        topic_distribution,
        embedding,
        predicted_impact,
        publication_type: PublicationType::JournalArticle,
        doi: Some(format!("10.1000/{publication_id}")),
        last_updated: Utc::now(),
    };

    self.publication_embeddings
        .write()
        .expect("rwlock should not be poisoned")
        .insert(publication_id.to_string(), publication_embedding.clone());

    info!(
        "Generated publication embedding for {} with predicted impact: {:.3}",
        publication_id, predicted_impact
    );
    Ok(publication_embedding)
}
/// Returns a copy of the citations stored under `publication_id`, or an empty
/// vector when nothing is stored under that key.
///
/// # Panics
/// Panics if the citation-network lock is poisoned.
pub async fn analyze_citation_patterns(&self, publication_id: &str) -> Result<Vec<Citation>> {
    let network = self
        .citation_network
        .read()
        .expect("rwlock should not be poisoned");
    let stored = network.citations.get(publication_id).cloned();
    Ok(stored.unwrap_or_default())
}
/// Returns up to `k` cached authors most similar to `author_id`, as
/// `(author_id, similarity)` pairs sorted by descending similarity.
///
/// The target author's embedding is generated (and cached) first if needed;
/// the target itself is excluded from the results.
///
/// # Errors
/// Propagates errors from embedding generation or similarity computation.
///
/// # Panics
/// Panics if the author-embedding lock is poisoned.
pub async fn find_similar_authors(
    &self,
    author_id: &str,
    k: usize,
) -> Result<Vec<(String, f64)>> {
    let target_embedding = self.generate_author_embedding(author_id).await?;

    // Snapshot the other authors so the lock is not held across `.await`.
    let candidates: Vec<(String, AuthorEmbedding)> = {
        let embeddings = self
            .author_embeddings
            .read()
            .expect("rwlock should not be poisoned");
        embeddings
            .iter()
            .filter(|(other_id, _)| other_id.as_str() != author_id)
            .map(|(id, emb)| (id.clone(), emb.clone()))
            .collect()
    };

    let mut similarities: Vec<(String, f64)> = Vec::with_capacity(candidates.len());
    for (other_id, other_embedding) in candidates {
        let similarity = self
            .calculate_author_similarity(&target_embedding, &other_embedding)
            .await?;
        similarities.push((other_id, similarity));
    }

    // Descending by score. A NaN score (e.g. from a non-finite embedding
    // component) must not panic the caller, so NaN entries are ordered last.
    similarities.sort_by(|a, b| match (a.1.is_nan(), b.1.is_nan()) {
        (true, true) => std::cmp::Ordering::Equal,
        (true, false) => std::cmp::Ordering::Greater,
        (false, true) => std::cmp::Ordering::Less,
        (false, false) => b.1.total_cmp(&a.1),
    });
    similarities.truncate(k);
    Ok(similarities)
}
/// Returns the predicted impact of `publication_id`, generating (and caching)
/// its embedding if it is not cached yet.
pub async fn predict_research_impact(&self, publication_id: &str) -> Result<f64> {
    self.generate_publication_embedding(publication_id)
        .await
        .map(|publication| publication.predicted_impact)
}
/// Returns the trend points of `topic` whose timestamp is newer than
/// `years * 365` days ago, or an empty vector when the topic is unknown.
///
/// If the requested window reaches further back than `DateTime` can
/// represent, no cutoff is applied and every stored trend point is returned.
///
/// # Panics
/// Panics if the topic-model lock is poisoned.
pub async fn analyze_research_trends(
    &self,
    topic: &str,
    years: u32,
) -> Result<Vec<TopicTrend>> {
    // Widen before multiplying: `years * 365` overflows `u32` for large
    // inputs. The cap keeps the value well inside `chrono::Duration`'s range.
    let window_days = (i64::from(years) * 365).min(i64::from(i32::MAX));
    // `None` means the cutoff would precede the earliest representable date.
    let cutoff_date = Utc::now().checked_sub_signed(chrono::Duration::days(window_days));

    let topics = self
        .topic_models
        .read()
        .expect("rwlock should not be poisoned");
    let recent_trends: Vec<TopicTrend> = match topics.get(topic) {
        Some(topic_model) => topic_model
            .temporal_trend
            .iter()
            .filter(|trend| cutoff_date.map_or(true, |cutoff| trend.timestamp > cutoff))
            .cloned()
            .collect(),
        None => Vec::new(),
    };
    Ok(recent_trends)
}
/// Returns a copy of the research communities currently stored in the
/// collaboration network.
///
/// # Panics
/// Panics if the collaboration-network lock is poisoned.
pub async fn get_research_communities(&self) -> Result<Vec<ResearchCommunity>> {
    let communities = self
        .collaboration_network
        .read()
        .expect("rwlock should not be poisoned")
        .research_communities
        .clone();
    Ok(communities)
}
/// Appends `citation` to the list stored under its citing paper's id,
/// creating the list if necessary.
///
/// # Panics
/// Panics if the citation-network lock is poisoned.
pub async fn add_citation(&self, citation: Citation) -> Result<()> {
    let citing_id = citation.citing_paper.clone();
    let mut network = self
        .citation_network
        .write()
        .expect("rwlock should not be poisoned");
    let bucket = network.citations.entry(citing_id).or_default();
    bucket.push(citation);
    info!("Added new citation to network");
    Ok(())
}
/// Stub: ignores `_author_id` and returns an empty publication list.
async fn get_author_publications(&self, _author_id: &str) -> Result<Vec<PublicationEmbedding>> {
    let publications = vec![];
    Ok(publications)
}
/// Stub: ignores `_author_id` and returns an empty collaboration list.
async fn get_author_collaborations(&self, _author_id: &str) -> Result<Vec<Collaboration>> {
    let collaborations = vec![];
    Ok(collaborations)
}
/// Stub: ignores its inputs and returns a fixed pair of topic labels.
async fn extract_author_topics(
    &self,
    _author_id: &str,
    _publications: &[PublicationEmbedding],
) -> Result<Vec<String>> {
    let topics = ["machine_learning", "natural_language_processing"]
        .iter()
        .map(|label| (*label).to_owned())
        .collect();
    Ok(topics)
}
/// Computes the h-index of `publications`: the largest `h` such that `h`
/// publications each have at least `h` citations. Returned as `f64`.
async fn calculate_h_index(&self, publications: &[PublicationEmbedding]) -> Result<f64> {
    let mut ranked: Vec<u64> = publications.iter().map(|p| p.citation_count).collect();
    // Most-cited first.
    ranked.sort_unstable_by(|a, b| b.cmp(a));

    // Count the leading run of papers whose citations reach their 1-based rank.
    let h_index = ranked
        .iter()
        .enumerate()
        .take_while(|(rank, &citations)| citations >= (rank + 1) as u64)
        .count();
    Ok(h_index as f64)
}
/// Returns the mean `strength` over `collaborations`, or `0.0` when the slice
/// is empty.
async fn calculate_collaboration_score(&self, collaborations: &[Collaboration]) -> Result<f64> {
    match collaborations.len() {
        0 => Ok(0.0),
        count => {
            let total_strength: f64 = collaborations.iter().map(|c| c.strength).sum();
            Ok(total_strength / count as f64)
        }
    }
}
/// Stub: ignores `_author_id` and returns a fixed score of `0.75`.
async fn calculate_author_impact_score(&self, _author_id: &str) -> Result<f64> {
    const FIXED_IMPACT_SCORE: f64 = 0.75;
    Ok(FIXED_IMPACT_SCORE)
}
/// Placeholder embedding: ignores its inputs and returns a vector of
/// `config.embedding_dimension` random `f32` values.
async fn compute_author_embedding_vector(
    &self,
    _publications: &[PublicationEmbedding],
    _collaborations: &[Collaboration],
    _topics: &[String],
) -> Result<Vector> {
    // One generator for the whole vector instead of constructing a new one
    // for every component.
    let mut random = Random::default();
    let values = (0..self.config.embedding_dimension)
        .map(|_| random.random::<f32>())
        .collect();
    Ok(Vector::new(values))
}
/// Classifies a career stage from citation count, publication count and
/// h-index.
///
/// All three values must be under the early thresholds (100 / 10 / 5.0) for
/// `EarlyCareer`, and under the mid thresholds (1000 / 50 / 20.0) for
/// `MidCareer`. `SeniorCareer` applies when any value reaches a mid
/// threshold; anything else yields `Unknown`.
async fn classify_career_stage(
    &self,
    citation_count: u64,
    publication_count: u64,
    h_index: f64,
) -> Result<CareerStage> {
    let below_early_limits = citation_count < 100 && publication_count < 10 && h_index < 5.0;
    let below_mid_limits = citation_count < 1000 && publication_count < 50 && h_index < 20.0;
    let reaches_senior = citation_count >= 1000 || publication_count >= 50 || h_index >= 20.0;

    let stage = if below_early_limits {
        CareerStage::EarlyCareer
    } else if below_mid_limits {
        CareerStage::MidCareer
    } else if reaches_senior {
        CareerStage::SeniorCareer
    } else {
        CareerStage::Unknown
    };
    Ok(stage)
}
/// Placeholder: ignores `_publication_id` and returns a random count in `0..100`.
async fn get_publication_citation_count(&self, _publication_id: &str) -> Result<u64> {
    let mut rng = Random::default();
    let sample = rng.random::<u64>();
    Ok(sample % 100)
}
/// Placeholder topic extraction: ignores its inputs and returns a random
/// distribution with `config.topic_config.num_topics` entries that sums to 1
/// (up to floating-point rounding). Returns an empty vector when
/// `num_topics` is 0.
async fn extract_publication_topics(
    &self,
    _publication_id: &str,
    _abstract_text: &str,
) -> Result<Vec<f64>> {
    let num_topics = self.config.topic_config.num_topics;
    let mut random = Random::default();

    // Draw the raw weights once, then normalise those same weights. Dividing
    // a second, independent set of draws by this total would not sum to 1.
    let mut distribution: Vec<f64> = (0..num_topics)
        .map(|_| random.random::<f64>())
        .collect();
    let total: f64 = distribution.iter().sum();

    if total > 0.0 {
        for weight in &mut distribution {
            *weight /= total;
        }
    } else if num_topics > 0 {
        // Every draw was zero: fall back to a uniform distribution rather
        // than dividing by zero.
        distribution.fill(1.0 / num_topics as f64);
    }
    Ok(distribution)
}
/// Placeholder embedding: ignores its inputs and returns a vector of
/// `config.embedding_dimension` random `f32` values.
async fn compute_publication_embedding_vector(
    &self,
    _title: &str,
    _abstract_text: &str,
    _topic_distribution: &[f64],
) -> Result<Vector> {
    // One generator for the whole vector instead of constructing a new one
    // for every component.
    let mut random = Random::default();
    let values = (0..self.config.embedding_dimension)
        .map(|_| random.random::<f32>())
        .collect();
    Ok(Vector::new(values))
}
/// Predicts impact as `ln(citation_count) / 10`, clamped to `[0.0, 1.0]`.
/// The topic distribution and embedding are currently ignored.
async fn predict_publication_impact(
    &self,
    citation_count: u64,
    _topic_distribution: &[f64],
    _embedding: &Vector,
) -> Result<f64> {
    // ln(0) is negative infinity, which the clamp maps to 0.0.
    let log_citations = (citation_count as f64).ln();
    let scaled = log_citations / 10.0;
    Ok(scaled.clamp(0.0, 1.0))
}
/// Combines embedding cosine similarity (weight 0.7) with topic-set overlap
/// (weight 0.3) into a single similarity score for two authors.
///
/// The cosine term is 0.0 unless both embeddings have a norm greater than zero.
async fn calculate_author_similarity(
    &self,
    author1: &AuthorEmbedding,
    author2: &AuthorEmbedding,
) -> Result<f64> {
    let lhs = &author1.embedding.values;
    let rhs = &author2.embedding.values;

    let dot_product: f32 = lhs.iter().zip(rhs.iter()).map(|(a, b)| a * b).sum();
    let lhs_norm: f32 = lhs.iter().map(|x| x * x).sum::<f32>().sqrt();
    let rhs_norm: f32 = rhs.iter().map(|x| x * x).sum::<f32>().sqrt();

    let mut cosine_similarity: f32 = 0.0;
    if lhs_norm > 0.0 && rhs_norm > 0.0 {
        cosine_similarity = dot_product / (lhs_norm * rhs_norm);
    }

    let topic_similarity = self
        .calculate_topic_similarity(&author1.research_topics, &author2.research_topics)
        .await?;

    Ok(0.7 * f64::from(cosine_similarity) + 0.3 * topic_similarity)
}
/// Returns the Jaccard index (|intersection| / |union|) of the two topic
/// lists treated as sets, or `0.0` when both are empty.
async fn calculate_topic_similarity(
    &self,
    topics1: &[String],
    topics2: &[String],
) -> Result<f64> {
    let first: HashSet<&String> = topics1.iter().collect();
    let second: HashSet<&String> = topics2.iter().collect();

    let shared = first.intersection(&second).count();
    // |A ∪ B| = |A| + |B| - |A ∩ B|
    let combined = first.len() + second.len() - shared;

    if combined == 0 {
        return Ok(0.0);
    }
    Ok(shared as f64 / combined as f64)
}
/// Spawns a background task that logs a citation-analysis pass every
/// `config.citation_update_interval_hours` hours and returns its handle.
///
/// A configured interval of 0 is treated as 1 hour.
async fn start_citation_analysis(&self) -> JoinHandle<()> {
    // Currently unused: the task body does not touch the network yet.
    let _citation_network = Arc::clone(&self.citation_network);
    // `tokio::time::interval` panics on a zero period, and the multiplication
    // can overflow for very large settings, so clamp and saturate.
    let interval_secs = self
        .config
        .citation_update_interval_hours
        .max(1)
        .saturating_mul(3600);
    let interval = std::time::Duration::from_secs(interval_secs);
    tokio::spawn(async move {
        let mut interval_timer = tokio::time::interval(interval);
        loop {
            interval_timer.tick().await;
            info!("Performing citation network analysis");
            debug!("Citation network analysis completed");
        }
    })
}
/// Spawns a background task that logs a collaboration-analysis pass every
/// `config.collaboration_analysis_interval_hours` hours and returns its handle.
///
/// A configured interval of 0 is treated as 1 hour.
async fn start_collaboration_analysis(&self) -> JoinHandle<()> {
    // Currently unused: the task body does not touch the network yet.
    let _collaboration_network = Arc::clone(&self.collaboration_network);
    // `tokio::time::interval` panics on a zero period, and the multiplication
    // can overflow for very large settings, so clamp and saturate.
    let interval_secs = self
        .config
        .collaboration_analysis_interval_hours
        .max(1)
        .saturating_mul(3600);
    let interval = std::time::Duration::from_secs(interval_secs);
    tokio::spawn(async move {
        let mut interval_timer = tokio::time::interval(interval);
        loop {
            interval_timer.tick().await;
            info!("Performing collaboration network analysis");
            debug!("Collaboration network analysis completed");
        }
    })
}
/// Spawns a background task that logs an impact-model refresh every
/// `config.impact_prediction_refresh_hours` hours and returns its handle.
///
/// A configured interval of 0 is treated as 1 hour.
async fn start_impact_prediction(&self) -> JoinHandle<()> {
    // `tokio::time::interval` panics on a zero period, and the multiplication
    // can overflow for very large settings, so clamp and saturate.
    let interval_secs = self
        .config
        .impact_prediction_refresh_hours
        .max(1)
        .saturating_mul(3600);
    let interval = std::time::Duration::from_secs(interval_secs);
    tokio::spawn(async move {
        let mut interval_timer = tokio::time::interval(interval);
        loop {
            interval_timer.tick().await;
            info!("Refreshing impact prediction models");
            debug!("Impact prediction models refreshed");
        }
    })
}
/// Spawns a background task that, every 24 hours, rebuilds a hard-coded
/// "Machine Learning" topic model and stores it under the key
/// `machine_learning`. Returns the task's handle.
async fn start_topic_modeling(&self) -> JoinHandle<()> {
    const TOPIC_KEY: &str = "machine_learning";
    const REFRESH_PERIOD: std::time::Duration = std::time::Duration::from_secs(24 * 3600);

    let shared_models = Arc::clone(&self.topic_models);
    // Cloned but not used by the task body yet.
    let _config = self.config.clone();

    tokio::spawn(async move {
        let mut ticker = tokio::time::interval(REFRESH_PERIOD);
        loop {
            ticker.tick().await;
            info!("Updating topic models");

            let topic_words = vec![
                ("neural".to_string(), 0.1),
                ("network".to_string(), 0.09),
                ("learning".to_string(), 0.08),
                ("algorithm".to_string(), 0.07),
                ("model".to_string(), 0.06),
            ];
            // Two trend points: one dated a year ago, one dated now.
            let year_ago_point = TopicTrend {
                timestamp: Utc::now() - chrono::Duration::days(365),
                popularity: 0.6,
                publication_count: 1000,
                growth_rate: 0.15,
            };
            let current_point = TopicTrend {
                timestamp: Utc::now(),
                popularity: 0.8,
                publication_count: 1500,
                growth_rate: 0.25,
            };
            let refreshed = TopicModel {
                topic_id: TOPIC_KEY.to_string(),
                topic_name: "Machine Learning".to_string(),
                topic_words,
                document_topics: HashMap::new(),
                coherence_score: 0.75,
                temporal_trend: vec![year_ago_point, current_point],
            };

            // The write guard is a temporary released at the end of this statement.
            shared_models
                .write()
                .expect("rwlock should not be poisoned")
                .insert(TOPIC_KEY.to_string(), refreshed);

            debug!("Topic models updated");
        }
    })
}
}
/// Summary statistics returned by `get_network_metrics`.
///
/// As computed in this file, `network_density`, `clustering_coefficient`,
/// `average_path_length` and `trending_topics` are fixed placeholder values.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NetworkMetrics {
/// Number of cached author embeddings.
pub total_authors: usize,
/// Number of cached publication embeddings.
pub total_publications: usize,
/// Sum of `citation_count` over cached publications.
pub total_citations: u64,
/// `total_citations / total_publications`, or 0.0 with no publications.
pub avg_citations_per_paper: f64,
pub network_density: f64,
pub clustering_coefficient: f64,
pub average_path_length: f64,
/// Ids of up to ten authors with the highest impact score.
pub top_authors: Vec<String>,
pub trending_topics: Vec<String>,
}
impl ResearchNetworkAnalyzer {
    /// Computes summary metrics over the cached author and publication
    /// embeddings.
    ///
    /// Counts, citation totals, the citation average and the top-ten authors
    /// by impact score are derived from the caches. Density, clustering
    /// coefficient, average path length and trending topics are fixed
    /// placeholder values.
    ///
    /// # Panics
    /// Panics if either embedding lock is poisoned.
    pub async fn get_network_metrics(&self) -> Result<NetworkMetrics> {
        let author_embeddings = self
            .author_embeddings
            .read()
            .expect("rwlock should not be poisoned");
        let publication_embeddings = self
            .publication_embeddings
            .read()
            .expect("rwlock should not be poisoned");

        let total_authors = author_embeddings.len();
        let total_publications = publication_embeddings.len();
        let total_citations: u64 = publication_embeddings
            .values()
            .map(|p| p.citation_count)
            .sum();
        let avg_citations_per_paper = if total_publications > 0 {
            total_citations as f64 / total_publications as f64
        } else {
            0.0
        };

        let mut author_scores: Vec<(String, f64)> = author_embeddings
            .iter()
            .map(|(id, embedding)| (id.clone(), embedding.impact_score))
            .collect();
        // Descending by impact score. A NaN score (possible in deserialized
        // data) must not panic the metrics call, so NaN entries are ordered last.
        author_scores.sort_by(|a, b| match (a.1.is_nan(), b.1.is_nan()) {
            (true, true) => std::cmp::Ordering::Equal,
            (true, false) => std::cmp::Ordering::Greater,
            (false, true) => std::cmp::Ordering::Less,
            (false, false) => b.1.total_cmp(&a.1),
        });
        let top_authors: Vec<String> = author_scores
            .into_iter()
            .take(10)
            .map(|(id, _)| id)
            .collect();

        Ok(NetworkMetrics {
            total_authors,
            total_publications,
            total_citations,
            avg_citations_per_paper,
            network_density: 0.1,
            clustering_coefficient: 0.3,
            average_path_length: 4.5,
            top_authors,
            trending_topics: vec!["machine_learning".to_string(), "deep_learning".to_string()],
        })
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an analyzer with the default configuration.
    fn default_analyzer() -> ResearchNetworkAnalyzer {
        ResearchNetworkAnalyzer::new(ResearchNetworkConfig::default())
    }

    /// Builds a minimal journal-article fixture numbered `index`.
    fn sample_publication(
        index: u32,
        citation_count: u64,
        predicted_impact: f64,
    ) -> PublicationEmbedding {
        PublicationEmbedding {
            publication_id: format!("p{index}"),
            title: format!("Test {index}"),
            abstract_text: format!("Abstract {index}"),
            authors: vec!["author1".to_string()],
            venue: format!("Venue {index}"),
            year: 2023,
            citation_count,
            topic_distribution: Vec::new(),
            embedding: Vector::new(Vec::new()),
            predicted_impact,
            publication_type: PublicationType::JournalArticle,
            doi: None,
            last_updated: Utc::now(),
        }
    }

    #[tokio::test]
    async fn test_research_network_analyzer_creation() {
        let analyzer = default_analyzer();
        let author_count = analyzer
            .author_embeddings
            .read()
            .expect("rwlock should not be poisoned")
            .len();
        let publication_count = analyzer
            .publication_embeddings
            .read()
            .expect("rwlock should not be poisoned")
            .len();
        assert_eq!(author_count, 0);
        assert_eq!(publication_count, 0);
    }

    #[tokio::test]
    async fn test_author_embedding_generation() {
        let analyzer = default_analyzer();
        let embedding = analyzer
            .generate_author_embedding("test_author")
            .await
            .expect("should succeed");
        assert_eq!(embedding.author_id, "test_author");
        assert!(embedding.h_index >= 0.0);
        // Matches the default `embedding_dimension`.
        assert_eq!(embedding.embedding.values.len(), 512);
    }

    #[tokio::test]
    async fn test_publication_embedding_generation() {
        let analyzer = default_analyzer();
        let embedding = analyzer
            .generate_publication_embedding("test_publication")
            .await
            .expect("should succeed");
        assert_eq!(embedding.publication_id, "test_publication");
        assert!(embedding.predicted_impact >= 0.0);
        assert!(embedding.predicted_impact <= 1.0);
    }

    #[tokio::test]
    async fn test_h_index_calculation() {
        let analyzer = default_analyzer();
        let publications = vec![
            sample_publication(1, 10, 0.5),
            sample_publication(2, 5, 0.3),
        ];
        let h_index = analyzer
            .calculate_h_index(&publications)
            .await
            .expect("should succeed");
        // Both papers have at least two citations.
        assert_eq!(h_index, 2.0);
    }

    #[test]
    fn test_career_stage_classification() {
        let rt = tokio::runtime::Runtime::new().expect("should succeed");
        let analyzer = default_analyzer();

        let early = rt
            .block_on(analyzer.classify_career_stage(50, 5, 3.0))
            .expect("should succeed");
        assert!(matches!(early, CareerStage::EarlyCareer));

        let senior = rt
            .block_on(analyzer.classify_career_stage(2000, 100, 25.0))
            .expect("should succeed");
        assert!(matches!(senior, CareerStage::SeniorCareer));
    }

    #[tokio::test]
    async fn test_network_metrics() {
        let analyzer = default_analyzer();
        analyzer
            .generate_author_embedding("test_author")
            .await
            .expect("should succeed");
        analyzer
            .generate_publication_embedding("test_publication")
            .await
            .expect("should succeed");

        let metrics = analyzer
            .get_network_metrics()
            .await
            .expect("should succeed");
        assert_eq!(metrics.total_authors, 1);
        assert_eq!(metrics.total_publications, 1);
    }
}