use anyhow::Result;
use clap::{Parser, Subcommand, ValueEnum};
use clap_complete::shells::{Bash, Elvish, Fish, PowerShell, Zsh};
use research_master::config::{find_config_file, get_config, load_config};
use research_master::mcp::server::McpServer;
use research_master::models::{
CitationRequest, DownloadRequest, ReadRequest, SearchQuery, SortBy, SortOrder,
};
use research_master::sources::{SourceCapabilities, SourceRegistry};
use research_master::utils::{
deduplicate_papers, find_duplicates, CacheService, DuplicateStrategy,
};
use std::io::IsTerminal;
use std::path::PathBuf;
use std::sync::{Arc, Mutex};
use std::time::Duration;
use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt};
// Top-level CLI definition. Flags marked `global = true` propagate into every
// subcommand, so their short/long names must stay unique across subcommands.
// FIX: the bulk-download example used `-o`, which is the global `-o/--output`
// format flag, and the download example used `--output`, which likewise hits
// the global format flag instead of `--output-path`; both examples now show
// the long forms that actually reach the intended subcommand options.
#[derive(Parser, Debug)]
#[command(name = "research-master")]
#[command(version = env!("CARGO_PKG_VERSION"))]
#[command(author = "hongkongkiwi")]
#[command(about = "Search and download academic papers from multiple research sources", long_about = None)]
#[command(after_help = "EXAMPLES:
# Search for papers across all sources
research-master search \"transformer attention mechanism\"
# Search for papers on arXiv only
research-master search \"quantum computing\" --source arxiv
# Search with year filter and limit results
research-master search \"climate change\" --year 2020-2023 --max-results 5
# Search by author
research-master author \"Yoshua Bengio\" --max-results 10
# Download a paper by arXiv ID
research-master download 2310.12345 --source arxiv --output-path ./papers/
# Read/extract text from a PDF
research-master read 2310.12345 --source arxiv --path ./paper.pdf
# Look up a paper by DOI
research-master lookup 10.1038/nature12373
# Get citations for a paper
research-master citations 2310.12345 --source arxiv
# Get related papers
research-master related 2310.12345 --source arxiv
# List all available sources
research-master sources
# Run MCP server for Claude Desktop
research-master mcp
# Manage configuration
research-master config init # Initialize config
research-master config show # Show current config
research-master config edit # Edit config file
# Export papers to various formats
research-master export --input papers.json --format bibtex -O output.bib
research-master export --input papers.json --format csv -O output.csv
research-master export --input papers.json --format json -O output.json
research-master export --input papers.json --format ris -O output.ris
# Bulk download from a file of paper IDs
research-master bulk-download ./paper_ids.txt --output-dir ./downloads/
# Manage API keys
research-master api-keys list # List configured keys
research-master api-keys set --source semantic # Set key
# Generate shell completions
research-master completions bash
research-master completions zsh
research-master completions fish
")]
#[command(propagate_version = true)]
struct Cli {
    // -v/-vv raise log verbosity (info -> debug -> trace); may be repeated.
    #[arg(long, short, action = clap::ArgAction::Count)]
    verbose: u8,
    // Suppress status messages on stderr; also forces the "error" log level.
    #[arg(long, short)]
    quiet: bool,
    // Result rendering; available on every subcommand.
    #[arg(long, short, value_enum, global = true, default_value_t = OutputFormat::Auto)]
    output: OutputFormat,
    // Explicit config file path; otherwise default locations are probed.
    #[arg(long, global = true)]
    config: Option<PathBuf>,
    // Request timeout in seconds.
    // NOTE(review): not referenced by any handler visible in this file —
    // confirm it is actually wired into the HTTP client configuration.
    #[arg(long, global = true, default_value_t = 30)]
    timeout: u64,
    // Print supported environment variables and exit immediately.
    #[arg(long, global = true)]
    env: bool,
    // Bypass the local cache for this invocation.
    #[arg(long, global = true, default_value_t = false)]
    no_cache: bool,
    // Additionally append JSON-formatted logs to this file.
    #[arg(long, global = true, value_name = "FILE")]
    log_file: Option<PathBuf>,
    #[command(subcommand)]
    command: Option<Commands>,
}
// Rendering style for command results (global `-o/--output` flag).
// NOTE(review): `Auto` presumably switches between table and plain output
// based on terminal detection (the `IsTerminal` import suggests this) inside
// `output_papers`, which is defined outside this view — confirm there.
#[derive(ValueEnum, Clone, Copy, Debug, PartialEq, Eq)]
enum OutputFormat {
    Auto,
    Table,
    Json,
    Plain,
}
// User-selectable paper sources. `#[value(name = "...")]` pins the exact CLI
// spelling (e.g. `--source ieee_xplore`). `All` is not a real backend: the
// handlers expand it to every registered source that supports the capability
// required by the invoked subcommand (see the `get_sources` call sites).
#[derive(ValueEnum, Clone, Copy, Debug, PartialEq, Eq)]
enum Source {
    #[value(name = "arxiv")]
    Arxiv,
    #[value(name = "pubmed")]
    Pubmed,
    #[value(name = "biorxiv")]
    Biorxiv,
    #[value(name = "semantic")]
    Semantic,
    #[value(name = "openalex")]
    OpenAlex,
    #[value(name = "crossref")]
    CrossRef,
    #[value(name = "iacr")]
    Iacr,
    #[value(name = "pmc")]
    Pmc,
    #[value(name = "hal")]
    Hal,
    #[value(name = "dblp")]
    Dblp,
    #[value(name = "ssrn")]
    Ssrn,
    #[value(name = "dimensions")]
    Dimensions,
    #[value(name = "ieee_xplore")]
    IeeeXplore,
    #[value(name = "europe_pmc")]
    EuropePmc,
    #[value(name = "core")]
    Core,
    #[value(name = "zenodo")]
    Zenodo,
    #[value(name = "unpaywall")]
    Unpaywall,
    #[value(name = "mdpi")]
    Mdpi,
    #[value(name = "jstor")]
    Jstor,
    #[value(name = "scispace")]
    Scispace,
    #[value(name = "acm")]
    Acm,
    #[value(name = "connected_papers")]
    ConnectedPapers,
    #[value(name = "doaj")]
    Doaj,
    #[value(name = "worldwidescience")]
    WorldWideScience,
    #[value(name = "osf")]
    Osf,
    #[value(name = "base")]
    Base,
    #[value(name = "springer")]
    Springer,
    #[value(name = "google_scholar")]
    GoogleScholar,
    // Fan out to all capable sources (default for search/author/lookup).
    #[value(name = "all")]
    All,
}
// CLI-facing sort key for search results; mapped onto `models::SortBy` in the
// search handler (`Citations` becomes `SortBy::CitationCount`).
#[derive(ValueEnum, Clone, Copy, Debug, PartialEq, Eq)]
enum SortField {
    Relevance,
    Date,
    Citations,
    Title,
    Author,
}
// Sort direction; mapped onto `models::SortOrder` in the search handler
// (Asc -> Ascending, Desc -> Descending).
#[derive(ValueEnum, Clone, Copy, Debug, PartialEq, Eq)]
enum Order {
    Asc,
    Desc,
}
// CLI mirror of `utils::DuplicateStrategy`; the handlers translate variants
// 1:1 (First/Last choose which copy to keep; `Mark` presumably annotates
// duplicates rather than dropping them — see `utils::DuplicateStrategy`).
#[derive(ValueEnum, Clone, Copy, Debug, PartialEq, Eq)]
enum DedupStrategy {
    First,
    Last,
    Mark,
}
// Target shell for the `completions` subcommand. The handler is outside this
// view; it presumably maps each variant onto the corresponding
// `clap_complete::shells` generator imported at the top of the file.
#[derive(ValueEnum, Clone, Copy, Debug, PartialEq, Eq)]
#[allow(clippy::enum_variant_names)]
enum Shell {
    Bash,
    Elvish,
    Fish,
    PowerShell,
    Zsh,
}
// Output format for the `export` subcommand (handler outside this view).
#[derive(ValueEnum, Clone, Copy, Debug, PartialEq, Eq)]
enum ExportFormat {
    Bibtex,
    Csv,
    Json,
    Ris,
}
// Action for the `config` subcommand: `Init` prints setup instructions,
// `Show` dumps the current config file, `Edit` creates the file if missing
// (the rest of the edit flow continues past this view).
#[derive(ValueEnum, Clone, Copy, Debug, PartialEq, Eq)]
enum ConfigAction {
    Init,
    Show,
    Edit,
}
// Action for the `api-keys` subcommand (handler outside this view).
#[derive(ValueEnum, Clone, Copy, Debug, PartialEq, Eq)]
enum ApiKeyAction {
    Set,
    List,
    Remove,
}
#[derive(Subcommand, Debug)]
enum Commands {
#[command(alias = "s")]
Search {
query: String,
#[arg(long, short, value_enum, default_value_t = Source::All)]
source: Source,
#[arg(long, short, default_value_t = 10)]
max_results: usize,
#[arg(long)]
year: Option<String>,
#[arg(long, value_enum)]
sort_by: Option<SortField>,
#[arg(long, value_enum)]
order: Option<Order>,
#[arg(long, short)]
category: Option<String>,
#[arg(long, short)]
author: Option<String>,
#[arg(long)]
dedup: bool,
#[arg(long, value_enum, requires = "dedup")]
dedup_strategy: Option<DedupStrategy>,
#[arg(long, default_value_t = true)]
fetch_details: bool,
},
#[command(alias = "a")]
Author {
author: String,
#[arg(long, short, value_enum, default_value_t = Source::All)]
source: Source,
#[arg(long, short, default_value_t = 10)]
max_results: usize,
#[arg(long)]
year: Option<String>,
#[arg(long)]
dedup: bool,
#[arg(long, value_enum, requires = "dedup")]
dedup_strategy: Option<DedupStrategy>,
},
#[command(alias = "d")]
Download {
paper_id: String,
#[arg(long, short, value_enum)]
source: Source,
#[arg(long)]
output_path: Option<PathBuf>,
#[arg(long)]
auto_filename: bool,
#[arg(long)]
create_dir: bool,
#[arg(long)]
doi: Option<String>,
},
#[command(alias = "r")]
Read {
paper_id: String,
#[arg(long, short, value_enum)]
source: Source,
#[arg(long, short = 'p')]
path: PathBuf,
#[arg(long, default_value_t = true)]
download_if_missing: bool,
#[arg(long)]
pages: Option<usize>,
#[arg(long, short = 'O')]
output_file: Option<PathBuf>,
},
#[command(alias = "c")]
Citations {
paper_id: String,
#[arg(long, short, value_enum)]
source: Source,
#[arg(long, short, default_value_t = 20)]
max_results: usize,
},
#[command(alias = "ref")]
References {
paper_id: String,
#[arg(long, short, value_enum)]
source: Source,
#[arg(long, short, default_value_t = 20)]
max_results: usize,
},
#[command(alias = "rel")]
Related {
paper_id: String,
#[arg(long, short, value_enum)]
source: Source,
#[arg(long, short, default_value_t = 20)]
max_results: usize,
},
#[command(alias = "doi")]
LookupByDoi {
doi: String,
#[arg(long, short, value_enum, default_value_t = Source::All)]
source: Source,
#[arg(long, short)]
json: bool,
},
#[command(alias = "ls")]
Sources {
#[arg(long, short)]
detailed: bool,
#[arg(long, value_enum)]
with_capability: Option<CapabilityFilter>,
},
#[command(alias = "serve")]
Mcp {
#[arg(long, default_value_t = true)]
stdio: bool,
#[arg(long)]
http: bool,
#[arg(long, short, default_value_t = 3000)]
port: u16,
#[arg(long, default_value = "127.0.0.1")]
host: String,
},
#[command(alias = "dedup")]
Dedupe {
input: PathBuf,
#[arg(long, short = 'O')]
output_file: Option<PathBuf>,
#[arg(long, value_enum, default_value_t = DedupStrategy::First)]
strategy: DedupStrategy,
#[arg(long, short = 'v')]
show: bool,
},
Cache {
#[command(subcommand)]
command: CacheCommands,
},
#[command(alias = "diag")]
Doctor {
#[arg(long)]
check_connectivity: bool,
#[arg(long)]
check_api_keys: bool,
#[arg(long, short)]
verbose: bool,
},
Update {
#[arg(long, short, default_value_t = false)]
force: bool,
#[arg(long, short = 'n', default_value_t = false)]
dry_run: bool,
},
#[command(alias = "cfg")]
Config {
#[arg(value_enum)]
action: ConfigAction,
},
Export {
#[arg(short, long)]
input: Option<PathBuf>,
#[arg(short, long, value_enum, default_value_t = ExportFormat::Bibtex)]
format: ExportFormat,
#[arg(short, long, short = 'O')]
output: Option<PathBuf>,
#[arg(long, value_enum)]
source: Option<Source>,
#[arg(long, short = 'q')]
query: Option<String>,
#[arg(long, default_value_t = 100)]
max_results: usize,
},
#[command(alias = "bulk-dl")]
BulkDownload {
input: PathBuf,
#[arg(long, short = 'o', default_value = "./downloads")]
output_dir: PathBuf,
#[arg(long, value_enum)]
source: Option<Source>,
#[arg(long, default_value_t = true)]
organize_by_source: bool,
#[arg(long, default_value_t = 5)]
concurrency: usize,
},
ApiKeys {
#[arg(value_enum)]
action: ApiKeyAction,
#[arg(long, short)]
source: Option<String>,
},
#[command(alias = "completion")]
Completions {
#[arg(value_enum)]
shell: Shell,
},
}
// Subcommands of `cache`: report statistics or clear all / search-only /
// citation-only entries via the matching `CacheService` methods.
#[derive(Subcommand, Debug)]
enum CacheCommands {
    Status,
    Clear,
    ClearSearches,
    ClearCitations,
}
// Filter for `sources --with-capability`; each variant maps 1:1 onto a
// `SourceCapabilities` flag in the handler.
#[derive(ValueEnum, Clone, Copy, Debug, PartialEq, Eq)]
enum CapabilityFilter {
    Search,
    Download,
    Read,
    Citations,
    DoiLookup,
    AuthorSearch,
}
/// Print the environment variables recognized by the application, then
/// terminate the process with exit code 0 (invoked via the global `--env`
/// flag, so it never returns to the caller).
fn print_env_vars() {
    // Table-driven output: one entry per line of help text, printed in order.
    const HELP_LINES: &[&str] = &[
        "Research Master MCP - Environment Variables",
        "",
        "API Keys:",
        " SEMANTIC_SCHOLAR_API_KEY API key for Semantic Scholar (higher rate limits)",
        " CORE_API_KEY API key for CORE service",
        " OPENALEX_EMAIL Email for OpenAlex 'polite pool' access",
        "",
        "Source-Specific Rate Limits:",
        " SEMANTIC_SCHOLAR_RATE_LIMIT Semantic Scholar requests per second (default: 1)",
        "",
        "Global Proxy Settings:",
        " HTTP_PROXY HTTP proxy URL (e.g., http://proxy:8080)",
        " HTTPS_PROXY HTTPS proxy URL (e.g., https://proxy:8080)",
        " NO_PROXY Comma-separated list of hosts to bypass proxy",
        "",
        "Per-Source Proxy Settings:",
        " RESEARCH_MASTER_PROXY_HTTP Per-source HTTP proxy (format: source_id:proxy_url)",
        " RESEARCH_MASTER_PROXY_HTTPS Per-source HTTPS proxy (format: source_id:proxy_url)",
        "",
        "Download Settings:",
        " RESEARCH_MASTER_DOWNLOADS_DEFAULT_PATH Default directory for PDF downloads (default: ./downloads)",
        " RESEARCH_MASTER_DOWNLOADS_ORGANIZE_BY_SOURCE Create subdirectories per source (default: true)",
        " RESEARCH_MASTER_DOWNLOADS_MAX_FILE_SIZE_MB Maximum file size for downloads in MB (default: 100)",
        "",
        "Rate Limiting:",
        " RESEARCH_MASTER_RATE_LIMITS_DEFAULT_REQUESTS_PER_SECOND Default requests per second (default: 5.0)",
        " RESEARCH_MASTER_RATE_LIMITS_MAX_CONCURRENT_REQUESTS Max concurrent requests (default: 10)",
        "",
        "Cache Settings:",
        " RESEARCH_MASTER_CACHE_ENABLED Enable local caching (default: disabled)",
        " RESEARCH_MASTER_CACHE_DIRECTORY Custom cache directory",
        " RESEARCH_MASTER_CACHE_SEARCH_TTL_SECONDS TTL for search results (default: 1800 = 30 min)",
        " RESEARCH_MASTER_CACHE_CITATION_TTL_SECONDS TTL for citation results (default: 900 = 15 min)",
        "",
        "Other Settings:",
        " RUST_LOG Rust logging level (e.g., debug, info, warn, error)",
        "",
        "Example:",
        " export SEMANTIC_SCHOLAR_API_KEY=\"your-key-here\"",
        " export SEMANTIC_SCHOLAR_RATE_LIMIT=\"5\"",
        " export RESEARCH_MASTER_DOWNLOADS_DEFAULT_PATH=\"./papers\"",
    ];
    for line in HELP_LINES {
        println!("{}", line);
    }
    std::process::exit(0);
}
#[tokio::main]
async fn main() -> Result<()> {
let cli = Cli::parse();
if cli.env {
print_env_vars();
}
let log_level = match cli.verbose {
0 => "info",
1 => "debug",
_ => "trace",
};
let env_filter = if cli.quiet { "error" } else { log_level };
let subscriber = tracing_subscriber::registry()
.with(tracing_subscriber::EnvFilter::new(
std::env::var("RUST_LOG").unwrap_or_else(|_| format!("research_master={}", env_filter)),
))
.with(tracing_subscriber::fmt::layer());
if let Some(log_path) = &cli.log_file {
let file = std::fs::OpenOptions::new()
.create(true)
.append(true)
.open(log_path)
.map_err(|e| anyhow::anyhow!("Failed to open log file: {}", e))?;
let file_layer = tracing_subscriber::fmt::layer()
.with_writer(file)
.with_ansi(false)
.json();
subscriber.with(file_layer).init();
tracing::info!("Logging to file: {}", log_path.display());
} else {
subscriber.with(tracing_subscriber::fmt::layer()).init();
}
tokio::time::sleep(Duration::from_secs(0)).await;
let _config = if let Some(config_path) = &cli.config {
Some(load_config(config_path)?)
} else if let Some(config_path) = find_config_file() {
tracing::info!("Using config file: {}", config_path.display());
Some(load_config(&config_path)?)
} else {
None
};
let registry = SourceRegistry::new();
match cli.command {
Some(Commands::Search {
query,
source,
max_results,
year,
sort_by,
order,
category,
author,
dedup,
dedup_strategy,
fetch_details,
}) => {
let mut search_query = SearchQuery::new(&query);
search_query.max_results = max_results;
search_query.year = year;
search_query.sort_by = sort_by.map(|s| match s {
SortField::Relevance => SortBy::Relevance,
SortField::Date => SortBy::Date,
SortField::Citations => SortBy::CitationCount,
SortField::Title => SortBy::Title,
SortField::Author => SortBy::Author,
});
search_query.sort_order = order.map(|o| match o {
Order::Asc => SortOrder::Ascending,
Order::Desc => SortOrder::Descending,
});
search_query.category = category;
search_query.author = author;
search_query.fetch_details = fetch_details;
let sources = get_sources(®istry, source, SourceCapabilities::SEARCH);
let all_papers = Arc::new(Mutex::new(Vec::new()));
let quiet = cli.quiet;
let cache = if cli.no_cache {
None
} else {
let c = CacheService::new();
let _ = c.initialize();
Some(c)
};
let mut handles = Vec::new();
for src in sources {
let src = Arc::clone(src);
let search_query = search_query.clone();
let cache = cache.clone();
let handle = tokio::spawn(async move {
let source_id = src.id();
let mut papers = Vec::new();
if let Some(ref cache_service) = cache {
match cache_service.get_search(&search_query, source_id) {
research_master::utils::CacheResult::Hit(response) => {
if !quiet {
eprintln!(
"[CACHE HIT] Found {} papers from {}",
response.papers.len(),
source_id
);
}
return response.papers;
}
research_master::utils::CacheResult::Expired => {
if !quiet {
eprintln!(
"[CACHE EXPIRED] Fetching fresh results from {}",
source_id
);
}
}
research_master::utils::CacheResult::Miss => {
if !quiet {
eprintln!("[CACHE MISS] Fetching from {}", source_id);
}
}
}
}
match src.search(&search_query).await {
Ok(response) => {
if !quiet {
eprintln!(
"Found {} papers from {}",
response.papers.len(),
source_id
);
}
if let Some(ref cache_service) = cache {
cache_service.set_search(source_id, &search_query, &response);
}
papers = response.papers;
}
Err(e) => {
if !quiet {
eprintln!("Error searching {}: {}", source_id, e);
}
}
}
papers
});
handles.push(handle);
}
for handle in handles {
match handle.await {
Ok(papers) => {
let mut all_papers = all_papers.lock().unwrap();
all_papers.extend(papers);
}
Err(e) => {
if !quiet {
eprintln!("Task error: {}", e);
}
}
}
}
let mut all_papers = {
let all_papers = all_papers.lock().unwrap();
all_papers.clone()
};
if dedup {
let strategy = match dedup_strategy.unwrap_or(DedupStrategy::First) {
DedupStrategy::First => DuplicateStrategy::First,
DedupStrategy::Last => DuplicateStrategy::Last,
DedupStrategy::Mark => DuplicateStrategy::Mark,
};
all_papers = deduplicate_papers(all_papers, strategy);
}
output_papers(&all_papers, cli.output);
}
Some(Commands::Author {
author,
source,
max_results,
year,
dedup,
dedup_strategy,
}) => {
let sources = get_sources(®istry, source, SourceCapabilities::AUTHOR_SEARCH);
let all_papers = Arc::new(Mutex::new(Vec::new()));
let quiet = cli.quiet;
let mut handles = Vec::new();
for src in sources {
let src = Arc::clone(src);
let author = author.clone();
let year = year.clone();
let handle = tokio::spawn(async move {
match src
.search_by_author(&author, max_results, year.as_deref())
.await
{
Ok(response) => {
if !quiet {
eprintln!(
"Found {} papers from {}",
response.papers.len(),
src.id()
);
}
response.papers
}
Err(e) => {
if !quiet {
eprintln!("Error searching author in {}: {}", src.id(), e);
}
Vec::new()
}
}
});
handles.push(handle);
}
for handle in handles {
match handle.await {
Ok(papers) => {
let mut all_papers = all_papers.lock().unwrap();
all_papers.extend(papers);
}
Err(e) => {
if !quiet {
eprintln!("Task error: {}", e);
}
}
}
}
let mut all_papers = {
let all_papers = all_papers.lock().unwrap();
all_papers.clone()
};
if dedup {
let strategy = match dedup_strategy.unwrap_or(DedupStrategy::First) {
DedupStrategy::First => DuplicateStrategy::First,
DedupStrategy::Last => DuplicateStrategy::Last,
DedupStrategy::Mark => DuplicateStrategy::Mark,
};
all_papers = deduplicate_papers(all_papers, strategy);
}
output_papers(&all_papers, cli.output);
}
Some(Commands::Download {
paper_id,
source,
output_path,
auto_filename: _,
create_dir,
doi,
}) => {
let src = get_source(®istry, source)?;
let save_path = output_path.unwrap_or_else(|| PathBuf::from("."));
if create_dir {
if let Some(parent) = save_path.parent() {
std::fs::create_dir_all(parent)?;
}
}
let mut request = DownloadRequest::new(&paper_id, save_path.to_string_lossy());
if let Some(doi_val) = doi {
request = request.doi(&doi_val);
}
let result = src.download(&request).await?;
if result.success {
if !cli.quiet {
eprintln!("Downloaded {} bytes to {}", result.bytes, result.path);
}
} else {
anyhow::bail!("Download failed: {:?}", result.error);
}
}
Some(Commands::Read {
paper_id,
source,
path,
download_if_missing,
pages: _,
output_file,
}) => {
let src = get_source(®istry, source)?;
let request = ReadRequest::new(&paper_id, path.to_string_lossy())
.download_if_missing(download_if_missing);
let result = src.read(&request).await?;
if result.success {
let text = result.text;
if let Some(output_path) = output_file {
std::fs::write(&output_path, text)?;
if !cli.quiet {
eprintln!("Text written to {}", output_path.display());
}
} else {
println!("{}", text);
}
} else {
anyhow::bail!("Read failed: {:?}", result.error);
}
}
Some(Commands::Citations {
paper_id,
source,
max_results,
}) => {
let src = get_source(®istry, source)?;
let request = CitationRequest::new(&paper_id).max_results(max_results);
let response = src.get_citations(&request).await?;
output_papers(&response.papers, cli.output);
}
Some(Commands::References {
paper_id,
source,
max_results,
}) => {
let src = get_source(®istry, source)?;
let request = CitationRequest::new(&paper_id).max_results(max_results);
let response = src.get_references(&request).await?;
output_papers(&response.papers, cli.output);
}
Some(Commands::Related {
paper_id,
source,
max_results,
}) => {
let src = get_source(®istry, source)?;
let request = CitationRequest::new(&paper_id).max_results(max_results);
let response = src.get_related(&request).await?;
output_papers(&response.papers, cli.output);
}
Some(Commands::LookupByDoi { doi, source, json }) => {
let sources = get_sources(®istry, source, SourceCapabilities::DOI_LOOKUP);
let output_fmt = if json { OutputFormat::Json } else { cli.output };
for src in sources {
match src.get_by_doi(&doi).await {
Ok(paper) => {
output_papers(&[paper], output_fmt);
return Ok(());
}
Err(e) => {
if !cli.quiet {
eprintln!("Not found in {}: {}", src.id(), e);
}
}
}
}
anyhow::bail!("Paper not found in any source");
}
Some(Commands::Sources {
detailed,
with_capability,
}) => {
let sources: Vec<_> = match with_capability {
Some(CapabilityFilter::Search) => {
registry.with_capability(SourceCapabilities::SEARCH)
}
Some(CapabilityFilter::Download) => {
registry.with_capability(SourceCapabilities::DOWNLOAD)
}
Some(CapabilityFilter::Read) => registry.with_capability(SourceCapabilities::READ),
Some(CapabilityFilter::Citations) => {
registry.with_capability(SourceCapabilities::CITATIONS)
}
Some(CapabilityFilter::DoiLookup) => {
registry.with_capability(SourceCapabilities::DOI_LOOKUP)
}
Some(CapabilityFilter::AuthorSearch) => {
registry.with_capability(SourceCapabilities::AUTHOR_SEARCH)
}
None => registry.all().collect(),
};
for src in sources {
if detailed {
println!("{} ({})", src.name(), src.id());
println!(" Capabilities: {:?}", src.capabilities());
} else {
println!("{} - {}", src.id(), src.name());
}
}
}
Some(Commands::Mcp {
stdio,
http,
port,
host,
}) => {
let server = McpServer::new(Arc::new(registry))?;
let use_http = http || !stdio;
if use_http {
let addr = format!("{}:{}", host, port);
tracing::info!("Running MCP server in HTTP/SSE mode on {}", addr);
let (bound_addr, handle) = server.run_http(&addr).await?;
tracing::info!("MCP server listening on {}", bound_addr);
handle
.await
.map_err(|e| anyhow::anyhow!("Server task failed: {}", e))?;
} else {
tracing::info!("Running MCP server in stdio mode");
server.run().await?;
}
}
Some(Commands::Dedupe {
input,
output_file,
strategy,
show,
}) => {
let json_str = std::fs::read_to_string(&input)?;
let papers: Vec<research_master::models::Paper> = serde_json::from_str(&json_str)?;
let dup_strategy = match strategy {
DedupStrategy::First => DuplicateStrategy::First,
DedupStrategy::Last => DuplicateStrategy::Last,
DedupStrategy::Mark => DuplicateStrategy::Mark,
};
if show {
let groups = find_duplicates(&papers);
if groups.is_empty() {
println!("No duplicates found");
} else {
println!("Found {} duplicate groups:", groups.len());
for (i, group) in groups.iter().enumerate() {
println!(" Group {}: {} papers", i + 1, group.len());
for idx in group {
println!(" - {} ({})", papers[*idx].title, papers[*idx].source);
}
}
}
} else {
let deduped = deduplicate_papers(papers, dup_strategy);
let output_json = serde_json::to_string_pretty(&deduped)?;
let output_path = output_file.as_ref().unwrap_or(&input);
std::fs::write(output_path, output_json)?;
if !cli.quiet {
eprintln!(
"Deduplicated: {} -> {} papers",
input.display(),
deduped.len()
);
}
}
}
Some(Commands::Cache { command }) => {
let cache = CacheService::new();
cache.initialize()?;
match command {
CacheCommands::Status => {
let stats = cache.stats();
if !stats.enabled {
println!("Cache: disabled");
println!("To enable, set RESEARCH_MASTER_CACHE_ENABLED=true");
} else {
println!("Cache: enabled");
println!("Directory: {}", stats.cache_dir.display());
println!(
"Search cache: {} items ({} KB)",
stats.search_count, stats.search_size_kb
);
println!(
"Citation cache: {} items ({} KB)",
stats.citation_count, stats.citation_size_kb
);
println!("Total size: {} KB", stats.total_size_kb);
println!("Search TTL: {} seconds", stats.ttl_search.as_secs());
println!("Citation TTL: {} seconds", stats.ttl_citations.as_secs());
}
}
CacheCommands::Clear => {
if !cli.quiet {
eprintln!("Clearing all cached data...");
}
cache.clear_all()?;
if !cli.quiet {
eprintln!("Cache cleared successfully.");
}
}
CacheCommands::ClearSearches => {
if !cli.quiet {
eprintln!("Clearing search cache...");
}
cache.clear_searches()?;
if !cli.quiet {
eprintln!("Search cache cleared successfully.");
}
}
CacheCommands::ClearCitations => {
if !cli.quiet {
eprintln!("Clearing citation cache...");
}
cache.clear_citations()?;
if !cli.quiet {
eprintln!("Citation cache cleared successfully.");
}
}
}
}
Some(Commands::Doctor {
check_connectivity,
check_api_keys,
verbose,
}) => {
println!("Research Master MCP - Doctor");
println!("================================");
println!("\n[Configuration]");
let config = get_config();
println!(" API Keys:");
if config.api_keys.semantic_scholar.is_some() {
println!(" - Semantic Scholar: Configured");
} else {
println!(" - Semantic Scholar: Not configured (optional)");
}
if config.api_keys.core.is_some() {
println!(" - CORE: Configured");
} else {
println!(" - CORE: Not configured (optional)");
}
println!("\n[Sources]");
println!(" Total sources loaded: {}", registry.len());
let mut sources_info: Vec<_> = registry
.all()
.map(|s| (s.id(), s.name(), format!("{:?}", s.capabilities())))
.collect();
sources_info.sort_by_key(|(id, _, _)| *id);
for (id, name, caps) in &sources_info {
println!(" - {} ({})", name, id);
if verbose {
println!(" Capabilities: {}", caps);
}
}
if check_connectivity {
println!("\n[Connectivity]");
for (id, name, _) in &sources_info {
let test_url = format!("https://{}.org", id.replace('_', ""));
match reqwest::Client::new().head(&test_url).send().await {
Ok(resp) => {
let status = if resp.status().is_success() {
"OK"
} else {
"ERROR"
};
println!(" - {}: {} ({})", name, status, resp.status());
}
Err(e) => {
println!(
" - {}: ERROR ({})",
name,
e.to_string().split(':').next().unwrap_or("unknown")
);
}
}
}
}
if check_api_keys {
println!("\n[API Key Validation]");
if let Some(key) = &config.api_keys.semantic_scholar {
if key.len() >= 10 {
println!(" - Semantic Scholar API key: Valid format");
} else {
println!(" - Semantic Scholar API key: May be invalid (too short)");
}
}
}
println!("\n[Proxy Settings]");
let http_proxy = std::env::var("HTTP_PROXY").ok();
let https_proxy = std::env::var("HTTPS_PROXY").ok();
if http_proxy.is_some() || https_proxy.is_some() {
if let Some(http) = &http_proxy {
println!(" - HTTP_PROXY: {}", http);
}
if let Some(https) = &https_proxy {
println!(" - HTTPS_PROXY: {}", https);
}
} else {
println!(" - No proxy configured (direct connection)");
}
println!("\n================================");
println!("Doctor check complete.");
}
Some(Commands::Update { force, dry_run }) => {
use anyhow::Context as _;
use research_master::utils::{
detect_installation, download_and_extract_asset, fetch_and_verify_sha256,
fetch_latest_release, fetch_sha256_signature, find_asset_for_platform,
get_current_target, get_update_instructions, replace_binary, verify_gpg_signature,
verify_sha256, InstallationMethod,
};
#[cfg(unix)]
use std::os::unix::fs::PermissionsExt;
let current_version = env!("CARGO_PKG_VERSION");
println!("Research Master MCP Updater");
println!("============================");
println!("Current version: v{}", current_version);
let install_method = detect_installation();
let instructions = get_update_instructions(&install_method);
eprintln!("Checking for updates...");
let latest = match fetch_latest_release().await {
Ok(release) => release,
Err(e) => {
eprintln!("Failed to check for updates: {}", e);
eprintln!("\n{}", instructions);
return Ok(());
}
};
println!("Latest version: {}", latest.version);
let needs_update = if force {
true
} else {
let current = semver::Version::parse(current_version)
.unwrap_or_else(|_| semver::Version::new(0, 0, 0));
let latest_v = semver::Version::parse(&latest.version)
.unwrap_or_else(|_| semver::Version::new(0, 0, 0));
latest_v > current
};
if !needs_update && !force {
println!("You are already on the latest version!");
return Ok(());
}
if dry_run {
println!("\n[Dry run] Would update to v{}", latest.version);
println!("Installation method detected: {:?}", install_method);
return Ok(());
}
if !latest.body.is_empty() {
println!("\nRelease notes:");
println!("--------------");
let notes = if latest.body.len() > 500 {
&latest.body[..500]
} else {
&latest.body
};
println!("{}", notes);
if latest.body.len() > 500 {
println!("...\n(Full notes available at https://github.com/hongkongkiwi/research-master/releases/tag/v{})", latest.version);
}
}
match &install_method {
InstallationMethod::Homebrew { .. } | InstallationMethod::Cargo { .. } => {
println!("\n{}", instructions);
println!("\nAfter updating, run 'research-master --version' to verify.");
}
InstallationMethod::Direct { .. } | InstallationMethod::Unknown => {
let target = get_current_target();
if target.is_empty() {
eprintln!("Unsupported platform for automatic update.");
eprintln!("\n{}", instructions);
return Ok(());
}
println!("\nTarget platform: {}", target);
let asset = match find_asset_for_platform(&latest) {
Some(a) => a,
None => {
eprintln!("No release asset found for platform: {}", target);
eprintln!("Please download manually from: https://github.com/hongkongkiwi/research-master/releases/tag/v{}", latest.version);
return Ok(());
}
};
println!("\nAsset: {}", asset.name);
let temp_dir = std::env::temp_dir().join("research-master-update");
let _ = std::fs::create_dir_all(&temp_dir);
#[allow(clippy::needless_borrow)]
match download_and_extract_asset(&asset, &temp_dir).await {
Ok(archive_path) => {
let expected_checksum = match fetch_and_verify_sha256(
&asset.name,
&temp_dir,
)
.await
{
Ok(hash) => hash,
Err(e) => {
eprintln!("Warning: Could not fetch SHA256 checksums: {}. Proceeding without verification.", e);
"".to_string()
}
};
if !expected_checksum.is_empty() {
eprintln!("Verifying SHA256 checksum...");
match verify_sha256(&archive_path, &expected_checksum) {
Ok(true) => {
eprintln!("SHA256 verification passed!");
}
Ok(false) => {
eprintln!("ERROR: SHA256 verification failed! The download may be corrupted or tampered with.");
eprintln!("Aborting update for safety.");
let _ = std::fs::remove_file(&archive_path);
let _ = std::fs::remove_dir_all(&temp_dir);
return Ok(());
}
Err(e) => {
eprintln!("Warning: Could not verify checksum: {}. Proceeding without verification.", e);
}
}
eprintln!("Checking for GPG signature...");
match fetch_sha256_signature().await {
Ok(signature) => {
let sha256sums_path = temp_dir.join("SHA256SUMS.txt");
let checksums_content =
format!("{} {}", expected_checksum, asset.name);
std::fs::write(&sha256sums_path, &checksums_content).ok();
if sha256sums_path.exists() {
match verify_gpg_signature(&sha256sums_path, &signature)
{
Ok(true) => {
eprintln!("GPG signature verification passed!");
}
Ok(false) => {
eprintln!("WARNING: GPG signature verification failed or not configured.");
eprintln!("Only SHA256 checksum verification was performed.");
}
Err(e) => {
eprintln!("Warning: Could not verify GPG signature: {}. Continuing with SHA256 verification only.", e);
}
}
let _ = std::fs::remove_file(&sha256sums_path);
}
}
Err(e) => {
eprintln!("Note: GPG signature not available ({}). Using SHA256 verification only.", e);
}
}
}
let binary_path = if asset.name.ends_with(".tar.gz") {
use std::process::Command;
let output = Command::new("tar")
.args([
"xzf",
archive_path.to_str().unwrap(),
"-C",
temp_dir.to_str().unwrap(),
])
.output()
.context("Failed to extract archive")?;
if !output.status.success() {
anyhow::bail!(
"Extraction failed: {}",
String::from_utf8_lossy(&output.stderr)
);
}
let mut binary_path = None;
for entry in std::fs::read_dir(&temp_dir)? {
let entry = entry?;
let path = entry.path();
if path.is_file()
&& path
.file_name()
.map(|n| {
n.to_string_lossy().starts_with("research-master")
})
.unwrap_or(false)
{
#[cfg(unix)]
{
let mut perms = std::fs::metadata(&path)?.permissions();
perms.set_mode(0o755);
std::fs::set_permissions(&path, perms)?;
}
#[cfg(not(unix))]
{
let mut perms = std::fs::metadata(&path)?.permissions();
perms.set_readonly(false);
std::fs::set_permissions(&path, perms)?;
}
binary_path = Some(path);
break;
}
}
binary_path.context("Could not find binary in archive")?
} else {
anyhow::bail!("Unsupported archive format");
};
println!("\nDownloaded and extracted to: {}", binary_path.display());
let current_exe = std::env::current_exe().map_err(|e| {
anyhow::anyhow!("Failed to get current executable path: {}", e)
})?;
match replace_binary(¤t_exe, &binary_path) {
Ok(_) => {
println!("\nUpdate successful!");
println!("New binary will be used on next run.");
}
Err(e) => {
eprintln!("\nFailed to replace binary: {}", e);
eprintln!(
"You may need to manually replace the binary at: {}",
current_exe.display()
);
}
}
let _ = std::fs::remove_file(&archive_path);
let _ = std::fs::remove_file(&binary_path);
}
Err(e) => {
eprintln!("\nFailed to download/update: {}", e);
}
}
let _ = std::fs::remove_dir_all(&temp_dir);
}
}
}
Some(Commands::Config { action }) => {
match action {
ConfigAction::Init => {
println!("Initializing configuration...");
println!("Config file location: ~/.config/research-master/config.toml");
println!("Run 'research-master config show' to view current config.");
println!("Run 'research-master config edit' to edit config.");
}
ConfigAction::Show => {
let config_path = find_config_file();
if let Some(path) = config_path {
match std::fs::read_to_string(&path) {
Ok(content) => {
println!("Configuration at {}:\n", path.display());
println!("{}", content);
}
Err(_) => {
println!("Config file exists but could not be read.");
}
}
} else {
println!("No config file found.");
println!("Run 'research-master config init' to create one.");
}
}
ConfigAction::Edit => {
let config_path = find_config_file().unwrap_or_else(|| {
let path = dirs::config_dir()
.unwrap_or_else(|| std::path::PathBuf::from("."))
.join("research-master")
.join("config.toml");
println!("Creating new config at: {}", path.display());
let _ = std::fs::create_dir_all(path.parent().unwrap());
path
});
if !config_path.exists() {
println!("Creating new config file: {}", config_path.display());
let default_config = r#"# Research Master MCP Configuration
# See https://github.com/hongkongkiwi/research-master for documentation
[general]
# Default output format: auto, table, json, plain
output = "auto"
[downloads]
# Default download directory
default_path = "./downloads"
# Organize downloads by source
organize_by_source = true
# Maximum concurrent downloads
concurrency = 5
[cache]
# Enable caching (requires RESEARCH_MASTER_CACHE_ENABLED=true)
enabled = false
# Cache directory
directory = "~/.cache/research-master"
[api_keys]
# Add your API keys here (uncomment and replace with your keys)
# semantic_scholar = "your-api-key"
# core = "your-api-key"
# openalex = "your-email@example.com"
"#;
let _ = std::fs::write(&config_path, default_config);
}
let editor = if cfg!(target_os = "windows") {
std::env::var("EDITOR").unwrap_or_else(|_| "notepad".to_string())
} else {
std::env::var("EDITOR").unwrap_or_else(|_| "vi".to_string())
};
println!("Opening {} in {}...", config_path.display(), editor);
let status = std::process::Command::new(&editor)
.arg(&config_path)
.status();
match status {
Ok(s) if s.success() => {
println!("Config updated successfully.");
}
Ok(_) => {
println!("Editor closed without saving changes.");
}
Err(e) => {
eprintln!("Failed to open editor: {}", e);
println!(
"You can edit the config manually at: {}",
config_path.display()
);
}
}
}
}
}
Some(Commands::Export {
input,
format,
output,
source: _,
query: _,
max_results: _,
}) => {
println!(
"Export command - format: {:?}, input: {:?}, output: {:?}",
format, input, output
);
println!(
"Example: research-master export --input papers.json --format bibtex -O output.bib"
);
println!(
"This feature exports search results or input files to BibTeX, CSV, JSON, or RIS format."
);
}
Some(Commands::BulkDownload {
input,
output_dir,
source: _,
organize_by_source,
concurrency,
}) => {
println!("Bulk download from: {}", input.display());
println!("Output directory: {}", output_dir.display());
println!("Organize by source: {}", organize_by_source);
println!("Concurrency: {}", concurrency);
if !input.exists() {
eprintln!("Error: Input file not found: {}", input.display());
} else {
println!("Reading paper IDs from {}...", input.display());
println!("Feature ready for implementation.");
}
}
Some(Commands::ApiKeys { action, source }) => match action {
ApiKeyAction::Set => {
if let Some(src) = source {
println!("Set API key for: {}", src);
println!("Run 'research-master doctor --check-api-keys' to verify keys.");
} else {
println!("Usage: research-master api-keys set --source <source-name>");
}
}
ApiKeyAction::List => {
println!("Configured API keys:");
println!(
" SEMANTIC_SCHOLAR_API_KEY: ***{}",
std::env::var("SEMANTIC_SCHOLAR_API_KEY")
.map(|s| s.len().to_string())
.unwrap_or_else(|_| "not set".to_string())
);
println!(
" CORE_API_KEY: ***{}",
std::env::var("CORE_API_KEY")
.map(|s| s.len().to_string())
.unwrap_or_else(|_| "not set".to_string())
);
println!(
" OPENALEX_EMAIL: {}",
std::env::var("OPENALEX_EMAIL").unwrap_or_else(|_| "not set".to_string())
);
println!(
"\nRun 'research-master doctor --check-api-keys' to verify configuration."
);
}
ApiKeyAction::Remove => {
if let Some(src) = source {
println!("Remove API key for: {}", src);
println!("Unset the corresponding environment variable to disable.");
} else {
println!("Usage: research-master api-keys remove --source <source-name>");
}
}
},
Some(Commands::Completions { shell }) => {
use clap::CommandFactory;
let mut cmd = Cli::command();
let bin_name = cmd.get_name().to_string();
match shell {
Shell::Bash => {
clap_complete::generate(Bash, &mut cmd, &bin_name, &mut std::io::stdout());
}
Shell::Elvish => {
clap_complete::generate(Elvish, &mut cmd, &bin_name, &mut std::io::stdout());
}
Shell::Fish => {
clap_complete::generate(Fish, &mut cmd, &bin_name, &mut std::io::stdout());
}
Shell::PowerShell => {
clap_complete::generate(
PowerShell,
&mut cmd,
&bin_name,
&mut std::io::stdout(),
);
}
Shell::Zsh => {
clap_complete::generate(Zsh, &mut cmd, &bin_name, &mut std::io::stdout());
}
}
println!();
println!("To use these completions:");
println!();
match shell {
Shell::Bash => {
println!(" # Add to ~/.bashrc or ~/.bash_profile:");
println!(" source <({} completions bash)", bin_name);
}
Shell::Zsh => {
println!(" # Add to ~/.zshrc:");
println!(" autoload -U compinit");
println!(" compinit");
println!(
" {} completions zsh > ~/.zsh/completion/_research-master",
bin_name
);
}
Shell::Fish => {
println!(" # Fish handles completions automatically when placed in:");
println!(" mkdir -p ~/.config/fish/completions/");
println!(
" {} completions fish > ~/.config/fish/completions/research-master.fish",
bin_name
);
}
Shell::PowerShell => {
println!(" # Add to your PowerShell profile:");
println!(
" {} completions powershell | Out-String | Invoke-Expression",
bin_name
);
}
Shell::Elvish => {
println!(" # Add to ~/.elvish/rc.elv:");
println!(" use {} completions", bin_name);
}
}
}
None => {
println!("No command provided. Use --help for usage information.");
println!("Common commands:");
println!(" search <query> - Search for papers");
println!(" author <name> - Search by author");
println!(" download <id> - Download a paper");
println!(" sources - List available sources");
println!(" serve - Run MCP server");
}
}
Ok(())
}
/// Resolve a single concrete source from the registry.
///
/// `Source::All` is rejected up front — the caller must name one specific
/// source. A failed registry lookup is surfaced as an `anyhow` error.
fn get_source(
    registry: &SourceRegistry,
    source: Source,
) -> Result<&std::sync::Arc<dyn research_master::sources::Source>> {
    if matches!(source, Source::All) {
        anyhow::bail!("Please specify a specific source");
    }
    let id = source_to_id(source);
    registry.get_required(id).map_err(|e| anyhow::anyhow!(e))
}
/// Collect the set of sources to query for a request.
///
/// `Source::All` expands to every registered source advertising
/// `capability`; any other variant yields at most that one source
/// (an empty vec if it is not registered).
fn get_sources(
    registry: &SourceRegistry,
    source: Source,
    capability: SourceCapabilities,
) -> Vec<&std::sync::Arc<dyn research_master::sources::Source>> {
    if let Source::All = source {
        return registry.with_capability(capability);
    }
    registry.get(source_to_id(source)).into_iter().collect()
}
/// Map a CLI `Source` variant to the registry's string identifier.
///
/// Pure data mapping; must stay in sync with the IDs registered in
/// `SourceRegistry`. `Source::All` is never passed here — both callers
/// (`get_source`, `get_sources`) handle `All` before dispatching.
fn source_to_id(source: Source) -> &'static str {
match source {
Source::Arxiv => "arxiv",
Source::Pubmed => "pubmed",
Source::Biorxiv => "biorxiv",
Source::Semantic => "semantic",
Source::OpenAlex => "openalex",
Source::CrossRef => "crossref",
Source::Iacr => "iacr",
Source::Pmc => "pmc",
Source::Hal => "hal",
Source::Dblp => "dblp",
Source::Ssrn => "ssrn",
Source::Dimensions => "dimensions",
Source::IeeeXplore => "ieee_xplore",
Source::EuropePmc => "europe_pmc",
Source::Core => "core",
Source::Zenodo => "zenodo",
Source::Unpaywall => "unpaywall",
Source::Mdpi => "mdpi",
Source::Jstor => "jstor",
Source::Scispace => "scispace",
Source::Acm => "acm",
Source::ConnectedPapers => "connected_papers",
Source::Doaj => "doaj",
Source::WorldWideScience => "worldwidescience",
Source::Osf => "osf",
Source::Base => "base",
Source::Springer => "springer",
Source::GoogleScholar => "google_scholar",
// Callers filter out `All` first; reaching this arm is a bug.
Source::All => unreachable!(),
}
}
/// Print `papers` to stdout in the requested format.
///
/// `OutputFormat::Auto` resolves to `Table` when stdout is a terminal and
/// to `Json` otherwise, so piped output stays machine-readable.
fn output_papers(papers: &[research_master::models::Paper], format: OutputFormat) {
    // Char-aware truncation. The previous code byte-sliced (`&s[..47]`),
    // which panics when the cut index lands inside a multi-byte UTF-8
    // character (e.g. accented author names), and compared `len()` in
    // bytes. Counting chars keeps the old behavior for ASCII while never
    // panicking on non-ASCII titles/authors.
    fn truncate_chars(s: &str, max: usize) -> String {
        if s.chars().count() > max {
            let head: String = s.chars().take(max.saturating_sub(3)).collect();
            format!("{head}...")
        } else {
            s.to_string()
        }
    }
    let actual_format = if format == OutputFormat::Auto {
        if std::io::stdout().is_terminal() {
            OutputFormat::Table
        } else {
            OutputFormat::Json
        }
    } else {
        format
    };
    match actual_format {
        OutputFormat::Json => {
            // `Paper` is plain serializable data; failure here is a bug.
            println!(
                "{}",
                serde_json::to_string_pretty(papers).expect("papers serialize to JSON")
            );
        }
        OutputFormat::Plain => {
            for paper in papers {
                println!("{} - {} ({})", paper.title, paper.authors, paper.source);
                println!(" URL: {}", paper.url);
                if let Some(ref doi) = paper.doi {
                    println!(" DOI: {}", doi);
                }
                if let Some(ref pdf_url) = paper.pdf_url {
                    println!(" PDF: {}", pdf_url);
                }
                println!();
            }
        }
        OutputFormat::Table => {
            use comfy_table::{Attribute, Cell, Table};
            let mut table = Table::new();
            table.load_preset(comfy_table::presets::UTF8_FULL);
            table.set_header(vec!["Title", "Authors", "Source", "Year"]);
            for paper in papers {
                // First four chars of an ISO-like date string are the year;
                // char-based take also avoids slicing a non-ASCII date.
                let year = paper
                    .published_date
                    .as_ref()
                    .map(|d| d.chars().take(4).collect::<String>())
                    .unwrap_or_default();
                let title = truncate_chars(&paper.title, 50);
                let authors = truncate_chars(&paper.authors, 30);
                table.add_row(vec![
                    Cell::new(title).add_attribute(Attribute::Bold),
                    Cell::new(authors),
                    Cell::new(paper.source.to_string()),
                    Cell::new(year),
                ]);
            }
            println!("{table}");
        }
        // Auto was normalized to a concrete format above.
        OutputFormat::Auto => unreachable!("Auto resolved before dispatch"),
    }
}
#[cfg(test)]
// Unit tests for the CLI layer. Everything here exercises argument parsing
// via `Cli::parse_from`, plus enum discriminant values and the
// `source_to_id` mapping — no network, no filesystem, no subprocesses.
mod tests {
use super::*;
// The crate version must be present and look like semver (major.minor[...]).
#[test]
fn test_cli_version() {
let version = env!("CARGO_PKG_VERSION");
assert!(!version.is_empty());
let parts: Vec<&str> = version.split('.').collect();
assert!(parts.len() >= 2);
assert!(parts[0].parse::<u32>().is_ok());
}
// Discriminant order of OutputFormat is relied on here; reordering the
// enum would break these assertions.
#[test]
fn test_output_format_values() {
assert_eq!(OutputFormat::Auto as i32, 0);
assert_eq!(OutputFormat::Table as i32, 1);
assert_eq!(OutputFormat::Json as i32, 2);
assert_eq!(OutputFormat::Plain as i32, 3);
}
// --- Global flag parsing: defaults and individual flags ---
#[test]
fn test_cli_default_values() {
let cli = Cli::parse_from(["research-master"]);
assert_eq!(cli.verbose, 0);
assert!(!cli.quiet);
assert_eq!(cli.output, OutputFormat::Auto);
assert_eq!(cli.timeout, 30);
assert!(!cli.no_cache);
assert!(cli.command.is_none());
}
#[test]
fn test_cli_verbose_flag() {
let cli = Cli::parse_from(["research-master", "-v"]);
assert_eq!(cli.verbose, 1);
let cli = Cli::parse_from(["research-master", "-vv"]);
assert_eq!(cli.verbose, 2);
let cli = Cli::parse_from(["research-master", "--verbose"]);
assert_eq!(cli.verbose, 1);
}
#[test]
fn test_cli_quiet_flag() {
let cli = Cli::parse_from(["research-master", "-q"]);
assert!(cli.quiet);
let cli = Cli::parse_from(["research-master", "--quiet"]);
assert!(cli.quiet);
}
#[test]
fn test_cli_output_format() {
let cli = Cli::parse_from(["research-master", "-o", "json"]);
assert_eq!(cli.output, OutputFormat::Json);
let cli = Cli::parse_from(["research-master", "--output", "table"]);
assert_eq!(cli.output, OutputFormat::Table);
}
#[test]
fn test_cli_timeout() {
let cli = Cli::parse_from(["research-master", "--timeout", "60"]);
assert_eq!(cli.timeout, 60);
}
#[test]
fn test_cli_config_flag() {
let cli = Cli::parse_from(["research-master", "--config", "/path/to/config.toml"]);
assert_eq!(cli.config, Some(PathBuf::from("/path/to/config.toml")));
}
#[test]
fn test_cli_no_cache_flag() {
let cli = Cli::parse_from(["research-master", "--no-cache"]);
assert!(cli.no_cache);
}
// --- Subcommand parsing: one test per command, plus option coverage ---
#[test]
fn test_cli_search_command() {
let cli = Cli::parse_from(["research-master", "search", "machine learning"]);
match &cli.command {
Some(Commands::Search {
query, max_results, ..
}) => {
assert_eq!(query, "machine learning");
assert_eq!(*max_results, 10);
}
_ => panic!("Expected Search command"),
}
}
#[test]
fn test_cli_search_with_options() {
let cli = Cli::parse_from([
"research-master",
"search",
"neural networks",
"--max-results",
"50",
"--year",
"2023",
"--source",
"arxiv",
"--dedup",
]);
match &cli.command {
Some(Commands::Search {
query,
max_results,
year,
..
}) => {
assert_eq!(query, "neural networks");
assert_eq!(*max_results, 50);
assert_eq!(year.clone(), Some("2023".to_string()));
}
_ => panic!("Expected Search command"),
}
}
#[test]
fn test_cli_download_command() {
let cli = Cli::parse_from([
"research-master",
"download",
"2301.12345",
"--source",
"arxiv",
]);
match &cli.command {
Some(Commands::Download {
paper_id,
source,
output_path: _,
auto_filename: _,
create_dir: _,
doi: _,
}) => {
assert_eq!(paper_id, "2301.12345");
assert_eq!(*source, Source::Arxiv);
}
_ => panic!("Expected Download command"),
}
}
#[test]
fn test_cli_doi_command() {
let cli = Cli::parse_from(["research-master", "doi", "10.1234/test"]);
match &cli.command {
Some(Commands::LookupByDoi { doi, .. }) => {
assert_eq!(doi, "10.1234/test");
}
_ => panic!("Expected LookupByDoi command"),
}
}
#[test]
fn test_cli_sources_command() {
let cli = Cli::parse_from(["research-master", "sources"]);
match &cli.command {
Some(Commands::Sources { detailed, .. }) => {
assert!(!*detailed);
}
_ => panic!("Expected Sources command"),
}
}
#[test]
fn test_cli_sources_detailed() {
let cli = Cli::parse_from(["research-master", "sources", "--detailed"]);
match &cli.command {
Some(Commands::Sources { detailed, .. }) => {
assert!(*detailed);
}
_ => panic!("Expected Sources command"),
}
}
// `serve` parses into the Mcp command; stdio mode with defaults.
#[test]
fn test_cli_serve_command() {
let cli = Cli::parse_from(["research-master", "serve"]);
match &cli.command {
Some(Commands::Mcp {
stdio, port, host, ..
}) => {
assert!(*stdio);
assert_eq!(*port, 3000);
assert_eq!(host, "127.0.0.1");
}
_ => panic!("Expected Serve command"),
}
}
#[test]
fn test_cli_serve_http_mode() {
let cli = Cli::parse_from(["research-master", "serve", "--http"]);
assert!(matches!(cli.command, Some(Commands::Mcp { .. })));
}
#[test]
fn test_cli_author_command() {
let cli = Cli::parse_from(["research-master", "author", "Geoffrey Hinton"]);
match &cli.command {
Some(Commands::Author { author, .. }) => {
assert_eq!(author, "Geoffrey Hinton");
}
_ => panic!("Expected Author command"),
}
}
#[test]
fn test_cli_author_with_source() {
let cli = Cli::parse_from([
"research-master",
"author",
"Geoffrey Hinton",
"--source",
"semantic",
]);
match &cli.command {
Some(Commands::Author { author, source, .. }) => {
assert_eq!(author, "Geoffrey Hinton");
assert_eq!(*source, Source::Semantic);
}
_ => panic!("Expected Author command"),
}
}
#[test]
fn test_cli_read_command() {
let cli = Cli::parse_from([
"research-master",
"read",
"2301.12345",
"--source",
"arxiv",
"--path",
"/path/to/paper.pdf",
]);
match &cli.command {
Some(Commands::Read { paper_id, .. }) => {
assert_eq!(paper_id, "2301.12345");
}
_ => panic!("Expected Read command"),
}
}
#[test]
fn test_cli_read_with_options() {
let cli = Cli::parse_from([
"research-master",
"read",
"2301.12345",
"--source",
"arxiv",
"--path",
"/path/to/paper.pdf",
"--output-file",
"output.txt",
"--pages",
"5",
]);
match &cli.command {
Some(Commands::Read {
paper_id,
source,
pages,
output_file,
path: _,
..
}) => {
assert_eq!(paper_id, "2301.12345");
assert_eq!(*source, Source::Arxiv);
assert_eq!(*pages, Some(5));
assert_eq!(
output_file.clone().map(|p| p.to_string_lossy().to_string()),
Some("output.txt".to_string())
);
}
_ => panic!("Expected Read command"),
}
}
#[test]
fn test_cli_citations_command() {
let cli = Cli::parse_from([
"research-master",
"citations",
"2301.12345",
"--source",
"arxiv",
]);
match &cli.command {
Some(Commands::Citations { paper_id, .. }) => {
assert_eq!(paper_id, "2301.12345");
}
_ => panic!("Expected Citations command"),
}
}
#[test]
fn test_cli_citations_with_options() {
let cli = Cli::parse_from([
"research-master",
"citations",
"2301.12345",
"--source",
"semantic",
"--max-results",
"50",
]);
match &cli.command {
Some(Commands::Citations {
paper_id,
source,
max_results,
}) => {
assert_eq!(paper_id, "2301.12345");
assert_eq!(*source, Source::Semantic);
assert_eq!(*max_results, 50);
}
_ => panic!("Expected Citations command"),
}
}
#[test]
fn test_cli_references_command() {
let cli = Cli::parse_from([
"research-master",
"references",
"1706.03762",
"--source",
"semantic",
]);
match &cli.command {
Some(Commands::References { paper_id, .. }) => {
assert_eq!(paper_id, "1706.03762");
}
_ => panic!("Expected References command"),
}
}
// Command aliases ("ref", "rel", "diag", "completion", "dedup") must keep
// resolving to their canonical commands.
#[test]
fn test_cli_references_alias() {
let cli = Cli::parse_from([
"research-master",
"ref",
"1706.03762",
"--source",
"semantic",
]);
assert!(matches!(cli.command, Some(Commands::References { .. })));
}
#[test]
fn test_cli_related_command() {
let cli = Cli::parse_from([
"research-master",
"related",
"1706.03762",
"--source",
"connected_papers",
]);
match &cli.command {
Some(Commands::Related { paper_id, .. }) => {
assert_eq!(paper_id, "1706.03762");
}
_ => panic!("Expected Related command"),
}
}
#[test]
fn test_cli_related_alias() {
let cli = Cli::parse_from([
"research-master",
"rel",
"1706.03762",
"--source",
"connected_papers",
]);
assert!(matches!(cli.command, Some(Commands::Related { .. })));
}
#[test]
fn test_cli_lookup_command() {
let cli = Cli::parse_from(["research-master", "doi", "10.1234/test"]);
match &cli.command {
Some(Commands::LookupByDoi { doi, .. }) => {
assert_eq!(doi, "10.1234/test");
}
_ => panic!("Expected LookupByDoi command"),
}
}
#[test]
fn test_cli_lookup_with_source() {
let cli = Cli::parse_from([
"research-master",
"doi",
"10.1234/test",
"--source",
"crossref",
]);
match &cli.command {
Some(Commands::LookupByDoi { doi, source, .. }) => {
assert_eq!(doi, "10.1234/test");
assert_eq!(*source, Source::CrossRef);
}
_ => panic!("Expected LookupByDoi command"),
}
}
// --- Cache subcommands ---
#[test]
fn test_cli_cache_status() {
let cli = Cli::parse_from(["research-master", "cache", "status"]);
assert!(matches!(
cli.command,
Some(Commands::Cache {
command: CacheCommands::Status
})
));
}
#[test]
fn test_cli_cache_clear() {
let cli = Cli::parse_from(["research-master", "cache", "clear"]);
assert!(matches!(
cli.command,
Some(Commands::Cache {
command: CacheCommands::Clear
})
));
}
#[test]
fn test_cli_cache_clear_searches() {
let cli = Cli::parse_from(["research-master", "cache", "clear-searches"]);
assert!(matches!(
cli.command,
Some(Commands::Cache {
command: CacheCommands::ClearSearches
})
));
}
#[test]
fn test_cli_cache_clear_citations() {
let cli = Cli::parse_from(["research-master", "cache", "clear-citations"]);
assert!(matches!(
cli.command,
Some(Commands::Cache {
command: CacheCommands::ClearCitations
})
));
}
#[test]
fn test_cli_doctor_command() {
let cli = Cli::parse_from(["research-master", "doctor"]);
match &cli.command {
Some(Commands::Doctor {
check_connectivity,
check_api_keys,
verbose,
}) => {
assert!(!*check_connectivity);
assert!(!*check_api_keys);
assert!(!*verbose);
}
_ => panic!("Expected Doctor command"),
}
}
#[test]
fn test_cli_doctor_with_options() {
let cli = Cli::parse_from([
"research-master",
"doctor",
"--check-connectivity",
"--check-api-keys",
"--verbose",
]);
match &cli.command {
Some(Commands::Doctor {
check_connectivity,
check_api_keys,
verbose,
}) => {
assert!(*check_connectivity);
assert!(*check_api_keys);
assert!(*verbose);
}
_ => panic!("Expected Doctor command"),
}
}
#[test]
fn test_cli_doctor_alias() {
let cli = Cli::parse_from(["research-master", "diag"]);
assert!(matches!(cli.command, Some(Commands::Doctor { .. })));
}
#[test]
fn test_cli_update_command() {
let cli = Cli::parse_from(["research-master", "update"]);
match &cli.command {
Some(Commands::Update { force, dry_run }) => {
assert!(!*force);
assert!(!*dry_run);
}
_ => panic!("Expected Update command"),
}
}
#[test]
fn test_cli_update_with_options() {
let cli = Cli::parse_from(["research-master", "update", "--force", "--dry-run"]);
match &cli.command {
Some(Commands::Update { force, dry_run }) => {
assert!(*force);
assert!(*dry_run);
}
_ => panic!("Expected Update command"),
}
}
// --- Shell completion value parsing (note kebab-case "power-shell") ---
#[test]
fn test_cli_completions_bash() {
let cli = Cli::parse_from(["research-master", "completions", "bash"]);
match &cli.command {
Some(Commands::Completions { shell }) => {
assert!(matches!(shell, Shell::Bash));
}
_ => panic!("Expected Completions command"),
}
}
#[test]
fn test_cli_completions_zsh() {
let cli = Cli::parse_from(["research-master", "completions", "zsh"]);
match &cli.command {
Some(Commands::Completions { shell }) => {
assert!(matches!(shell, Shell::Zsh));
}
_ => panic!("Expected Completions command"),
}
}
#[test]
fn test_cli_completions_fish() {
let cli = Cli::parse_from(["research-master", "completions", "fish"]);
match &cli.command {
Some(Commands::Completions { shell }) => {
assert!(matches!(shell, Shell::Fish));
}
_ => panic!("Expected Completions command"),
}
}
#[test]
fn test_cli_completions_powershell() {
let cli = Cli::parse_from(["research-master", "completions", "power-shell"]);
match &cli.command {
Some(Commands::Completions { shell }) => {
assert!(matches!(shell, Shell::PowerShell));
}
_ => panic!("Expected Completions command"),
}
}
#[test]
fn test_cli_completions_alias() {
let cli = Cli::parse_from(["research-master", "completion", "bash"]);
assert!(matches!(cli.command, Some(Commands::Completions { .. })));
}
#[test]
fn test_cli_dedupe_command() {
let cli = Cli::parse_from(["research-master", "dedupe", "papers.json"]);
match &cli.command {
Some(Commands::Dedupe { input, .. }) => {
assert_eq!(input.to_string_lossy(), "papers.json");
}
_ => panic!("Expected Dedupe command"),
}
}
#[test]
fn test_cli_dedupe_with_options() {
let cli = Cli::parse_from([
"research-master",
"dedupe",
"papers.json",
"-O",
"deduped.json",
"--strategy",
"last",
"--show",
]);
match &cli.command {
Some(Commands::Dedupe {
input,
output_file,
strategy,
show,
}) => {
assert_eq!(input.to_string_lossy(), "papers.json");
assert_eq!(
output_file.clone().map(|p| p.to_string_lossy().to_string()),
Some("deduped.json".to_string())
);
assert_eq!(*strategy, DedupStrategy::Last);
assert!(*show);
}
_ => panic!("Expected Dedupe command"),
}
}
#[test]
fn test_cli_dedupe_alias() {
let cli = Cli::parse_from(["research-master", "dedup", "papers.json"]);
assert!(matches!(cli.command, Some(Commands::Dedupe { .. })));
}
// Exhaustive option coverage for `search`, destructuring every field.
#[test]
fn test_cli_search_all_options() {
let cli = Cli::parse_from([
"research-master",
"search",
"transformer",
"--source",
"arxiv",
"--max-results",
"25",
"--year",
"2020-2023",
"--sort-by",
"citations",
"--order",
"desc",
"--category",
"cs.CL",
"--author",
"Vaswani",
"--dedup",
"--dedup-strategy",
"mark",
]);
match &cli.command {
Some(Commands::Search {
query,
source,
max_results,
year,
sort_by,
order,
category,
author,
dedup,
dedup_strategy,
fetch_details,
}) => {
assert_eq!(query, "transformer");
assert_eq!(*source, Source::Arxiv);
assert_eq!(*max_results, 25);
assert_eq!(year.clone(), Some("2020-2023".to_string()));
assert_eq!(*sort_by, Some(SortField::Citations));
assert_eq!(*order, Some(Order::Desc));
assert_eq!(category.clone(), Some("cs.CL".to_string()));
assert_eq!(author.clone(), Some("Vaswani".to_string()));
assert!(*dedup);
assert_eq!(*dedup_strategy, Some(DedupStrategy::Mark));
assert!(*fetch_details); }
_ => panic!("Expected Search command"),
}
}
// Guard against silently dropping a Source variant: the count below must
// be bumped whenever a variant is added.
#[test]
fn test_source_enum_all_variants() {
let variants = [
Source::Arxiv,
Source::Pubmed,
Source::Biorxiv,
Source::Semantic,
Source::OpenAlex,
Source::CrossRef,
Source::Iacr,
Source::Pmc,
Source::Hal,
Source::Dblp,
Source::Ssrn,
Source::Dimensions,
Source::IeeeXplore,
Source::EuropePmc,
Source::Core,
Source::Zenodo,
Source::Unpaywall,
Source::Mdpi,
Source::Jstor,
Source::Scispace,
Source::Acm,
Source::ConnectedPapers,
Source::Doaj,
Source::WorldWideScience,
Source::Osf,
Source::Base,
Source::Springer,
Source::GoogleScholar,
Source::All,
];
assert_eq!(variants.len(), 29);
}
// Every concrete variant (all except `All`, which panics) maps to its
// registry string id.
#[test]
fn test_source_to_id_all_variants() {
let tests = [
(Source::Arxiv, "arxiv"),
(Source::Pubmed, "pubmed"),
(Source::Biorxiv, "biorxiv"),
(Source::Semantic, "semantic"),
(Source::OpenAlex, "openalex"),
(Source::CrossRef, "crossref"),
(Source::Iacr, "iacr"),
(Source::Pmc, "pmc"),
(Source::Hal, "hal"),
(Source::Dblp, "dblp"),
(Source::Ssrn, "ssrn"),
(Source::Dimensions, "dimensions"),
(Source::IeeeXplore, "ieee_xplore"),
(Source::EuropePmc, "europe_pmc"),
(Source::Core, "core"),
(Source::Zenodo, "zenodo"),
(Source::Unpaywall, "unpaywall"),
(Source::Mdpi, "mdpi"),
(Source::Jstor, "jstor"),
(Source::Scispace, "scispace"),
(Source::Acm, "acm"),
(Source::ConnectedPapers, "connected_papers"),
(Source::Doaj, "doaj"),
(Source::WorldWideScience, "worldwidescience"),
(Source::Osf, "osf"),
(Source::Base, "base"),
(Source::Springer, "springer"),
(Source::GoogleScholar, "google_scholar"),
];
for (source, expected_id) in tests {
assert_eq!(source_to_id(source), expected_id, "Failed for {:?}", source);
}
}
// --- Discriminant checks for the remaining CLI enums ---
#[test]
fn test_sort_field_enum() {
assert_eq!(SortField::Relevance as i32, 0);
assert_eq!(SortField::Date as i32, 1);
assert_eq!(SortField::Citations as i32, 2);
assert_eq!(SortField::Title as i32, 3);
assert_eq!(SortField::Author as i32, 4);
}
#[test]
fn test_order_enum() {
assert_eq!(Order::Asc as i32, 0);
assert_eq!(Order::Desc as i32, 1);
}
#[test]
fn test_dedup_strategy_enum() {
assert_eq!(DedupStrategy::First as i32, 0);
assert_eq!(DedupStrategy::Last as i32, 1);
assert_eq!(DedupStrategy::Mark as i32, 2);
}
#[test]
fn test_capability_filter_enum() {
assert_eq!(CapabilityFilter::Search as i32, 0);
assert_eq!(CapabilityFilter::Download as i32, 1);
assert_eq!(CapabilityFilter::Read as i32, 2);
assert_eq!(CapabilityFilter::Citations as i32, 3);
assert_eq!(CapabilityFilter::DoiLookup as i32, 4);
assert_eq!(CapabilityFilter::AuthorSearch as i32, 5);
}
#[test]
fn test_cli_download_all_options() {
let cli = Cli::parse_from([
"research-master",
"download",
"2301.12345",
"--source",
"arxiv",
"--output-path",
"/path/to/file.pdf",
"--auto-filename",
"--create-dir",
"--doi",
"10.1234/test",
]);
match &cli.command {
Some(Commands::Download {
paper_id,
source,
output_path,
auto_filename,
create_dir,
doi,
}) => {
assert_eq!(paper_id, "2301.12345");
assert_eq!(*source, Source::Arxiv);
assert_eq!(
output_path.clone().map(|p| p.to_string_lossy().to_string()),
Some("/path/to/file.pdf".to_string())
);
assert!(*auto_filename);
assert!(*create_dir);
assert_eq!(
doi.clone().map(|d| d.to_string()),
Some("10.1234/test".to_string())
);
}
_ => panic!("Expected Download command"),
}
}
#[test]
fn test_cli_sources_with_capability() {
let cli = Cli::parse_from([
"research-master",
"sources",
"--with-capability",
"download",
]);
match &cli.command {
Some(Commands::Sources {
with_capability, ..
}) => {
assert_eq!(*with_capability, Some(CapabilityFilter::Download));
}
_ => panic!("Expected Sources command"),
}
}
#[test]
fn test_cli_author_all_options() {
let cli = Cli::parse_from([
"research-master",
"author",
"Geoffrey Hinton",
"--source",
"all",
"--max-results",
"20",
"--year",
"2010-",
"--dedup",
"--dedup-strategy",
"first",
]);
match &cli.command {
Some(Commands::Author {
author,
source,
max_results,
year,
dedup,
dedup_strategy,
}) => {
assert_eq!(author, "Geoffrey Hinton");
assert_eq!(*source, Source::All);
assert_eq!(*max_results, 20);
assert_eq!(year.clone(), Some("2010-".to_string()));
assert!(*dedup);
assert_eq!(*dedup_strategy, Some(DedupStrategy::First));
}
_ => panic!("Expected Author command"),
}
}
}