use clap::{Arg, ArgAction, ArgMatches, Command};
use std::path::Path;
use std::time::Instant;
use crate::JunctionTable;
use crate::cmd::resolve_db_path;
/// Builds the clap definition of the `migrate` subcommand.
///
/// The command accepts an optional `--file` value naming the database and
/// seven boolean switches (`--backfill`, `--orcid`, `--ror`, `--references`,
/// `--crossref`, `--datacite`, `--rebuild-fts`), each of which is stored as
/// `true` when present on the command line.
pub fn command() -> Command {
    // (long flag, help text) for every boolean switch, in the order they are
    // registered on the command. The argument id is the same as the long flag.
    const SWITCHES: [(&str, &str); 7] = [
        (
            "backfill",
            "Populate all junction tables (works_orcid, works_ror, works_references) in one pass",
        ),
        ("orcid", "Populate works_orcid for all existing works"),
        ("ror", "Populate works_ror for all existing works"),
        ("references", "Populate works_references for all existing works"),
        ("crossref", "Restrict backfill to Crossref works only"),
        ("datacite", "Restrict backfill to DataCite works only"),
        (
            "rebuild-fts",
            "Drop and rebuild all three FTS5 indexes (works_fts, people_fts, organizations_fts)",
        ),
    ];

    // The only value-taking argument: an explicit database location.
    let db_file = Arg::new("file")
        .long("file")
        .help("Path to the SQLite database (overrides COMMONMETA_DB and platform default)");

    let base = Command::new("migrate")
        .about("Apply any pending database schema migrations")
        .long_about(
            "Open the local commonmeta database and apply any pending schema \
             migrations, then exit. Safe to run on databases built with older \
             releases: migrations are idempotent and only alter the schema \
             (no existing records are modified or removed).\n\n\
             Each step is printed to stderr with elapsed time. On large databases \
             (hundreds of millions of records) index creation can take 30–90 minutes; \
             the command will not hang silently.\n\n\
             Junction-table backfills populate works_orcid, works_ror, or \
             works_references for existing records. Required once on databases \
             built before the junction tables were introduced. Each flag tracks \
             its own resume cursor in the settings table so runs can be \
             interrupted and restarted safely. --backfill runs all three in a \
             single streaming pass (most efficient for large corpora).\n\n\
             Examples:\n\n\
             commonmeta migrate\n\
             commonmeta migrate --backfill\n\
             commonmeta migrate --references\n\
             commonmeta migrate --orcid --ror\n\
             commonmeta migrate --file /var/lib/commonmeta/commonmeta.sqlite3",
        )
        .arg(db_file);

    // Append each switch as a SetTrue flag, preserving the table order.
    SWITCHES.iter().fold(base, |cmd, &(name, help)| {
        cmd.arg(
            Arg::new(name)
                .long(name)
                .help(help)
                .action(ArgAction::SetTrue),
        )
    })
}
/// Runs the `migrate` subcommand.
///
/// Steps, in order:
/// 1. Resolve the database path via `resolve_db_path` (given the optional
///    `--file` value) and fail if nothing exists at that path.
/// 2. Apply pending schema migrations with `crate::run_migrations`.
/// 3. If `--backfill`, `--orcid`, `--ror`, or `--references` was given,
///    backfill the selected junction tables, optionally restricted to the
///    providers named by `--crossref` / `--datacite`.
/// 4. If `--rebuild-fts` was given, rebuild the FTS indexes.
///
/// Progress is written to stderr; one-line summaries are written to stdout.
///
/// # Errors
///
/// Returns the stringified error when the database file is missing, when
/// migrations fail, when the backfill fails, or when rebuilding `works_fts`
/// fails. Failures while rebuilding `organizations_fts` or `people_fts` are
/// only reported on stderr and do not fail the command.
pub fn execute(matches: &ArgMatches) -> Result<(), String> {
    let db_path_str = resolve_db_path(matches.get_one::<String>("file"));
    let db_path = Path::new(&db_path_str);
    if !db_path.exists() {
        return Err(format!(
            "database not found at '{}'; run 'commonmeta import' first",
            db_path_str
        ));
    }

    eprintln!("migrate: {}", db_path_str);
    let total_start = Instant::now();

    let (applied, version) = crate::run_migrations(db_path).map_err(|e| e.to_string())?;
    if applied == 0 {
        eprintln!("migrate: already at schema version {version}, nothing to do");
    } else {
        eprintln!(
            "migrate: applied {} step(s), schema version {} ({:.1?})",
            applied,
            version,
            total_start.elapsed()
        );
    }

    // --backfill selects every junction table; otherwise honour the
    // individual flags, keeping the orcid → ror → references order.
    let backfill_all = matches.get_flag("backfill");
    let tables: Vec<JunctionTable> = if backfill_all {
        vec![
            JunctionTable::Orcid,
            JunctionTable::Ror,
            JunctionTable::References,
        ]
    } else {
        vec![
            ("orcid", JunctionTable::Orcid),
            ("ror", JunctionTable::Ror),
            ("references", JunctionTable::References),
        ]
        .into_iter()
        .filter(|(flag, _)| matches.get_flag(flag))
        .map(|(_, table)| table)
        .collect()
    };

    let mut providers: Vec<&str> = Vec::new();
    if matches.get_flag("crossref") {
        providers.push("Crossref");
    }
    if matches.get_flag("datacite") {
        providers.push("DataCite");
    }

    if tables.is_empty() {
        // The provider switches only filter a backfill. Without one they
        // would be dropped silently, so tell the user instead.
        if !providers.is_empty() {
            eprintln!(
                "migrate: warning: --crossref/--datacite have no effect without \
                 --backfill, --orcid, --ror, or --references"
            );
        }
        println!("migrated {} (schema version {})", db_path_str, version);
    } else {
        let table_label = if backfill_all {
            "all junction tables".to_string()
        } else {
            tables
                .iter()
                .map(junction_table_name)
                .collect::<Vec<_>>()
                .join(", ")
        };
        let provider_label = if providers.is_empty() {
            String::new()
        } else {
            format!(" ({})", providers.join(", "))
        };
        let label = format!("{table_label}{provider_label}");

        eprintln!("migrate: backfilling {label} …");
        let bf_start = Instant::now();
        let (scanned, inserted) = crate::backfill_junction_tables(db_path, &tables, &providers)
            .map_err(|e| e.to_string())?;
        eprintln!(
            "migrate: backfill complete — {scanned} works scanned, {inserted} rows indexed ({:.1?})",
            bf_start.elapsed()
        );
        println!(
            "backfilled {label} in {} (schema version {version}, {inserted} rows)",
            db_path_str
        );
    }

    if matches.get_flag("rebuild-fts") {
        rebuild_fts_indexes(db_path)?;
        // Reported duration is measured from the start of the whole command
        // (migrations and any backfill included), not just the FTS phase.
        println!("rebuild-fts complete ({:.1?})", total_start.elapsed());
    }

    Ok(())
}

/// Maps a junction table selector to the table name used in progress labels.
fn junction_table_name(table: &JunctionTable) -> &'static str {
    match table {
        JunctionTable::Orcid => "works_orcid",
        JunctionTable::Ror => "works_ror",
        JunctionTable::References => "works_references",
    }
}

/// Rebuilds `works_fts` (failure aborts), then `organizations_fts` and
/// `people_fts` (failure is reported on stderr and skipped).
fn rebuild_fts_indexes(db_path: &Path) -> Result<(), String> {
    let started = Instant::now();
    eprintln!("migrate: rebuilding works_fts …");
    crate::rebuild_works_fts(db_path).map_err(|e| e.to_string())?;
    eprintln!("migrate: works_fts done ({:.1?})", started.elapsed());

    rebuild_optional_fts("organizations_fts", || {
        crate::rebuild_organizations_fts(db_path)
    });
    rebuild_optional_fts("people_fts", || crate::rebuild_people_fts(db_path));
    Ok(())
}

/// Runs one best-effort FTS rebuild, logging its elapsed time on success or a
/// "skipped" notice with the error on failure.
fn rebuild_optional_fts<E: std::fmt::Display>(
    index: &str,
    rebuild: impl FnOnce() -> Result<(), E>,
) {
    let started = Instant::now();
    eprintln!("migrate: rebuilding {index} …");
    match rebuild() {
        Ok(()) => eprintln!("migrate: {index} done ({:.1?})", started.elapsed()),
        Err(e) => eprintln!("migrate: {index} skipped — {e}"),
    }
}