use super::*;
use std::collections::HashSet;
use std::fmt;
use std::fs::File;
use std::io::{BufWriter, Write};
use bytes::Bytes;
use mnem_core::error::StoreError;
use mnem_core::id::Cid;
use mnem_core::prolly::Cursor;
use mnem_core::store::Blockstore;
/// Resolves the user-supplied `from` string to a concrete CID.
///
/// Resolution order:
/// 1. The literal `HEAD` maps to the first entry of the view's `heads`.
/// 2. A name present in the view's `refs` maps to that ref's target.
/// 3. Anything else is parsed as a CID string.
///
/// # Errors
/// Fails when `HEAD` is requested but `heads` is empty, when the named ref
/// is in the `Conflicted` state, or when `from` matches no ref and does not
/// parse as a CID.
fn resolve_ref_or_cid(r: &ReadonlyRepo, from: &str) -> Result<mnem_core::id::Cid> {
    let view = r.view();

    if from == "HEAD" {
        return match view.heads.first() {
            Some(head) => Ok(head.clone()),
            None => Err(anyhow!("repository has no commits yet; nothing to export")),
        };
    }

    match view.refs.get(from) {
        Some(RefTarget::Normal { target }) => Ok(target.clone()),
        Some(RefTarget::Conflicted { adds, removes }) => Err(anyhow!(
            "ref `{from}` is conflicted (adds={}, removes={}); resolve it first",
            adds.len(),
            removes.len()
        )),
        // Not a known ref name: last resort is to treat the input as a raw CID.
        None => mnem_core::id::Cid::parse_str(from)
            .with_context(|| format!("`{from}` is neither a known ref nor a valid CID")),
    }
}
/// Gathers the CIDs of every entry in the nodes tree whose key is tombstoned.
///
/// Opens a `Cursor` over the Prolly tree rooted at `nodes_root`, converts
/// each entry's key into a `NodeId`, and keeps the entry's CID when that id
/// is a member of `tombstone_ids`.
///
/// # Errors
/// Returns an error if the cursor cannot be opened or if any entry fails to
/// load during iteration; the first failure aborts the walk.
fn collect_tombstoned_node_cids(
    bs: &dyn Blockstore,
    nodes_root: &Cid,
    tombstone_ids: &HashSet<mnem_core::id::NodeId>,
) -> Result<HashSet<Cid>> {
    Cursor::new(bs, nodes_root)
        .context("opening Prolly cursor over nodes tree")?
        .into_iter()
        .filter_map(|entry| match entry.context("iterating nodes Prolly tree") {
            Ok((key, node_cid)) => {
                let id = mnem_core::id::NodeId::from_bytes_raw(key.0);
                // Only tombstoned nodes contribute a CID; live nodes are skipped.
                tombstone_ids.contains(&id).then(|| Ok(node_cid))
            }
            // Keep errors in the stream so `collect` stops at the first one.
            Err(err) => Some(Err(err)),
        })
        .collect()
}
/// Wrapper around a borrowed `Blockstore` that carries a set of CIDs to
/// leave out when blocks are enumerated via `iter_from_root`.
struct ScrubBlockstore<'a> {
/// The underlying store that operations are delegated to.
inner: &'a dyn Blockstore,
/// CIDs that `iter_from_root` drops from its output.
excluded: HashSet<Cid>,
}
impl<'a> ScrubBlockstore<'a> {
fn new(inner: &'a dyn Blockstore, excluded: HashSet<Cid>) -> Self {
Self { inner, excluded }
}
}
impl fmt::Debug for ScrubBlockstore<'_> {
    /// Prints only the number of excluded CIDs; the wrapped store is omitted.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let excluded_count = self.excluded.len();
        f.write_fmt(format_args!(
            "ScrubBlockstore {{ excluded: {} }}",
            excluded_count
        ))
    }
}
/// `Blockstore` implementation that forwards every operation to the wrapped
/// store; only `iter_from_root` applies the exclusion set.
impl Blockstore for ScrubBlockstore<'_> {
    /// Forwards to the wrapped store (the exclusion set is not consulted).
    fn has(&self, cid: &Cid) -> Result<bool, StoreError> {
        self.inner.has(cid)
    }

    /// Forwards to the wrapped store (the exclusion set is not consulted).
    fn get(&self, cid: &Cid) -> Result<Option<Bytes>, StoreError> {
        self.inner.get(cid)
    }

    /// Forwards the write to the wrapped store.
    fn put(&self, cid: Cid, data: Bytes) -> Result<(), StoreError> {
        self.inner.put(cid, data)
    }

    /// Forwards the trusted write to the wrapped store.
    fn put_trusted(&self, cid: Cid, data: Bytes) -> Result<(), StoreError> {
        self.inner.put_trusted(cid, data)
    }

    /// Forwards the delete to the wrapped store.
    fn delete(&self, cid: &Cid) -> Result<(), StoreError> {
        self.inner.delete(cid)
    }

    /// Iterates the wrapped store from `root`, dropping every successfully
    /// read block whose CID is in the exclusion set. Errors pass through.
    fn iter_from_root<'b>(
        &'b self,
        root: &Cid,
    ) -> Box<dyn Iterator<Item = Result<(Cid, Bytes), StoreError>> + 'b> {
        let skip = &self.excluded;
        let upstream = self.inner.iter_from_root(root);
        // Reject only `Ok` entries with an excluded CID; everything else is kept.
        let kept =
            upstream.filter(move |entry| !matches!(entry, Ok((cid, _)) if skip.contains(cid)));
        Box::new(kept)
    }

    /// Forwards to the wrapped store (the exclusion set is not applied).
    fn all_cids(&self) -> Result<Option<Vec<Cid>>, StoreError> {
        self.inner.all_cids()
    }
}
// Command-line arguments for `mnem export`.
//
// NOTE: plain `//` comments are used on purpose. With clap's derive macros,
// `///` doc comments on the struct or its fields become part of the generated
// help output, which would change what the program prints.
#[derive(clap::Args, Debug)]
#[command(after_long_help = "\
Examples:
mnem export out.car # export from HEAD (default, recommended)
mnem export --from HEAD out.car # same, explicit
mnem export --from <cid> backup.car # export from specific op CID
mnem export --scrub out.car # omit tombstoned (soft-deleted) nodes
mnem export - | ssh server 'mnem import -' # pipe over SSH
NOTE: Named refs like `refs/heads/main` do NOT auto-advance with `mnem add node`.
They point to the commit when the branch was created, not the current HEAD.
Using `--from refs/heads/main` exports only the data up to that anchor commit.
Run `mnem status` to compare the current HEAD vs. named ref values.
To export all current data, use `--from HEAD` (or omit `--from` entirely).
")]
pub(crate) struct Args {
// Destination path for the CAR output. `run` treats a normalized value of
// `-` as "write to stdout".
pub path: String,
// Optional starting point: `HEAD`, a ref name, or a CID string. `run` falls
// back to `HEAD` when this is absent.
#[arg(long)]
pub from: Option<String>,
// When set, `run` wraps the blockstore so that CIDs of tombstoned nodes are
// filtered out of the exported block stream.
#[arg(long)]
pub scrub: bool,
}
/// Entry point for `mnem export`: writes a CAR archive of the blocks
/// reachable from the selected root to a file or to stdout.
///
/// Steps:
/// 1. Open the repository (optionally at `override_path`).
/// 2. Resolve `args.from` (default `HEAD`) to a root CID, warning on stderr
///    when an explicitly given root differs from the first head.
/// 3. With `args.scrub`, wrap the blockstore so CIDs of tombstoned nodes are
///    filtered out of the iteration used for export.
/// 4. Stream the export to `args.path`, where `-` means stdout.
/// 5. Print a one-line summary.
///
/// The summary goes to stdout when exporting to a file, and to stderr when
/// exporting to stdout, so that the CAR byte stream stays uncontaminated for
/// pipelines such as `mnem export - | mnem import -`.
///
/// # Errors
/// Propagates failures from opening the repository, resolving the root,
/// collecting tombstoned CIDs, creating the output file, exporting, and
/// flushing the output.
pub(crate) fn run(override_path: Option<&Path>, args: Args) -> Result<()> {
    let (_dir, r, bs, _ohs) = repo::open_all(override_path)?;
    let from_str = args.from.as_deref().unwrap_or("HEAD");
    let root = resolve_ref_or_cid(&r, from_str)?;

    // Only warn when the user named a root themselves; the implicit default
    // is HEAD and can never differ from it.
    if let Some(explicit_from) = &args.from {
        if let Some(head_cid) = r.view().heads.first() {
            if &root != head_cid {
                eprintln!(
                    "warning: `{explicit_from}` resolves to {root} which is behind HEAD ({head_cid})."
                );
                eprintln!("Exporting from this ref will not include recent commits.");
                eprintln!("Use `--from HEAD` to export all current data.");
            }
        }
    }

    let scrub_store: Option<ScrubBlockstore<'_>> = if args.scrub {
        let tombstones = &r.view().tombstones;
        let n_tombstones = tombstones.len();
        let excluded = if n_tombstones == 0 {
            // Nothing to scrub: skip walking the nodes tree entirely.
            HashSet::new()
        } else {
            let tombstone_ids: HashSet<mnem_core::id::NodeId> =
                tombstones.keys().copied().collect();
            // NOTE(review): tombstoned CIDs are looked up in the HEAD commit's
            // nodes tree even when `--from` selects a different root — confirm
            // this is the intended behavior.
            let commit = r
                .head_commit()
                .ok_or_else(|| anyhow!("repository has no commits yet; nothing to scrub"))?;
            collect_tombstoned_node_cids(&*bs, &commit.nodes, &tombstone_ids)
                .context("collecting tombstoned node CIDs for --scrub")?
        };
        eprintln!("(scrubbing {} tombstoned nodes from export)", n_tombstones);
        Some(ScrubBlockstore::new(&*bs, excluded))
    } else {
        None
    };
    let effective_bs: &dyn Blockstore = match &scrub_store {
        Some(s) => s,
        None => &*bs,
    };

    let normalized = super::normalize_cli_path(&args.path);
    let to_stdout = normalized == "-";
    let stats = if to_stdout {
        let stdout = std::io::stdout();
        let mut lock = stdout.lock();
        let stats = mnem_transport::export(effective_bs, &root, &mut lock)
            .context("writing CAR to stdout")?;
        // Flush explicitly so a broken pipe or full disk is reported as an
        // error instead of being lost when the process exits.
        lock.flush().context("flushing stdout")?;
        stats
    } else {
        let path = Path::new(&normalized);
        let file = File::create(path).with_context(|| format!("creating {}", path.display()))?;
        let mut w = BufWriter::new(file);
        let stats = mnem_transport::export(effective_bs, &root, &mut w)
            .with_context(|| format!("writing CAR to {}", path.display()))?;
        w.flush()
            .with_context(|| format!("flushing {}", path.display()))?;
        stats
    };

    if to_stdout {
        // stdout carries the CAR bytes; appending the human-readable summary
        // there would corrupt the archive for whatever consumes the pipe.
        eprintln!(
            "exported {} blocks, {} bytes to {}",
            stats.blocks, stats.bytes, normalized
        );
    } else {
        println!(
            "exported {} blocks, {} bytes to {}",
            stats.blocks, stats.bytes, normalized
        );
    }
    Ok(())
}