use std::fs;
use std::io::BufReader;
use ipld_core::ipld::Ipld;
use mnem_core::HEADS_PREFIX;
use mnem_transport::remote::{RemoteConfigFile, RemoteSection, serialize_config};
use super::*;
// Command-line arguments for the `mnem clone` subcommand.
//
// NOTE(review): plain `//` comments are used on purpose. With clap's derive
// API, `///` doc comments become part of the generated help text, so adding
// them here would change what `--help` prints.
//
// The `after_long_help` text below is emitted verbatim after the long help.
#[derive(clap::Args, Debug)]
#[command(after_long_help = "\
Examples:
mnem clone file:///tmp/alice.car /tmp/mirror
mnem clone ./alice.car ./mirror # bare path OK when it ends in .car
mnem clone file:///tmp/alice.car # clones into $PWD/alice (derived from url)
")]
pub(crate) struct Args {
// Clone source: a `file://` URL or a bare path ending in `.car`
// (validated by `parse_clone_source`).
pub url: String,
// Optional target directory. When omitted, `resolve_target_dir` derives one
// from the last path segment of `url` under the current working directory.
pub dir: Option<std::path::PathBuf>,
}
/// Executes `mnem clone`: imports a CAR archive into a fresh repository.
///
/// Steps, in order:
/// 1. Resolve the source path (`parse_clone_source`) and the target directory
///    (`resolve_target_dir`), and refuse to continue if the target already
///    holds a `repo::MNEM_DIR` directory.
/// 2. Create the target directory and stores, then import the CAR blocks.
/// 3. Write a config file declaring the `origin` remote with the original URL.
/// 4. If a head commit is found among the CAR roots, point
///    `refs/remotes/origin/main` and `{HEADS_PREFIX}main` at it and update
///    the repository heads.
/// 5. Attempt a best-effort copy of the embedder config and print a summary.
///
/// `_override` is accepted for signature parity with other commands but is
/// not read by this function.
///
/// # Errors
/// Returns the first error raised by any step. When a step after the
/// "already exists" check fails, the partially written data directory is
/// removed (best effort) before the error is returned.
pub(crate) fn run(_override: Option<&Path>, args: Args) -> Result<()> {
    let local_path = parse_clone_source(&args.url)?;
    let target_dir = resolve_target_dir(&args.url, args.dir.as_deref())?;
    let data_dir = target_dir.join(repo::MNEM_DIR);
    if data_dir.exists() {
        bail!(
            "target directory already contains a mnem repository at {}; refusing to clone",
            data_dir.display()
        );
    }

    // The fallible work lives in a closure so every failure funnels through
    // the single cleanup path below.
    let result = (|| -> Result<()> {
        fs::create_dir_all(&target_dir)
            .with_context(|| format!("creating {}", target_dir.display()))?;
        let (bs, ohs) = repo::create_or_open_stores(&data_dir)?;

        let file = fs::File::open(&local_path)
            .with_context(|| format!("opening {}", local_path.display()))?;
        let mut r = BufReader::new(file);
        let stats = mnem_transport::import(&mut r, &*bs).with_context(|| {
            format!(
                "importing CAR from {}\n\
                 hint: see docs/RUNBOOK.md#5-car-import-rejected for the error-variant \
                 taxonomy (malformed CAR, CID mismatch, size cap, missing root, ...).",
                local_path.display()
            )
        })?;

        let r_repo = ReadonlyRepo::init(bs.clone(), ohs.clone())?;
        let head_commit = find_head_commit(&bs, &stats.roots)?;

        // Record where the clone came from as the `origin` remote.
        let mut section = RemoteSection::default();
        section.remote.insert(
            "origin".into(),
            RemoteConfigFile {
                url: args.url.clone(),
                capabilities: None,
                token_env: None,
            },
        );
        let config_text = serialize_config(&section).context("serialising remote section")?;
        fs::write(data_dir.join(config::CONFIG_FILE), config_text)
            .context("writing .mnem/config.toml")?;

        if let Some(head_cid) = &head_commit {
            // The config is loaded only after it has been written above, so
            // the author string reflects the file now on disk.
            let cfg = config::load(&data_dir)?;
            let author = config::author_string(&cfg);
            let after_remote = r_repo.update_ref(
                "refs/remotes/origin/main",
                None,
                Some(RefTarget::normal(head_cid.clone())),
                &author,
            )?;
            let after_local = after_remote.update_ref(
                &format!("{HEADS_PREFIX}main"),
                None,
                Some(RefTarget::normal(head_cid.clone())),
                &author,
            )?;
            after_local.update_heads(head_cid.clone(), &author)?;
        }

        // Best effort: a `false` result only triggers the note printed below.
        let embed_copied = try_copy_embed_config(&local_path, &data_dir);

        println!(
            "cloned {} blocks ({} bytes) from {} into {}",
            stats.blocks,
            stats.bytes,
            args.url,
            target_dir.display()
        );
        match &head_commit {
            Some(c) => println!(" origin/main -> {c}"),
            None => println!(" origin/main -> <no commit found in CAR>"),
        }
        if !embed_copied {
            println!(
                "note: embedder config was not copied. Run `mnem config set embed.provider <provider>` \
                 to configure embeddings, or copy [embed] from the source repo's .mnem/config.toml manually."
            );
        }
        Ok(())
    })();

    if result.is_err() {
        eprintln!("clone failed; removing partial .mnem/ directory");
        // Deliberately ignored: the original error is the one worth reporting.
        let _ = fs::remove_dir_all(&data_dir);
    }
    result
}
/// Best-effort copy of the `embed` table from a source repository's config
/// into the config file under `dest_data_dir`.
///
/// Returns `true` only when the destination config was rewritten with the
/// source's `embed` table. Returns `false` without touching anything when
/// `source` is not a directory, when the source config cannot be read or
/// parsed, when it has no `embed` entry, or when any serialisation or write
/// step fails. An unreadable destination config is treated as empty.
fn try_copy_embed_config(source: &std::path::Path, dest_data_dir: &std::path::Path) -> bool {
    if !source.is_dir() {
        return false;
    }

    // Each `?` below corresponds to one "give up and report false" exit.
    let attempt = || -> Option<()> {
        let origin_config = source.join(repo::MNEM_DIR).join(config::CONFIG_FILE);
        let origin_text = fs::read_to_string(&origin_config).ok()?;
        let origin_cfg = toml::from_str::<config::Config>(&origin_text).ok()?;
        let embed_section = origin_cfg.embed?;

        // Round-trip through a config holding only `embed` to obtain that
        // section as a generic TOML value.
        let isolated = config::Config {
            embed: Some(embed_section),
            ..Default::default()
        };
        let isolated_text = toml::to_string_pretty(&isolated).ok()?;
        let isolated_value = toml::from_str::<toml::Value>(&isolated_text).ok()?;
        let embed_value = isolated_value.get("embed").cloned()?;

        let target_config = dest_data_dir.join(config::CONFIG_FILE);
        let target_text = fs::read_to_string(&target_config).unwrap_or_default();
        let mut merged = toml::from_str::<toml::Value>(&target_text).ok()?;
        merged.as_table_mut()?.insert("embed".into(), embed_value);

        let rendered = toml::to_string_pretty(&merged).ok()?;
        fs::write(&target_config, rendered).ok()
    };

    attempt().is_some()
}
/// Translates the user-supplied clone source into a local filesystem path.
///
/// Accepted inputs:
/// * A bare path (no `://`) whose extension is `car`, compared ASCII
///   case-insensitively. The path is passed through `normalize_cli_path`.
/// * A `file://` URL. When the remainder looks like `/X:...` (slash, one
///   byte, colon) the leading slash is dropped so `file:///C:/dir/a.car`
///   yields `C:/dir/a.car`; the result is then passed through
///   `normalize_cli_path`.
///
/// # Errors
/// * A bare path that does not end in `.car`.
/// * Any scheme other than `file://` (reported as not yet implemented).
fn parse_clone_source(url: &str) -> Result<std::path::PathBuf> {
    if !url.contains("://") {
        let path = std::path::PathBuf::from(super::normalize_cli_path(url));
        let is_car = path
            .extension()
            .is_some_and(|ext| ext.eq_ignore_ascii_case("car"));
        if !is_car {
            bail!(
                "`{url}` does not look like a URL or a `.car` path. \
                 Pass file:///abs/path/archive.car or a bare *.car path."
            );
        }
        return Ok(path);
    }

    if let Some(rest) = url.strip_prefix("file://") {
        // `/C:/...` -> `C:/...`. Byte 0 is the ASCII `/`, so slicing at 1 is
        // always on a character boundary.
        let trimmed = if matches!(rest.as_bytes(), [b'/', _, b':', ..]) {
            &rest[1..]
        } else {
            rest
        };
        return Ok(std::path::PathBuf::from(super::normalize_cli_path(trimmed)));
    }

    // `://` is known to be present here, so the fallback is only defensive.
    let scheme = url.split_once("://").map_or("<unknown>", |(scheme, _)| scheme);
    bail!(
        "clone over the `{scheme}` scheme is not yet implemented. \
         mnem 0.3 ships `file://` clone only; remote schemes land in PR 3 \
         (Q2-of-PR-3). See docs/ROADMAP.md."
    );
}
/// Chooses the directory to clone into.
///
/// An `explicit` directory is returned unchanged. Otherwise the name is
/// derived from the text after the last `/` in `url`, with every trailing
/// `.car` removed, and joined onto the current working directory.
///
/// The suffix comparison is ASCII case-insensitive so that it agrees with
/// `parse_clone_source`, which accepts `.CAR` as well as `.car`. Without
/// that, `./ALICE.CAR` would derive a target named `ALICE.CAR`, the same
/// name as the archive itself.
///
/// # Errors
/// * Nothing is left after stripping (for example the URL ends in `/`).
/// * The current working directory cannot be read.
fn resolve_target_dir(url: &str, explicit: Option<&Path>) -> Result<std::path::PathBuf> {
    const CAR_SUFFIX: &str = ".car";

    if let Some(dir) = explicit {
        return Ok(dir.to_path_buf());
    }

    let mut stem = url.rsplit('/').next().unwrap_or(url);
    // Strip repeatedly, as `trim_end_matches` does, so lowercase inputs give
    // exactly the results they always did.
    while let Some(cut) = stem.len().checked_sub(CAR_SUFFIX.len()) {
        // `get` yields `None` if `cut` falls inside a multi-byte character.
        match stem.get(cut..) {
            Some(suffix) if suffix.eq_ignore_ascii_case(CAR_SUFFIX) => stem = &stem[..cut],
            _ => break,
        }
    }

    if stem.is_empty() {
        bail!("could not derive a target dir from `{url}`; pass <dir> explicitly");
    }
    let cwd = std::env::current_dir().context("cwd unreadable")?;
    Ok(cwd.join(stem))
}
/// Picks the commit the clone should treat as its head.
///
/// The first CAR root wins when its block is present in `bs`, decodes to an
/// IPLD map, and carries `_kind == "commit"`. In every other case (no roots,
/// block missing, undecodable, or not a commit) the decision is delegated to
/// `find_head_commit_heuristic`.
///
/// # Errors
/// Propagates blockstore read failures.
fn find_head_commit(
    bs: &std::sync::Arc<dyn mnem_core::store::Blockstore>,
    roots: &[mnem_core::id::Cid],
) -> Result<Option<mnem_core::id::Cid>> {
    let Some(first_root) = roots.first() else {
        return find_head_commit_heuristic(bs, roots);
    };

    let first_is_commit = match bs.get(first_root)? {
        Some(raw) => match from_canonical_bytes::<Ipld>(&raw) {
            Ok(Ipld::Map(fields)) => {
                matches!(fields.get("_kind"), Some(Ipld::String(k)) if k == "commit")
            }
            _ => false,
        },
        None => false,
    };

    if first_is_commit {
        Ok(Some(first_root.clone()))
    } else {
        find_head_commit_heuristic(bs, roots)
    }
}
/// Fallback head selection across all CAR roots.
///
/// Every root that is present in `bs`, decodes to an IPLD map and has
/// `_kind == "commit"` becomes a candidate. Its `time` field is read as a
/// `u64`, with a missing, non-integer or out-of-range value counting as `0`.
/// Any `parent` link seen on a candidate is remembered.
///
/// Candidates that no other candidate names as `parent` ("tips") are
/// preferred. If there are none, all candidates compete. The winner has the
/// greatest `time`, with ties broken by the greater CID byte string.
///
/// Returns `Ok(None)` when no root is a commit.
///
/// # Errors
/// Propagates blockstore read failures.
fn find_head_commit_heuristic(
    bs: &std::sync::Arc<dyn mnem_core::store::Blockstore>,
    roots: &[mnem_core::id::Cid],
) -> Result<Option<mnem_core::id::Cid>> {
    use mnem_core::id::Cid;
    use std::collections::HashSet;

    let mut found: Vec<(Cid, u64)> = Vec::new();
    let mut parents: HashSet<Cid> = HashSet::new();

    for candidate in roots {
        let fields = match bs.get(candidate)? {
            Some(raw) => match from_canonical_bytes::<Ipld>(&raw) {
                Ok(Ipld::Map(fields)) => fields,
                _ => continue,
            },
            None => continue,
        };
        if !matches!(fields.get("_kind"), Some(Ipld::String(k)) if k == "commit") {
            continue;
        }

        let stamp = match fields.get("time") {
            Some(Ipld::Integer(raw)) => u64::try_from(*raw).unwrap_or(0),
            _ => 0,
        };
        found.push((candidate.clone(), stamp));

        if let Some(Ipld::Link(link)) = fields.get("parent") {
            if let Ok(parent) = Cid::from_bytes(&link.to_bytes()) {
                parents.insert(parent);
            }
        }
    }

    // Newest first by `time`, then by CID bytes so the result is deterministic.
    let rank = |a: &&(Cid, u64), b: &&(Cid, u64)| {
        a.1.cmp(&b.1)
            .then_with(|| a.0.to_bytes().cmp(&b.0.to_bytes()))
    };

    let best_tip = found
        .iter()
        .filter(|(cid, _)| !parents.contains(cid))
        .max_by(rank);
    // `best_tip` is `None` exactly when there are no tips, in which case
    // every commit competes.
    let winner = match best_tip {
        Some(tip) => Some(tip),
        None => found.iter().max_by(rank),
    };

    Ok(winner.map(|(cid, _)| cid.clone()))
}
#[cfg(test)]
mod find_head_commit_tests {
    use std::collections::BTreeMap;
    use std::sync::Arc;
    use ipld_core::ipld::Ipld;
    use mnem_core::codec::{hash_to_cid, to_canonical_bytes};
    use mnem_core::store::{Blockstore, MemoryBlockstore};
    use super::find_head_commit;

    /// Stores a minimal commit-shaped block (`_kind` and `time` only) in `bs`
    /// and returns its CID.
    fn make_commit(bs: &MemoryBlockstore, time: u64) -> mnem_core::id::Cid {
        let node = Ipld::Map(BTreeMap::from([
            ("_kind".to_string(), Ipld::String("commit".to_string())),
            ("time".to_string(), Ipld::Integer(i128::from(time))),
        ]));
        let encoded = to_canonical_bytes(&node).unwrap();
        let (_, cid) = hash_to_cid(&node).unwrap();
        bs.put(cid.clone(), encoded).unwrap();
        cid
    }

    /// The first root must win even when a newer commit sits in the store.
    #[test]
    fn uses_roots0_not_largest_time() {
        let store = MemoryBlockstore::new();
        let expected_head = make_commit(&store, 1);
        // Present in the store but not listed as a root.
        let _newer_but_unlisted = make_commit(&store, 9999);
        let bs: Arc<dyn Blockstore> = Arc::new(store);

        let found = find_head_commit(&bs, &[expected_head.clone()]).unwrap();

        assert_eq!(
            found,
            Some(expected_head),
            "find_head_commit must return roots[0], not the commit with the largest time"
        );
    }

    /// A first root with no stored block must trigger the heuristic fallback.
    #[test]
    fn fallback_when_root_missing_from_blockstore() {
        use mnem_core::id::{CODEC_DAG_CBOR, Cid, Multihash};

        let store = MemoryBlockstore::new();
        let stored_commit = make_commit(&store, 42);
        let bs: Arc<dyn Blockstore> = Arc::new(store);
        let absent_root = Cid::new(CODEC_DAG_CBOR, Multihash::sha2_256(b"not-stored"));

        let found = find_head_commit(&bs, &[absent_root, stored_commit.clone()]).unwrap();

        assert_eq!(
            found,
            Some(stored_commit),
            "heuristic fallback must return the available commit when roots[0] is missing"
        );
    }
}
#[cfg(test)]
mod parse_clone_source_tests {
    use super::parse_clone_source;

    /// `file:///c/...` should come back as a drive-letter path.
    #[test]
    #[cfg(windows)]
    fn file_uri_with_git_bash_drive_letter_normalizes() {
        let s = parse_clone_source("file:///c/tmp/repo.car")
            .expect("parse ok")
            .to_string_lossy()
            .replace('\\', "/");
        let has_drive_prefix = ["c:/", "C:/"].iter().any(|prefix| s.starts_with(prefix));
        assert!(has_drive_prefix, "expected drive-letter path, got {s:?}");
        assert!(s.ends_with("/tmp/repo.car"), "got {s:?}");
    }

    /// An explicit `C:` drive in the URL keeps its uppercase drive letter.
    #[test]
    #[cfg(windows)]
    fn file_uri_with_uppercase_drive_letter_unchanged() {
        let s = parse_clone_source("file:///C:/tmp/repo.car")
            .expect("parse ok")
            .to_string_lossy()
            .replace('\\', "/");
        assert!(s.starts_with("C:/"), "got {s:?}");
        assert!(s.ends_with("/tmp/repo.car"), "got {s:?}");
    }

    /// A scheme-less path ending in `.car` is accepted.
    #[test]
    fn bare_car_path_still_accepted() {
        let parsed = parse_clone_source("./alice.car").expect("parse ok");
        assert!(parsed.to_string_lossy().ends_with("alice.car"));
    }

    /// Non-`file` schemes are rejected with a "not yet implemented" message.
    #[test]
    fn unsupported_scheme_rejected() {
        let msg = format!(
            "{:#}",
            parse_clone_source("https://example.com/repo.car").unwrap_err()
        );
        assert!(msg.contains("not yet implemented"), "got {msg}");
    }
}