use std::env;
use std::fs;
use std::path::{Path, PathBuf};
use rho_core::{
RhoResult, arg_value, canonical_display, file_digest, file_name, mime_type, require_arg,
uuid_like, yaml_quote,
};
/// Prints the `rho dataset` command synopsis to stderr and terminates the
/// process with exit status 2.
///
/// The `!` return type lets callers use this as the fallback arm when a
/// required argument is missing.
fn usage() -> ! {
    // One literal per supported invocation form; `concat!` glues them into
    // the single message that is printed.
    const SYNOPSIS: &str = concat!(
        "usage:\n",
        " rho dataset --name <name> --real <path> --mock <path> [--root <repo>] [--owner <owner>] [--share-dir <path>] [--private-dir <path>] [--uuid <uuid>] [--description <text>]\n",
        " rho dataset set <name> --public <source> [--root <repo>] [--owner <owner>] [--revision <rev>] [--commit] [--push] [--pr]\n",
        " rho dataset bind <name> --real <path> [--uuid <uuid>] [--owner <owner>]\n",
        " rho dataset list [--root <repo>] [--repo-only|--bindings-only]\n",
        " rho dataset remove <name|uuid> (--repo|--binding) [--root <repo>] [--yes]\n",
        " rho dataset publish <name|uuid> [--root <repo>] [--owner <owner>] [--source-root <path>] [--target-root <path>] [--commit] [--push] [--pr]",
    );
    eprintln!("{}", SYNOPSIS);
    std::process::exit(2);
}
pub fn run(args: &[String]) -> RhoResult<()> {
if args.iter().any(|arg| arg == "--help" || arg == "-h") {
usage();
}
if args.first().map(String::as_str) == Some("publish") {
return super::publish::run(&args[1..]);
}
if args.first().map(String::as_str) == Some("set") {
return super::dataset_set::run(&args[1..]);
}
if args.first().map(String::as_str) == Some("bind") {
return super::dataset_bind::bind(&args[1..]);
}
if args.first().map(String::as_str) == Some("list") {
return super::dataset_bind::list(&args[1..]);
}
if args.first().map(String::as_str) == Some("remove") {
return super::dataset_bind::remove(&args[1..]);
}
let name = require_arg(args, "--name").unwrap_or_else(|_| usage());
let real_source = PathBuf::from(require_arg(args, "--real").unwrap_or_else(|_| usage()));
let mock_source = PathBuf::from(require_arg(args, "--mock").unwrap_or_else(|_| usage()));
let root = arg_value(args, "--root")
.map(PathBuf::from)
.unwrap_or_else(|| PathBuf::from("."));
let owner = arg_value(args, "--owner")
.unwrap_or_else(|| active_handle().unwrap_or_else(|| "user1".to_string()));
let description = arg_value(args, "--description").unwrap_or_default();
let dataset_uuid = arg_value(args, "--uuid").unwrap_or_else(uuid_like);
let dataset_slug = dataset_path_slug(&name)?;
let share_dir = arg_value(args, "--share-dir")
.map(PathBuf::from)
.unwrap_or_else(|| default_share_dir(&root, &owner));
let private_dir = arg_value(args, "--private-dir")
.map(PathBuf::from)
.unwrap_or_else(|| default_private_dir(&root, &owner));
if !real_source.is_file() {
return Err(format!("real dataset file not found: {}", real_source.display()).into());
}
if !mock_source.is_file() {
return Err(format!("mock dataset file not found: {}", mock_source.display()).into());
}
let share_bundle = share_dir.join(&dataset_slug);
let private_bundle = private_dir.join(&dataset_slug);
fs::create_dir_all(share_bundle.join("mock"))?;
fs::create_dir_all(private_bundle.join("real"))?;
let mock_name = file_name(&mock_source)?;
let real_name = file_name(&real_source)?;
let share_mock = share_bundle.join("mock").join(&mock_name);
let private_real = private_bundle.join("real").join(&real_name);
fs::copy(&mock_source, &share_mock)?;
fs::copy(&real_source, &private_real)?;
let mock_sha = file_digest(&share_mock)?;
let real_sha = file_digest(&private_real)?;
let mock_size = fs::metadata(&share_mock)?.len();
let real_size = fs::metadata(&private_real)?.len();
let created_at = rho_core::now_rfc3339();
let share_manifest = share_bundle.join("dataset.yaml");
let private_manifest = private_bundle.join("dataset.yaml");
fs::write(
&share_manifest,
format!(
concat!(
"version: 1\n",
"dataset:\n",
" uuid: {}\n",
" name: {}\n",
" owner: {}\n",
" description: {}\n",
" created_at: {}\n",
" variants:\n",
" mock:\n",
" tier: \"mock\"\n",
" relative_path: {}\n",
" file_name: {}\n",
" mime_type: {}\n",
" bytes: {}\n",
" sha256: {}\n",
" real:\n",
" tier: \"real\"\n",
" availability: \"private\"\n",
" staged_under_user_folder: true\n",
),
yaml_quote(&dataset_uuid),
yaml_quote(&name),
yaml_quote(&owner),
yaml_quote(&description),
yaml_quote(&created_at),
yaml_quote(&format!("mock/{mock_name}")),
yaml_quote(&mock_name),
yaml_quote(&mime_type(&share_mock)),
mock_size,
yaml_quote(&mock_sha),
),
)?;
fs::write(
&private_manifest,
format!(
concat!(
"version: 1\n",
"dataset:\n",
" uuid: {}\n",
" name: {}\n",
" owner: {}\n",
" description: {}\n",
" created_at: {}\n",
" variants:\n",
" mock:\n",
" tier: \"mock\"\n",
" share_manifest: {}\n",
" share_relative_path: {}\n",
" sha256: {}\n",
" real:\n",
" tier: \"real\"\n",
" relative_path: {}\n",
" file_name: {}\n",
" mime_type: {}\n",
" bytes: {}\n",
" sha256: {}\n",
),
yaml_quote(&dataset_uuid),
yaml_quote(&name),
yaml_quote(&owner),
yaml_quote(&description),
yaml_quote(&created_at),
yaml_quote(&canonical_display(&share_manifest)),
yaml_quote(&format!("mock/{mock_name}")),
yaml_quote(&mock_sha),
yaml_quote(&format!("real/{real_name}")),
yaml_quote(&real_name),
yaml_quote(&mime_type(&private_real)),
real_size,
yaml_quote(&real_sha),
),
)?;
println!("created twin dataset");
println!("uuid: {dataset_uuid}");
println!("name: {name}");
println!("owner: {owner}");
println!("share manifest: {}", share_manifest.display());
println!("private manifest: {}", private_manifest.display());
println!("share mock file: {}", share_mock.display());
println!("private real file: {}", private_real.display());
Ok(())
}
/// Reads the current user handle from the `RHO_ENV_HANDLE` environment variable.
///
/// Surrounding whitespace is trimmed. Returns `None` when the variable cannot
/// be read (unset or not valid Unicode) or is blank after trimming.
fn active_handle() -> Option<String> {
    let raw = env::var("RHO_ENV_HANDLE").ok()?;
    match raw.trim() {
        "" => None,
        handle => Some(handle.to_string()),
    }
}
/// Default directory for shareable dataset bundles:
/// `<root>/users/<owner>/datasets/share`.
fn default_share_dir(root: &Path, owner: &str) -> PathBuf {
    let mut dir = root.to_path_buf();
    dir.extend(["users", owner, "datasets/share"]);
    dir
}
/// Default directory for private dataset bundles:
/// `<root>/users/<owner>/datasets/private`.
fn default_private_dir(root: &Path, owner: &str) -> PathBuf {
    let mut dir = root.to_path_buf();
    dir.extend(["users", owner, "datasets/private"]);
    dir
}
/// Validates that a dataset name can be used verbatim as a single directory
/// name and returns it as an owned `String`.
///
/// A name is accepted when it is non-empty, consists only of ASCII letters,
/// ASCII digits, `-`, `_` and `.`, and is not made up solely of dots.
///
/// # Errors
///
/// Returns `dataset name is not path-safe: <name>` for any other input.
fn dataset_path_slug(value: &str) -> RhoResult<String> {
    let is_allowed = |ch: char| ch.is_ascii_alphanumeric() || matches!(ch, '-' | '_' | '.');

    // `.` is an allowed character, so names such as "." and ".." would pass
    // the character check while resolving to the current or parent directory
    // once joined onto the bundle root. Dot-only names are rejected outright.
    let only_dots = value.chars().all(|ch| ch == '.');

    if value.is_empty() || only_dots || !value.chars().all(is_allowed) {
        return Err(format!("dataset name is not path-safe: {value}").into());
    }
    Ok(value.to_string())
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Both default bundle directories sit under `<root>/users/<owner>/datasets`.
    #[test]
    fn default_dataset_dirs_live_under_repo_users_owner() {
        let repo = Path::new("/repo");
        let datasets = PathBuf::from("/repo/users/madhavajay/datasets");

        assert_eq!(default_share_dir(repo, "madhavajay"), datasets.join("share"));
        assert_eq!(
            default_private_dir(repo, "madhavajay"),
            datasets.join("private")
        );
    }

    /// A plain name is returned unchanged; names containing path separators
    /// are rejected.
    #[test]
    fn validates_dataset_bundle_slug() {
        assert_eq!(dataset_path_slug("prices").unwrap(), "prices");
        for rejected in ["../prices", "prices/raw"] {
            assert!(dataset_path_slug(rejected).is_err());
        }
    }
}