use crate::ro_crate::read::read_crate;
use crate::ro_crate::rocrate::RoCrate;
use dirs;
use std::collections::HashMap;
use std::fmt;
use std::fs::{self, File};
use std::io::{self, Write};
use std::path::{Path, PathBuf};
use url::Url;
use walkdir::WalkDir;
use zip::{write::FileOptions, ZipWriter};
/// Serialises `rocrate` as pretty-printed JSON and writes it to the file `name`.
///
/// The file is created (or truncated) only after serialisation has succeeded,
/// and the JSON is followed by a single trailing newline.
///
/// This function never returns an error: every failure is reported on stderr
/// and otherwise ignored, so callers cannot tell whether the file was written.
pub fn write_crate(rocrate: &RoCrate, name: String) {
    let serialised = match serde_json::to_string_pretty(&rocrate) {
        Ok(text) => text,
        Err(e) => {
            eprintln!("Serialization failed: {}", e);
            return;
        }
    };

    let mut target = match File::create(name) {
        Ok(handle) => handle,
        Err(e) => {
            eprintln!("Failed to create file: {}", e);
            return;
        }
    };

    if let Err(_write_error) = writeln!(target, "{}", serialised) {
        eprintln!("Failed to write to the file.");
    }
}
/// Writes `rocrate` as pretty-printed JSON into the archive entry `name` and
/// then finalises the archive.
///
/// `zip_data` is consumed: after this call the archive is finished and no
/// further entries can be added.
///
/// # Errors
///
/// Serialisation, entry creation, writing and finalisation failures are all
/// reported as `ZipError::ZipOperationError` carrying the underlying message.
fn write_crate_to_zip(
    rocrate: &RoCrate,
    name: String,
    mut zip_data: RoCrateZip,
) -> Result<(), ZipError> {
    // Every failure in this function is flattened to the same variant.
    fn zip_failure<E: ToString>(cause: E) -> ZipError {
        ZipError::ZipOperationError(cause.to_string())
    }

    let payload = serde_json::to_string_pretty(&rocrate).map_err(zip_failure)?;

    let entry_options = zip_data.options;
    let archive = &mut zip_data.zip;
    archive.start_file(name, entry_options).map_err(zip_failure)?;
    archive.write_all(payload.as_bytes()).map_err(zip_failure)?;
    archive.finish().map(|_| ()).map_err(zip_failure)
}
pub fn zip_crate(
crate_path: &Path,
external: bool,
validation_level: i8,
flatten: bool,
unique: bool,
) -> Result<(), ZipError> {
let mut zip_paths = construct_paths(crate_path).unwrap();
let mut rocrate = read_crate(&zip_paths.absolute_path, validation_level).unwrap();
rocrate.context.add_urn_uuid();
println!("{:?}", zip_paths);
if unique {
let base_id = rocrate.context.get_specific_context("@base").unwrap();
let stripped_id = format!("{}.zip", base_id.strip_prefix("urn:uuid:").unwrap());
zip_paths.zip_file_name = zip_paths.root_path.join(stripped_id);
}
println!("{:?}", zip_paths);
let mut zip_data = build_zip(&zip_paths).unwrap();
let _ = directory_walk(&mut rocrate, &zip_paths, &mut zip_data, flatten);
if external {
zip_data = zip_crate_external(&mut rocrate, zip_data, &zip_paths)?
}
let _ = write_crate_to_zip(&rocrate, "ro-crate-metadata.json".to_string(), zip_data);
Ok(())
}
/// The set of paths involved in zipping one crate.
///
/// `construct_paths` builds this from the path of a crate metadata file.
#[derive(Debug)]
pub struct RoCrateZipPaths {
/// Path of the crate metadata file; `construct_paths` stores the canonicalised form.
absolute_path: PathBuf,
/// Directory containing the metadata file; this is the directory that gets walked and zipped.
root_path: PathBuf,
/// Full path of the archive to create. `construct_paths` places it inside `root_path`.
zip_file_name: PathBuf,
}
fn construct_paths(crate_path: &Path) -> Result<RoCrateZipPaths, Box<dyn std::error::Error>> {
let absolute_path = get_absolute_path(crate_path).unwrap();
let root_path = absolute_path.parent().unwrap().to_path_buf();
let zip_file_base_name = root_path
.file_name()
.ok_or(ZipError::FileNameNotFound)?
.to_str()
.ok_or(ZipError::FileNameConversionFailed)?;
let zip_file_name = root_path.join(format!("{}.zip", zip_file_base_name));
Ok(RoCrateZipPaths {
absolute_path,
root_path,
zip_file_name,
})
}
/// Creates the output archive file and wraps it in a zip writer.
///
/// The file at `path_information.zip_file_name` is created (truncating any
/// existing file). The entry options stored alongside the writer request
/// Deflate compression.
///
/// # Errors
///
/// Returns `ZipError::IoError` (boxed) if the file cannot be created.
fn build_zip(path_information: &RoCrateZipPaths) -> Result<RoCrateZip, Box<dyn std::error::Error>> {
    let archive_file =
        File::create(&path_information.zip_file_name).map_err(ZipError::IoError)?;
    Ok(RoCrateZip {
        zip: ZipWriter::new(archive_file),
        options: FileOptions::default().compression_method(zip::CompressionMethod::Deflated),
    })
}
/// An open zip archive together with the options applied to each entry added to it.
pub struct RoCrateZip {
/// Writer for the archive file on disk.
zip: ZipWriter<File>,
/// Options passed to every `start_file` call on `zip`.
options: FileOptions,
}
/// Adds every file below `zip_paths.root_path` to the archive and rewrites the
/// ids of matching crate entities to the names used inside the archive.
///
/// The archive itself (`zip_paths.zip_file_name`) and the original metadata
/// file (`zip_paths.absolute_path`) are skipped. Directory entries that cannot
/// be read during the walk are skipped silently.
///
/// # Arguments
///
/// * `rocrate` - crate whose entity ids are updated via `update_id_recursive`
///   whenever an id resolves to a file that was just archived.
/// * `zip_paths` - paths describing the crate directory and the archive.
/// * `zip_data` - open archive to append to; it is not finalised here.
/// * `flatten` - when `true`, each file is stored under its bare file name;
///   otherwise under its path relative to `root_path`.
///
/// # Returns
///
/// The canonical paths of all files that were added to the archive.
///
/// # Errors
///
/// Stops at the first file that cannot be named, resolved, opened, or copied
/// into the archive and returns the corresponding `ZipError` (boxed).
fn directory_walk(
    rocrate: &mut RoCrate,
    zip_paths: &RoCrateZipPaths,
    zip_data: &mut RoCrateZip,
    flatten: bool,
) -> Result<Vec<PathBuf>, Box<dyn std::error::Error>> {
    let mut data_vec: Vec<PathBuf> = Vec::new();
    // id -> canonical path, for every local id that resolves to an existing path.
    let contained = get_noncontained_data_entites(rocrate, zip_paths, true);

    let files = WalkDir::new(&zip_paths.root_path)
        .min_depth(0)
        .into_iter()
        .filter_map(|e| e.ok())
        .filter(|e| e.path().is_file());

    for entry in files {
        let path = entry.path();
        // Never archive the archive itself, nor the source metadata file.
        if path == zip_paths.zip_file_name || path == zip_paths.absolute_path {
            continue;
        }

        let file_name = if flatten {
            path.file_name()
                .ok_or(ZipError::FileNameNotFound)?
                .to_str()
                .ok_or(ZipError::FileNameConversionFailed)?
                .to_string()
        } else {
            // Join the components with '/' rather than using the platform
            // separator, so entry names and rewritten ids are the same on
            // every OS.
            let relative = path
                .strip_prefix(&zip_paths.root_path)
                .map_err(ZipError::PathError)?;
            let parts = relative
                .iter()
                .map(|part| part.to_str().ok_or(ZipError::FileNameConversionFailed))
                .collect::<Result<Vec<_>, _>>()?;
            parts.join("/")
        };

        // Resolve and open the file before starting the entry, so a failure
        // here does not leave an empty entry behind.
        let abs_path = fs::canonicalize(path).map_err(ZipError::IoError)?;
        let mut file = fs::File::open(path).map_err(ZipError::IoError)?;
        zip_data
            .zip
            .start_file(&file_name, zip_data.options)
            .map_err(|e| ZipError::ZipOperationError(e.to_string()))?;
        io::copy(&mut file, &mut zip_data.zip).map_err(ZipError::IoError)?;

        // Several ids may point at the same file (e.g. "./a.txt" and "a.txt").
        for (id, contained_path) in &contained {
            if *contained_path == abs_path {
                rocrate.update_id_recursive(id, &file_name);
            }
        }
        data_vec.push(abs_path);
    }
    Ok(data_vec)
}
/// Errors produced while building a zip archive from a crate.
#[derive(Debug)]
pub enum ZipError {
    /// A directory listing was empty. Not constructed anywhere in this part of the file.
    EmptyDirectoryVector,
    /// A path had no final file-name component.
    FileNameNotFound,
    /// A file name was not valid UTF-8.
    FileNameConversionFailed,
    /// A path was not located below the expected prefix.
    PathError(std::path::StripPrefixError),
    /// A zip (or serialisation) step failed; carries the underlying message.
    ZipOperationError(String),
    /// An underlying I/O operation failed.
    IoError(io::Error),
}

impl fmt::Display for ZipError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Self::EmptyDirectoryVector => f.write_str("Directory vector is empty"),
            Self::FileNameNotFound => f.write_str("File name not found"),
            Self::FileNameConversionFailed => f.write_str("Failed to convert file name"),
            Self::ZipOperationError(message) => write!(f, "Zip operation Error: {}", message),
            Self::PathError(cause) => write!(f, "Path error: {}", cause),
            Self::IoError(cause) => write!(f, "IO error: {}", cause),
        }
    }
}

impl std::error::Error for ZipError {}

/// Allows `?` on I/O results inside functions returning `ZipError`.
impl From<io::Error> for ZipError {
    fn from(cause: io::Error) -> ZipError {
        Self::IoError(cause)
    }
}

/// Allows `?` on `Path::strip_prefix` results inside functions returning `ZipError`.
impl From<std::path::StripPrefixError> for ZipError {
    fn from(cause: std::path::StripPrefixError) -> ZipError {
        Self::PathError(cause)
    }
}
/// Copies data entities that live outside the crate directory into the archive.
///
/// Each such file is stored as `external/<file name>`, and the corresponding
/// entity id in `rocrate` is rewritten to that entry name once the copy has
/// succeeded.
///
/// The archive is taken by value and handed back so the caller can keep
/// adding entries; it is not finalised here.
///
/// # Errors
///
/// Returns on the first file whose name cannot be determined or converted to
/// UTF-8, that cannot be opened, or that cannot be written to the archive.
pub fn zip_crate_external(
    rocrate: &mut RoCrate,
    mut zip_data: RoCrateZip,
    crate_path: &RoCrateZipPaths,
) -> Result<RoCrateZip, ZipError> {
    let outside_files = get_noncontained_data_entites(rocrate, crate_path, false);

    for (entity_id, source_path) in outside_files {
        let os_name = match source_path.file_name() {
            Some(os_name) => os_name,
            None => return Err(ZipError::FileNameNotFound),
        };
        let base_name = match os_name.to_str() {
            Some(base_name) => base_name,
            None => return Err(ZipError::FileNameConversionFailed),
        };
        let zip_entry_name = format!("external/{}", base_name);

        let mut source = fs::File::open(&source_path).map_err(ZipError::IoError)?;
        zip_data
            .zip
            .start_file(&zip_entry_name, zip_data.options)
            .map_err(|e| ZipError::ZipOperationError(e.to_string()))?;
        io::copy(&mut source, &mut zip_data.zip).map_err(ZipError::IoError)?;

        // Only point the entity at the archived copy once it is really there.
        rocrate.update_id_recursive(&entity_id, &zip_entry_name);
    }

    Ok(zip_data)
}
/// Collects the crate's non-URL ids and resolves them against the crate directory.
///
/// All ids of `rocrate` that `is_not_url` accepts are passed on to
/// `get_noncontained_paths` together with `crate_path.root_path` and
/// `inverse`; the returned map (id -> resolved path) is handed back unchanged.
fn get_noncontained_data_entites(
    rocrate: &mut RoCrate,
    crate_path: &RoCrateZipPaths,
    inverse: bool,
) -> HashMap<String, PathBuf> {
    let local_ids: Vec<&String> = rocrate
        .get_all_ids()
        .into_iter()
        .filter(|candidate| is_not_url(candidate))
        .collect();
    get_noncontained_paths(local_ids, &crate_path.root_path, inverse)
}
fn get_noncontained_paths(
ids: Vec<&String>,
crate_dir: &Path,
inverse: bool,
) -> HashMap<String, PathBuf> {
let mut nonrels: HashMap<String, PathBuf> = HashMap::new();
let rocrate_path = get_absolute_path(crate_dir).unwrap();
println!("crate path {:?} and target id {:?}", rocrate_path, ids);
for id in ids.iter() {
if id.starts_with('#') {
continue;
}
if let Some(path) = get_absolute_path(Path::new(id)) {
if path.exists() {
println!("Absolute path: {:?}", path);
if is_outside_base_folder(&rocrate_path, &path) && !inverse {
nonrels.insert(id.to_string(), path);
} else if inverse {
nonrels.insert(id.to_string(), path);
}
}
} else {
println!("ID: {:?}", id);
let path = match Path::new(id).canonicalize() {
Ok(resolved) => Ok(resolved),
Err(e) if e.kind() == io::ErrorKind::NotFound => Ok(resolve_tilde_path(id)),
Err(e) => Err(continue),
};
println!("Pre Resolved path: {:?}", path);
let resolved_path = rocrate_path.join(path.unwrap()).canonicalize();
println!("Resolved path: {:?}", resolved_path);
match resolved_path {
Ok(abs_path) => {
println!("Can confirm: {:?}", abs_path);
if abs_path.exists() {
println!("Exists: {:?}", abs_path);
if is_outside_base_folder(&rocrate_path, &abs_path) && !inverse {
nonrels.insert(id.to_string(), abs_path);
} else if inverse {
nonrels.insert(id.to_string(), abs_path);
}
} else {
}
}
Err(_e) => {} }
}
}
nonrels
}
fn resolve_tilde_path(path: &str) -> PathBuf {
if let Some(home_dir) = dirs::home_dir() {
if path.starts_with("~") {
return home_dir.join(path.strip_prefix("~/").unwrap_or(""));
}
}
Path::new(path).to_path_buf()
}
/// Returns the canonical, absolute form of `relative_path`.
///
/// Yields `None` when canonicalisation fails for any reason, for instance
/// because the path does not exist; the underlying error is discarded.
fn get_absolute_path(relative_path: &Path) -> Option<PathBuf> {
    fs::canonicalize(relative_path).ok()
}
/// Reports whether `path` should be treated as a local path rather than a URL.
///
/// * Returns `false` for `"./"` and for anything containing
///   `ro-crate-metadata.json`.
/// * Returns `true` for strings that look like file paths: a leading `\\`
///   (including the `\\?\` form), a drive prefix such as `C:`, a leading `/`
///   or `.`, or a leading `file:`.
/// * Otherwise returns `true` exactly when the string does not parse as a URL.
pub fn is_not_url(path: &str) -> bool {
    if path == "./" || path.contains("ro-crate-metadata.json") {
        return false;
    }

    // A letter immediately followed by ':' is read as a drive prefix.
    let mut leading = path.chars();
    let has_drive_prefix = match (leading.next(), leading.next()) {
        (Some(first), Some(':')) => first.is_alphabetic(),
        _ => false,
    };

    let path_prefixes = [r"\\?\", r"\\", "/", ".", "file:"];
    let has_path_prefix = path_prefixes
        .iter()
        .any(|prefix| path.starts_with(*prefix));

    if has_drive_prefix || has_path_prefix {
        return true;
    }

    Url::parse(path).is_err()
}
/// Reports whether `file_path` lies outside `base_folder`.
///
/// The comparison is made component-wise on the paths as given; nothing is
/// canonicalised here. Both paths are also printed to stdout.
fn is_outside_base_folder(base_folder: &Path, file_path: &Path) -> bool {
    println!("Base folder: {:?} | file path {:?}", base_folder, file_path);
    let is_inside = file_path.strip_prefix(base_folder).is_ok();
    !is_inside
}
// NOTE(review): several tests below create or overwrite the same archive,
// `tests/fixtures/test_experiment/test_experiment.zip`, and the test harness
// runs tests in parallel by default. They do not assert on the archive's
// contents, but giving each test its own output location would remove the
// shared state.
#[cfg(test)]
mod write_crate_tests {
    use super::*;
    use crate::ro_crate::read::read_crate;
    use std::collections::HashMap;
    use std::env;
    use std::fs;
    use std::path::Path;
    use std::path::PathBuf;

    /// Builds a path below `tests/fixtures`, relative to the working directory.
    fn fixture_path(relative_path: &str) -> PathBuf {
        Path::new("tests/fixtures").join(relative_path)
    }

    /// `write_crate` must produce the pretty-printed JSON of the crate.
    #[test]
    fn test_write_crate_success() {
        let path = fixture_path("_ro-crate-metadata-minimal.json");
        let rocrate = read_crate(&path, 0).unwrap();
        let file_name = "test_rocrate_output.json";
        write_crate(&rocrate, file_name.to_string());
        assert!(Path::new(file_name).exists());
        let file_content = fs::read_to_string(file_name).expect("Failed to read file");
        let expected_json = serde_json::to_string_pretty(&rocrate).expect("Failed to serialize");
        println!("{}", file_content);
        // `write_crate` appends a newline, hence the trim.
        assert_eq!(file_content.trim_end(), expected_json);
        fs::remove_file(file_name).expect("Failed to remove test file");
    }

    /// Smoke test: zipping without external files must not panic.
    #[test]
    fn test_zip_crate_basic() {
        let path = fixture_path("test_experiment/_ro-crate-metadata-minimal.json");
        let zipped = zip_crate(&path, false, 0, false, false);
        println!("{:?}", zipped);
    }

    /// Smoke test: zipping with external files must not panic.
    #[test]
    fn test_zip_crate_external_full() {
        let path = fixture_path("test_experiment/_ro-crate-metadata-minimal.json");
        let zipped = zip_crate(&path, true, 0, false, false);
        println!("{:?}", zipped);
    }

    /// Smoke test: zipping with a uuid-based archive name must not panic.
    #[test]
    fn test_zip_crate_external_full_unique() {
        let path = fixture_path("unique_zips/_ro-crate-metadata-minimal.json");
        let zipped = zip_crate(&path, true, 0, false, true);
        println!("{:?}", zipped);
    }

    /// `construct_paths` must derive all three paths from the metadata file.
    #[test]
    fn test_construct_paths() {
        let metadata = fixture_path("test_experiment/_ro-crate-metadata-minimal.json")
            .canonicalize()
            .unwrap();
        let root = fixture_path("test_experiment").canonicalize().unwrap();
        let paths = construct_paths(&metadata).unwrap();
        assert_eq!(paths.absolute_path, metadata);
        assert_eq!(paths.root_path, root);
        // Build the expected archive path from the canonical directory rather
        // than canonicalising the archive itself: the archive only exists once
        // another test has created it, so that made this test order-dependent.
        assert_eq!(paths.zip_file_name, root.join("test_experiment.zip"));
    }

    /// `directory_walk` must report exactly the data files of the fixture crate.
    #[test]
    fn test_directory_walk() {
        let cwd = env::current_dir().unwrap();
        let path = fixture_path("test_experiment/_ro-crate-metadata-minimal.json");
        let zip_paths = RoCrateZipPaths {
            absolute_path: cwd.join(&path),
            root_path: cwd.join(PathBuf::from("tests/fixtures/test_experiment")),
            zip_file_name: cwd.join(PathBuf::from(
                "tests/fixtures/test_experiment/test_experiment.zip",
            )),
        };
        let mut zip_data = RoCrateZip {
            zip: ZipWriter::new(File::create(&zip_paths.zip_file_name).unwrap()),
            options: FileOptions::default().compression_method(zip::CompressionMethod::Deflated),
        };
        let path = fixture_path("test_experiment/_ro-crate-metadata-minimal.json");
        let mut rocrate = read_crate(&path, 0).unwrap();
        let mut directory_contents =
            directory_walk(&mut rocrate, &zip_paths, &mut zip_data, false).unwrap();
        let mut test_vec: Vec<PathBuf> = vec![
            fixture_path("test_experiment/data.csv")
                .canonicalize()
                .unwrap(),
            fixture_path("test_experiment/text_1.txt")
                .canonicalize()
                .unwrap(),
        ];
        directory_contents.sort();
        test_vec.sort();
        assert_eq!(directory_contents, test_vec);
    }

    /// `is_not_url` must separate URL-like ids from local paths and fragments.
    #[test]
    fn test_is_not_url() {
        let cases: Vec<(&str, bool)> = vec![
            // URLs and other scheme-based identifiers.
            ("http://example.com", false),
            ("https://example.com", false),
            ("ftp://ftp.example.com", false),
            ("sftp://example.com", false),
            ("ws://example.com/socket", false),
            ("wss://example.com/socket", false),
            ("data:text/html,<html>Hello!</html>", false),
            ("blob:https://example.com/uuid", false),
            ("mailto:someone@example.com?subject=Hello", false),
            ("tel:+1234567890", false),
            ("sms:+1234567890?body=Hello", false),
            ("jdbc:mysql://localhost:3306/database", false),
            ("urn:uuid:123e4567-e89b-12d3-a456-426614174000", false),
            ("ldap://example.com:389/dc=example,dc=com", false),
            ("ssh://user@server.com", false),
            ("rtsp://media.example.com/video", false),
            ("mms://stream.example.com", false),
            ("magnet:?xt=urn:btih:hash1234", false),
            ("geo:37.7749,-122.4194", false),
            (
                "bitcoin:1A1zP1eP5QGefi2DMPTfTL5SLmv7DivfNa?amount=0.01",
                false,
            ),
            ("ipfs://bafybeic3cphk4a", false),
            ("irc://irc.libera.chat/#rust", false),
            ("git://github.com/user/repo.git", false),
            ("telnet://192.168.1.1", false),
            ("news:comp.lang.rust", false),
            ("about:blank", false),
            ("chrome://settings/", false),
            ("javascript:alert('Hello')", false),
            ("s3://bucket-name/object-key", false),
            ("gs://bucket-name/object-key", false),
            ("azure://container-name/blob-name", false),
            ("swift://container-name/object-name", false),
            ("wasabi://bucket-name/object-key", false),
            ("minio://bucket-name/object-key", false),
            ("aliyun://bucket-name/object-key", false),
            ("digitalocean://bucket-name/object-key", false),
            ("ibmcloud://bucket-name/object-key", false),
            ("backblaze://bucket-name/object-key", false),
            ("rackspace://container-name/object-name", false),
            ("oracle://bucket-name/object-key", false),
            // Fragments and other non-URL identifiers.
            ("#test", true),
            (
                "main.nf#main/FAMOSAB_WRROCMETATEST:WRROCMETATEST:FASTP",
                true,
            ),
            // Local paths in their various spellings.
            ("file:///C:/Windows/System32/drivers/etc/hosts", true),
            ("file:///Users/user/Documents/notes.txt", true),
            ("/home/user/Documents/report.pdf", true),
            ("C:\\Users\\User\\Downloads\\file.txt", true),
            ("../relative/path/to/file.txt", true),
            ("./current/directory/file.txt", true),
            ("/var/log/syslog", true),
            ("~/.ssh/config", true),
            ("~/Documents/resume.docx", true),
            ("/mnt/data/project/files", true),
            ("E:\\Music\\playlist.m3u", true),
            ("\\\\network\\share\\folder\\file.txt", true),
            ("/etc/nginx/nginx.conf", true),
            ("/opt/app/bin/start.sh", true),
            ("/dev/null", true),
            ("C:/Program Files/App/app.exe", true),
            ("/usr/local/bin/script", true),
            ("D:/Projects/Code/main.rs", true),
        ];
        for (key, value) in cases {
            let test = is_not_url(key);
            println!("Func result: {}, testing: {}, {}", test, key, value);
            assert_eq!(test, value, "unexpected classification for {}", key);
        }
    }

    /// Adds paths that are only expected to exist on non-Windows systems.
    fn user_root_unix(mut path_types: HashMap<&str, bool>) -> HashMap<&str, bool> {
        if !cfg!(windows) {
            path_types.insert("~/.cargo/env", true);
            path_types.insert("/var/log/syslog", true);
        }
        path_types
    }

    /// Each id is checked on its own: `true` means it must be reported as
    /// lying outside the crate directory, `false` that it must not be reported.
    #[test]
    fn test_get_noncontained_paths() {
        let mut path_types: HashMap<&str, bool> = HashMap::new();
        let cwd = env::current_dir().unwrap();
        let crate_path = cwd.join(PathBuf::from("tests/fixtures/test_experiment"));
        path_types.insert("../invalid.json", true);
        path_types.insert("../external.txt", true);
        path_types.insert("./data.csv", false);
        path_types.insert("./text_1.txt", false);
        path_types.insert("text_1.txt", false);
        path_types.insert("#fragment", false);
        path_types = user_root_unix(path_types);
        let abs_not = cwd
            .join(PathBuf::from("README.md"))
            .to_str()
            .unwrap()
            .to_string();
        path_types.insert(&abs_not, true);
        let abs_is = cwd
            .join(crate_path.join(PathBuf::from("data.csv")))
            .to_str()
            .unwrap()
            .to_string();
        path_types.insert(&abs_is, false);
        for (key, value) in path_types {
            let mut input_vec: Vec<&String> = Vec::new();
            let target = key.to_string();
            input_vec.push(&target);
            let test = get_noncontained_paths(input_vec.clone(), &crate_path, false);
            if test.is_empty() {
                println!("Test is empty for relative ID: {}", key);
                assert_eq!(value, false)
            } else {
                println!("Test is successful for relative ID: {}", key);
                assert_eq!(value, true)
            }
        }
    }

    /// Smoke test: adding external files to a fresh archive must not panic.
    #[test]
    fn test_zip_crate_external_func() {
        let cwd = env::current_dir().unwrap();
        let path = fixture_path("test_experiment/_ro-crate-metadata-minimal.json");
        let mut rocrate = read_crate(&path, 0).unwrap();
        let zip_paths = RoCrateZipPaths {
            absolute_path: cwd.join(&path),
            root_path: cwd.join(PathBuf::from("tests/fixtures/test_experiment")),
            zip_file_name: cwd.join(PathBuf::from(
                "tests/fixtures/test_experiment/test_experiment.zip",
            )),
        };
        let zip_data = RoCrateZip {
            zip: ZipWriter::new(File::create(&zip_paths.zip_file_name).unwrap()),
            options: FileOptions::default().compression_method(zip::CompressionMethod::Deflated),
        };
        let _zipped = zip_crate_external(&mut rocrate, zip_data, &zip_paths);
    }
}