// yolo-set 0.1.0
//
// A CLI tool for managing YOLO datasets — merge, deduplicate, remap labels, and more.
use std::fs;
use std::io;
use std::path::{Path, PathBuf};

use crate::model::merge_mode::MergeMode;

/// Moves every regular file directly inside `src_folder` into `dest_folder`.
///
/// `dest_folder` is created (including parents) when missing. Entries of
/// `src_folder` that are not regular files are skipped; nothing is recursed into.
///
/// When a file of the same name already exists in `dest_folder`, a numeric
/// suffix is inserted *before* the extension (`img.jpg` -> `img_1.jpg`,
/// `img_2.jpg`, ...) so the moved file keeps its extension. Names without an
/// extension get the suffix appended (`notes` -> `notes_1`). The renamed part
/// is built from a lossy UTF-8 conversion of the original name.
///
/// # Errors
/// Returns the first I/O error hit while creating the destination, listing the
/// source, or renaming a file; `move_file` is printed to stderr first.
/// `fs::rename` is documented not to work across mount points.
fn move_file(src_folder: &Path, dest_folder: &Path) -> io::Result<()> {
	// Shared error tap: keeps the original stderr marker for every failure site.
	fn log_err(e: io::Error) -> io::Error {
		eprintln!("move_file");
		e
	}

	fs::create_dir_all(dest_folder).map_err(log_err)?;
	for entry in fs::read_dir(src_folder).map_err(log_err)? {
		let entry = entry.map_err(log_err)?;
		let src_path = entry.path();
		if !src_path.is_file() {
			continue;
		}

		let file_name = entry.file_name();
		let mut dest_path = dest_folder.join(&file_name);
		if dest_path.exists() {
			// Split once so the counter lands between stem and extension.
			let name = Path::new(&file_name);
			let stem = name
				.file_stem()
				.unwrap_or(file_name.as_os_str())
				.to_string_lossy();
			let ext = name.extension().map(|ext| ext.to_string_lossy());
			let mut i = 1usize;
			loop {
				let candidate = match &ext {
					Some(ext) => format!("{stem}_{i}.{ext}"),
					None => format!("{stem}_{i}"),
				};
				dest_path = dest_folder.join(candidate);
				if !dest_path.exists() {
					break;
				}
				i += 1;
			}
		}
		fs::rename(&src_path, &dest_path).map_err(log_err)?;
	}
	Ok(())
}

/// Recursively copies the contents of `src` into `dest`.
///
/// `dest` is created (with parents) before `src` is listed. Each entry for
/// which `is_dir()` is true is copied by a recursive call; every other entry
/// is copied with `fs::copy` under the same file name.
///
/// # Errors
/// Stops at the first I/O error, prints a short marker to stderr for the step
/// that failed, and returns the error unchanged.
pub(crate) fn copy_dir(src: &Path, dest: &Path) -> io::Result<()> {
	if let Err(err) = fs::create_dir_all(dest) {
		eprintln!("copy_dir 创建副本路径失败");
		return Err(err);
	}

	let listing = match fs::read_dir(src) {
		Ok(listing) => listing,
		Err(err) => {
			eprintln!("copy_dir打开源文件夹路径{src:?}失败");
			return Err(err);
		}
	};

	for item in listing {
		let item = match item {
			Ok(item) => item,
			Err(err) => {
				eprintln!("copy_dir 3");
				return Err(err);
			}
		};
		let from = item.path();
		let to = dest.join(item.file_name());

		if from.is_dir() {
			// Each recursion level logs once while the error unwinds.
			if let Err(err) = copy_dir(&from, &to) {
				eprintln!("copy_dir 递归复制");
				return Err(err);
			}
		} else if let Err(err) = fs::copy(&from, &to) {
			eprintln!("copy_dir 非递归复制");
			return Err(err);
		}
	}
	Ok(())
}

/// Copies the directory `src` to a new sibling named `<name>_copy_<idx>`.
///
/// `idx` starts at 0 and is the first value for which no entry of that name
/// exists next to `src`. The directory name goes through a lossy UTF-8
/// conversion when the sibling name is built.
///
/// # Errors
/// * `NotFound` when `src` does not exist or is not a directory.
/// * `InvalidInput` when `src` has no final name component (for example `.`
///   or a path ending in `..`) or no parent; these are reported as errors
///   rather than panics because the path comes from the caller.
/// * Any error returned by `copy_dir`.
///
/// # Returns
/// The path of the newly created copy.
pub(crate) fn create_dir_copy(src: &Path) -> io::Result<PathBuf> {
	if !src.is_dir() {
		return Err(io::Error::new(
			io::ErrorKind::NotFound,
			format!("create_dir_copy: 源路径不存在或不是目录: {src:?}"),
		));
	}
	let name = src
		.file_name()
		.ok_or_else(|| {
			io::Error::new(
				io::ErrorKind::InvalidInput,
				format!("create_dir_copy: 源路径无文件名: {src:?}"),
			)
		})?
		.to_string_lossy();
	let parent = src.parent().ok_or_else(|| {
		io::Error::new(
			io::ErrorKind::InvalidInput,
			format!("create_dir_copy: 源路径无父目录: {src:?}"),
		)
	})?;

	// Probe `<name>_copy_0`, `<name>_copy_1`, ... until a free name is found.
	let mut idx = 0usize;
	let dest = loop {
		let candidate = parent.join(format!("{name}_copy_{idx}"));
		if !candidate.exists() {
			break candidate;
		}
		idx += 1;
	};
	copy_dir(src, &dest)?;
	Ok(dest)
}

/// Moves the files of each `<split>/<kind>` folder of `dataset` into the
/// folder with the same relative path under `output_folder`.
///
/// Folders are processed in the order `train`, `test`, `valid`, and within
/// each split `images` before `labels`. Processing stops at the first error
/// returned by `move_file`.
pub(crate) fn merge_image_and_label(dataset: &Path, output_folder: &Path) -> io::Result<()> {
	const SPLITS: [&str; 3] = ["train", "test", "valid"];
	const KINDS: [&str; 2] = ["images", "labels"];

	for split in SPLITS {
		for kind in KINDS {
			let relative = format!("{split}/{kind}");
			let from = dataset.join(&relative);
			let to = output_folder.join(&relative);
			move_file(&from, &to)?;
		}
	}
	Ok(())
}

/// Chooses the directory to work in for the given merge mode.
///
/// * `MergeMode::Move` — returns `src_folder` itself and no cleanup guard.
/// * `MergeMode::Copy` — copies `src_folder` with `create_dir_copy` and returns
///   the copy's path together with a `DirCleanup` guard for that copy.
///
/// # Errors
/// Propagates any error from `create_dir_copy`.
pub fn create_work_dir(src_folder: &Path, mode: MergeMode) -> io::Result<(PathBuf, Option<DirCleanup>)> {
	match mode {
		MergeMode::Move => Ok((src_folder.to_path_buf(), None)),
		MergeMode::Copy => {
			let copied = create_dir_copy(src_folder)?;
			let guard = DirCleanup::new(copied.clone());
			Ok((copied, Some(guard)))
		}
	}
}

/// Guard that holds a directory path and removes that directory tree when the
/// guard is dropped.
pub struct DirCleanup {
	/// Directory deleted recursively on drop.
	dir: PathBuf,
}

impl DirCleanup {
	/// Wraps `dir`; nothing on disk is touched until the guard is dropped.
	pub fn new(dir: PathBuf) -> Self {
		DirCleanup { dir }
	}
}

impl Drop for DirCleanup {
	/// Removes the directory and its contents; any failure (including the
	/// directory not existing) is ignored.
	fn drop(&mut self) {
		fs::remove_dir_all(self.dir.as_path()).ok();
	}
}

#[cfg(test)]
mod tests {
	use super::*;
	use std::fs;
	use std::path::{Path, PathBuf};
	use std::sync::atomic::{AtomicUsize, Ordering};

	/// Gives every scratch directory in this test binary a distinct suffix.
	static COUNTER: AtomicUsize = AtomicUsize::new(0);

	/// Relative folders that `merge_image_and_label` processes.
	const SPLIT_DIRS: [&str; 6] = [
		"train/images",
		"train/labels",
		"test/images",
		"test/labels",
		"valid/images",
		"valid/labels",
	];

	/// Scratch directory that is removed when the value is dropped, so a
	/// failing assertion no longer leaves files behind in the temp folder.
	struct TmpDir {
		path: PathBuf,
	}

	impl TmpDir {
		/// Root of the scratch directory.
		fn path(&self) -> &Path {
			&self.path
		}

		/// Path of `rel` inside the scratch directory.
		fn join(&self, rel: impl AsRef<Path>) -> PathBuf {
			self.path.join(rel)
		}
	}

	impl Drop for TmpDir {
		fn drop(&mut self) {
			let _ = fs::remove_dir_all(&self.path);
		}
	}

	/// Creates a fresh, empty scratch directory named after the process id
	/// and a per-call counter.
	fn tmp_dir() -> TmpDir {
		let n = COUNTER.fetch_add(1, Ordering::Relaxed);
		let path = std::env::temp_dir().join(format!("dir_test_{}_{}", std::process::id(), n));
		let _ = fs::remove_dir_all(&path);
		fs::create_dir_all(&path).unwrap();
		TmpDir { path }
	}

	// ── copy_dir ──

	#[test]
	fn copy_dir_flat_files() {
		let tmp = tmp_dir();
		let src = tmp.join("src");
		let dst = tmp.join("dst");
		fs::create_dir_all(&src).unwrap();
		fs::write(src.join("a.txt"), "hello").unwrap();
		fs::write(src.join("b.txt"), "world").unwrap();

		copy_dir(&src, &dst).unwrap();

		assert!(dst.join("a.txt").exists());
		assert!(dst.join("b.txt").exists());
		assert_eq!(fs::read_to_string(dst.join("a.txt")).unwrap(), "hello");
		// The source must be left untouched by a copy.
		assert_eq!(fs::read_to_string(src.join("a.txt")).unwrap(), "hello");
	}

	#[test]
	fn copy_dir_nested() {
		let tmp = tmp_dir();
		let src = tmp.join("src");
		let dst = tmp.join("dst");
		fs::create_dir_all(src.join("sub")).unwrap();
		fs::write(src.join("sub").join("x.txt"), "nested").unwrap();

		copy_dir(&src, &dst).unwrap();

		assert!(dst.join("sub").join("x.txt").exists());
		assert_eq!(fs::read_to_string(dst.join("sub").join("x.txt")).unwrap(), "nested");
	}

	#[test]
	fn copy_dir_empty_src() {
		let tmp = tmp_dir();
		let src = tmp.join("empty_src");
		let dst = tmp.join("empty_dst");
		fs::create_dir_all(&src).unwrap();

		copy_dir(&src, &dst).unwrap();

		assert!(dst.is_dir());
		assert_eq!(fs::read_dir(&dst).unwrap().count(), 0);
	}

	// ── create_dir_copy ──

	#[test]
	fn create_dir_copy_names_correctly() {
		let tmp = tmp_dir();
		let src = tmp.join("mydata");
		fs::create_dir_all(&src).unwrap();
		fs::write(src.join("f.txt"), "content").unwrap();

		let copy_path = create_dir_copy(&src).unwrap();

		assert!(copy_path.file_name().unwrap().to_string_lossy().contains("mydata"));
		assert!(copy_path.file_name().unwrap().to_string_lossy().contains("_copy_"));
		assert_eq!(copy_path.parent().unwrap(), tmp.path());
		assert!(copy_path.join("f.txt").exists());
	}

	#[test]
	fn create_dir_copy_increments_index() {
		let tmp = tmp_dir();
		let src = tmp.join("data");
		fs::create_dir_all(&src).unwrap();
		// Occupy index 0 so the copy has to take index 1.
		let name0 = src.file_name().unwrap().to_string_lossy();
		fs::create_dir_all(tmp.join(format!("{}_copy_0", name0))).unwrap();

		let copy_path = create_dir_copy(&src).unwrap();
		assert!(copy_path.file_name().unwrap().to_string_lossy().contains("_copy_1"));
	}

	// ── create_work_dir ──

	#[test]
	fn create_work_dir_move_mode() {
		let tmp = tmp_dir();
		let src = tmp.join("work");
		fs::create_dir_all(&src).unwrap();

		let (path, cleanup) = create_work_dir(&src, MergeMode::Move).unwrap();
		assert_eq!(path, src);
		assert!(cleanup.is_none());
	}

	#[test]
	fn create_work_dir_copy_mode_returns_cleanup() {
		let tmp = tmp_dir();
		let src = tmp.join("work");
		fs::create_dir_all(&src).unwrap();

		let (path, cleanup) = create_work_dir(&src, MergeMode::Copy).unwrap();
		assert_ne!(path, src);
		assert!(path.exists());
		assert!(cleanup.is_some());
		drop(cleanup);
		assert!(!path.exists());
	}

	// ── merge_image_and_label ──

	#[test]
	fn merge_moves_files_to_dest() {
		let tmp = tmp_dir();
		let dataset = tmp.join("dataset");
		let output = tmp.join("output");

		for d in &SPLIT_DIRS {
			fs::create_dir_all(dataset.join(d)).unwrap();
		}
		fs::write(dataset.join("train/images/img1.jpg"), "img").unwrap();
		fs::write(dataset.join("train/labels/img1.txt"), "0 0.5").unwrap();

		merge_image_and_label(&dataset, &output).unwrap();

		assert!(output.join("train/images/img1.jpg").exists());
		assert!(output.join("train/labels/img1.txt").exists());
		assert!(!dataset.join("train/images/img1.jpg").exists());
		assert!(!dataset.join("train/labels/img1.txt").exists());
	}

	#[test]
	fn merge_creates_all_six_dirs() {
		let tmp = tmp_dir();
		let dataset = tmp.join("dataset");
		let output = tmp.join("output");
		for d in &SPLIT_DIRS {
			fs::create_dir_all(dataset.join(d)).unwrap();
		}

		merge_image_and_label(&dataset, &output).unwrap();

		for d in &SPLIT_DIRS {
			assert!(output.join(d).is_dir(), "missing: {}", d);
		}
	}

	// ── DirCleanup ──

	#[test]
	fn dir_cleanup_removes_on_drop() {
		let tmp = tmp_dir();
		let target = tmp.join("to_clean");
		fs::create_dir_all(&target).unwrap();
		fs::write(target.join("x.txt"), "data").unwrap();

		{
			let _cleanup = DirCleanup::new(target.clone());
			assert!(target.exists());
		}
		assert!(!target.exists());
	}

	#[test]
	fn dir_cleanup_nonexistent_no_panic() {
		let tmp = tmp_dir();
		let target = tmp.join("ghost");
		{
			let _cleanup = DirCleanup::new(target);
		}
	}
}