file_split_to_directory/
lib.rs1use itertools::Itertools;
2use std::fs::{self, DirEntry};
3use std::num::NonZeroUsize;
4use std::path::PathBuf;
5
/// Ready-to-run configuration for splitting the files of one directory into
/// sub-directories.
///
/// Values are produced by `FileSplitToDirectoryBuilder::build` and consumed by
/// `execute`.
#[derive(Clone, Debug)]
pub struct FileSplitToDirectory {
    /// Directory that is listed; the target sub-directories are created inside it.
    path: PathBuf,
    /// Maximum number of files placed into one sub-directory.
    chunk: NonZeroUsize,
    /// Comparison function that decides the order in which files are grouped.
    sort_cmp: fn(&DirEntry, &DirEntry) -> std::cmp::Ordering,
    /// Maps the zero-based group index to the name of its sub-directory.
    directory_name: fn(usize) -> String,
}
13
14impl FileSplitToDirectory {
15 pub fn execute(&self) -> Result<(), std::io::Error> {
16 let chunks = fs::read_dir(&self.path)?
17 .enumerate()
18 .filter_map(|(_, f)| f.ok())
19 .filter(|f| {
20 let Ok(t) = f.file_type() else {
21 return false;
22 };
23 t.is_file()
24 })
25 .sorted_by(self.sort_cmp)
26 .chunks(self.chunk.get());
27
28 for (i, chunk) in chunks.into_iter().enumerate() {
29 let target_root = self.path.join((self.directory_name)(i));
30 if !target_root.is_dir() {
31 fs::create_dir(&target_root)?;
32 }
33
34 for f in chunk {
35 let to = target_root.join(f.file_name());
36 fs::rename(f.path(), to)?;
37 }
38 }
39 Ok(())
40 }
41}
42
/// Function that maps a zero-based group index to a sub-directory name.
type DirectoryNameGeneratorFn = fn(usize) -> String;
/// Function that compares two directory entries to decide their order.
type SortCmpFn = fn(&DirEntry, &DirEntry) -> std::cmp::Ordering;
45
46pub struct FileSplitToDirectoryBuilder {
47 pub path: Option<PathBuf>,
48 pub chunk: NonZeroUsize,
49 pub sort_cmp: SortCmpFn,
50 pub directory_name: DirectoryNameGeneratorFn,
51}
52
53impl FileSplitToDirectoryBuilder {
54 pub fn default_sort_cmp(a: &DirEntry, b: &DirEntry) -> std::cmp::Ordering {
55 natord::compare(
56 &a.file_name().to_string_lossy(),
57 &b.file_name().to_string_lossy(),
58 )
59 }
60
61 pub fn default_directory_name(i: usize) -> String {
62 i.to_string()
63 }
64
65 pub fn with_path(mut self, path: PathBuf) -> Self {
66 self.path = Some(path);
67 self
68 }
69
70 pub fn with_chunk(mut self, chunk: NonZeroUsize) -> Self {
71 self.chunk = chunk;
72 self
73 }
74
75 pub fn with_sort_cmp(mut self, sort_cmp: SortCmpFn) -> Self {
76 self.sort_cmp = sort_cmp;
77 self
78 }
79
80 pub fn with_directory_name(mut self, directory_name: DirectoryNameGeneratorFn) -> Self {
81 self.directory_name = directory_name;
82 self
83 }
84
85 pub fn build(&self) -> Result<FileSplitToDirectory, std::io::Error> {
86 if let Some(path) = &self.path {
87 Ok(FileSplitToDirectory {
88 path: path.clone(),
89 chunk: self.chunk,
90 sort_cmp: self.sort_cmp.clone(),
91 directory_name: self.directory_name.clone(),
92 })
93 } else {
94 Err(std::io::Error::new(
95 std::io::ErrorKind::Other,
96 "path is not set",
97 ))
98 }
99 }
100}
101
102impl Default for FileSplitToDirectoryBuilder {
103 fn default() -> Self {
104 Self {
105 path: None,
106 chunk: NonZeroUsize::new(4400).unwrap(),
107 sort_cmp: Self::default_sort_cmp,
108 directory_name: Self::default_directory_name,
109 }
110 }
111}
112
#[cfg(test)]
mod tests {
    use super::*;
    use fake::{Fake, Faker};
    use fs::File;
    use std::path::Path;
    use temp_dir::TempDir;

    /// Creates a temporary directory containing `howmany` empty files named
    /// `0.tmp`, `1.tmp`, and so on.
    fn create_tmpfile(howmany: usize) -> TempDir {
        let d = TempDir::new().unwrap();
        for i in 0..howmany {
            File::create(d.path().join(format!("{i}.tmp"))).unwrap();
        }
        d
    }

    /// Lists the names of the entries directly inside `dir`, sorted so that the
    /// assertions do not depend on the order in which `fs::read_dir` yields entries.
    fn sorted_entry_names(dir: &Path) -> Vec<String> {
        let mut names: Vec<String> = fs::read_dir(dir)
            .unwrap()
            .map(|entry| entry.unwrap().file_name().to_string_lossy().into_owned())
            .collect();
        names.sort();
        names
    }

    #[test]
    fn test_default() {
        // Without a path the builder must refuse to build.
        assert!(FileSplitToDirectoryBuilder::default().build().is_err());
    }

    #[test]
    fn test_default_directory_name() {
        for _ in 0..100 {
            let v = Faker.fake::<usize>();
            assert_eq!(
                FileSplitToDirectoryBuilder::default_directory_name(v),
                v.to_string()
            );
        }
    }

    #[test]
    fn test_with_path() {
        let path = Faker.fake::<PathBuf>();
        let builder = FileSplitToDirectoryBuilder::default().with_path(path.clone());
        assert_eq!(builder.path, Some(path));
    }

    #[test]
    fn test_with_chunk() {
        let chunk = Faker.fake::<NonZeroUsize>();
        let builder = FileSplitToDirectoryBuilder::default().with_chunk(chunk);
        assert_eq!(builder.chunk, chunk);
    }

    #[test]
    fn test_with_sort_cmp() {
        let tmp_dir = create_tmpfile(4);
        // Reverse lexicographic order: 3.tmp, 2.tmp, 1.tmp, 0.tmp.
        FileSplitToDirectoryBuilder::default()
            .with_path(tmp_dir.path().to_path_buf())
            .with_chunk(2.try_into().unwrap())
            .with_sort_cmp(|a: &DirEntry, b: &DirEntry| {
                a.file_name()
                    .into_string()
                    .unwrap()
                    .cmp(&b.file_name().into_string().unwrap())
                    .reverse()
            })
            .build()
            .unwrap()
            .execute()
            .unwrap();

        // Directories are addressed by name; the listing order of the parent
        // directory is not guaranteed.
        assert_eq!(
            sorted_entry_names(&tmp_dir.path().join("0")),
            ["2.tmp", "3.tmp"]
        );
        assert_eq!(
            sorted_entry_names(&tmp_dir.path().join("1")),
            ["0.tmp", "1.tmp"]
        );
    }

    #[test]
    fn test_with_directory_name() {
        let tmp_dir = create_tmpfile(4);
        FileSplitToDirectoryBuilder::default()
            .with_path(tmp_dir.path().to_path_buf())
            .with_chunk(2.try_into().unwrap())
            .with_directory_name(|i: usize| {
                char::from(b'a' + u8::try_from(i).unwrap()).to_string()
            })
            .build()
            .unwrap()
            .execute()
            .unwrap();

        assert_eq!(sorted_entry_names(tmp_dir.path()), ["a", "b"]);
    }

    #[test]
    fn test_execute_default() {
        let tmp_dir = create_tmpfile(4);
        FileSplitToDirectoryBuilder::default()
            .with_path(tmp_dir.path().to_path_buf())
            .with_chunk(2.try_into().unwrap())
            .build()
            .unwrap()
            .execute()
            .unwrap();

        assert_eq!(sorted_entry_names(tmp_dir.path()), ["0", "1"]);
        assert_eq!(
            sorted_entry_names(&tmp_dir.path().join("0")),
            ["0.tmp", "1.tmp"]
        );
        assert_eq!(
            sorted_entry_names(&tmp_dir.path().join("1")),
            ["2.tmp", "3.tmp"]
        );
    }
}