file_split_to_directory/
lib.rs

1use itertools::Itertools;
2use std::fs::{self, DirEntry};
3use std::num::NonZeroUsize;
4use std::path::PathBuf;
5
6#[derive(Debug, Clone)]
7pub struct FileSplitToDirectory {
8    path: PathBuf,
9    chunk: NonZeroUsize,
10    sort_cmp: fn(&DirEntry, &DirEntry) -> std::cmp::Ordering,
11    directory_name: fn(usize) -> String,
12}
13
14impl FileSplitToDirectory {
15    pub fn execute(&self) -> Result<(), std::io::Error> {
16        let chunks = fs::read_dir(&self.path)?
17            .enumerate()
18            .filter_map(|(_, f)| f.ok())
19            .filter(|f| {
20                let Ok(t) = f.file_type() else {
21                    return false;
22                };
23                t.is_file()
24            })
25            .sorted_by(self.sort_cmp)
26            .chunks(self.chunk.get());
27
28        for (i, chunk) in chunks.into_iter().enumerate() {
29            let target_root = self.path.join((self.directory_name)(i));
30            if !target_root.is_dir() {
31                fs::create_dir(&target_root)?;
32            }
33
34            for f in chunk {
35                let to = target_root.join(f.file_name());
36                fs::rename(f.path(), to)?;
37            }
38        }
39        Ok(())
40    }
41}
42
/// Comparison function used to order directory entries before they are chunked.
type SortCmpFn = fn(&DirEntry, &DirEntry) -> std::cmp::Ordering;
/// Function producing the sub-directory name for a zero-based chunk index.
type DirectoryNameGeneratorFn = fn(usize) -> String;

/// Builder for [`FileSplitToDirectory`].
///
/// Start from [`Default::default`], adjust the settings with the `with_*` methods and finish
/// with `build`. Only `path` has no usable default and must be set before building.
#[derive(Debug, Clone)]
pub struct FileSplitToDirectoryBuilder {
    /// Directory whose files will be split; `None` until a path has been provided.
    pub path: Option<PathBuf>,
    /// Maximum number of files placed in each sub-directory.
    pub chunk: NonZeroUsize,
    /// Ordering applied to the files before they are divided into chunks.
    pub sort_cmp: SortCmpFn,
    /// Maps a zero-based chunk index to the name of its sub-directory.
    pub directory_name: DirectoryNameGeneratorFn,
}
52
53impl FileSplitToDirectoryBuilder {
54    pub fn default_sort_cmp(a: &DirEntry, b: &DirEntry) -> std::cmp::Ordering {
55        natord::compare(
56            &a.file_name().to_string_lossy(),
57            &b.file_name().to_string_lossy(),
58        )
59    }
60
61    pub fn default_directory_name(i: usize) -> String {
62        i.to_string()
63    }
64
65    pub fn with_path(mut self, path: PathBuf) -> Self {
66        self.path = Some(path);
67        self
68    }
69
70    pub fn with_chunk(mut self, chunk: NonZeroUsize) -> Self {
71        self.chunk = chunk;
72        self
73    }
74
75    pub fn with_sort_cmp(mut self, sort_cmp: SortCmpFn) -> Self {
76        self.sort_cmp = sort_cmp;
77        self
78    }
79
80    pub fn with_directory_name(mut self, directory_name: DirectoryNameGeneratorFn) -> Self {
81        self.directory_name = directory_name;
82        self
83    }
84
85    pub fn build(&self) -> Result<FileSplitToDirectory, std::io::Error> {
86        if let Some(path) = &self.path {
87            Ok(FileSplitToDirectory {
88                path: path.clone(),
89                chunk: self.chunk,
90                sort_cmp: self.sort_cmp.clone(),
91                directory_name: self.directory_name.clone(),
92            })
93        } else {
94            Err(std::io::Error::new(
95                std::io::ErrorKind::Other,
96                "path is not set",
97            ))
98        }
99    }
100}
101
102impl Default for FileSplitToDirectoryBuilder {
103    fn default() -> Self {
104        Self {
105            path: None,
106            chunk: NonZeroUsize::new(4400).unwrap(),
107            sort_cmp: Self::default_sort_cmp,
108            directory_name: Self::default_directory_name,
109        }
110    }
111}
112
#[cfg(test)]
mod tests {
    use super::*;
    use fake::{Fake, Faker};
    use fs::File;
    use temp_dir::TempDir;

    /// Creates a temporary directory holding `howmany` empty files named `0.tmp`, `1.tmp`, ...
    fn create_tmpfile(howmany: usize) -> TempDir {
        let d = TempDir::new().unwrap();
        for i in 0..howmany {
            File::create(d.path().join(format!("{i}.tmp"))).unwrap();
        }
        d
    }

    /// Returns the names of the entries directly inside `dir`, sorted.
    ///
    /// `fs::read_dir` yields entries in a platform- and filesystem-dependent order, so the
    /// tests compare sorted listings and address chunk directories by name rather than by
    /// their position in a listing.
    fn sorted_entry_names(dir: impl AsRef<std::path::Path>) -> Vec<String> {
        let mut names: Vec<String> = fs::read_dir(dir)
            .unwrap()
            .map(|entry| entry.unwrap().file_name().to_string_lossy().into_owned())
            .collect();
        names.sort();
        names
    }

    #[test]
    fn test_default() {
        // No path has been set, so building must fail.
        assert!(FileSplitToDirectoryBuilder::default().build().is_err());
    }

    #[test]
    fn test_default_directory_name() {
        for _ in 0..100 {
            let v = Faker.fake::<usize>();
            assert_eq!(
                FileSplitToDirectoryBuilder::default_directory_name(v),
                v.to_string()
            );
        }
    }

    #[test]
    fn test_with_path() {
        let path = Faker.fake::<PathBuf>();
        let builder = FileSplitToDirectoryBuilder::default().with_path(path.clone());
        assert_eq!(builder.path, Some(path));
    }

    #[test]
    fn test_with_chunk() {
        let chunk = Faker.fake::<NonZeroUsize>();
        let builder = FileSplitToDirectoryBuilder::default().with_chunk(chunk);
        assert_eq!(builder.chunk, chunk);
    }

    #[test]
    fn test_with_sort_cmp() {
        let tmp_dir = create_tmpfile(4);
        FileSplitToDirectoryBuilder::default()
            .with_path(tmp_dir.path().to_path_buf())
            .with_chunk(2.try_into().unwrap())
            .with_sort_cmp(|a: &DirEntry, b: &DirEntry| {
                a.file_name()
                    .into_string()
                    .unwrap()
                    .cmp(&b.file_name().into_string().unwrap())
                    .reverse()
            })
            .build()
            .unwrap()
            .execute()
            .unwrap();

        // Reverse order is 3, 2, 1, 0: the first chunk (directory "0") receives 3 and 2,
        // the second chunk (directory "1") receives 1 and 0.
        assert_eq!(
            sorted_entry_names(tmp_dir.path().join("0")),
            ["2.tmp", "3.tmp"]
        );
        assert_eq!(
            sorted_entry_names(tmp_dir.path().join("1")),
            ["0.tmp", "1.tmp"]
        );
    }

    #[test]
    fn test_with_directory_name() {
        let tmp_dir = create_tmpfile(4);
        FileSplitToDirectoryBuilder::default()
            .with_path(tmp_dir.path().to_path_buf())
            .with_chunk(2.try_into().unwrap())
            .with_directory_name(|i: usize| {
                char::from(b'a' + u8::try_from(i).unwrap()).to_string()
            })
            .build()
            .unwrap()
            .execute()
            .unwrap();

        assert_eq!(sorted_entry_names(tmp_dir.path()), ["a", "b"]);
    }

    #[test]
    fn test_execute_default() {
        let tmp_dir = create_tmpfile(4);
        FileSplitToDirectoryBuilder::default()
            .with_path(tmp_dir.path().to_path_buf())
            .with_chunk(2.try_into().unwrap())
            .build()
            .unwrap()
            .execute()
            .unwrap();

        assert_eq!(sorted_entry_names(tmp_dir.path()), ["0", "1"]);
        assert_eq!(
            sorted_entry_names(tmp_dir.path().join("0")),
            ["0.tmp", "1.tmp"]
        );
        assert_eq!(
            sorted_entry_names(tmp_dir.path().join("1")),
            ["2.tmp", "3.tmp"]
        );
    }
}