1use crate::PageFormat;
2use std::fs::File;
3use std::io::{self, BufRead, BufReader, Write};
4use std::path::{Path, PathBuf};
5
/// Creates (or truncates) the output file `<output_name>_<file_counter>.txt`
/// inside `output_directory`.
///
/// # Errors
///
/// Returns whatever error `File::create` reports, for example when the
/// directory does not exist.
fn create_new_output_file(
    output_directory: &Path,
    file_counter: u64,
    output_name: &str,
) -> io::Result<(File, PathBuf)> {
    // Build the destination by appending the generated file name to the directory.
    let mut destination = output_directory.to_path_buf();
    destination.push(format!("{}_{}.txt", output_name, file_counter));

    // Hand back the open handle together with the path it was created at.
    File::create(&destination).map(|handle| (handle, destination))
}
17
18fn check_max_output_file_size(
21 page_format: &PageFormat,
22 max_output_file_size: u64,
23) -> io::Result<()> {
24 if page_format.get_page_header_size() + page_format.get_page_footer_size()
25 > max_output_file_size
26 {
27 let error_message = format!(
28 "Error: The maximum file size ({}) is too small to contain the page header and footer.",
29 max_output_file_size
30 );
31 Err(io::Error::new(io::ErrorKind::InvalidData, error_message))
32 } else {
33 Ok(())
34 }
35}
36
37fn next_output_file(
40 output_directory: &Path,
41 output_file_counter: &mut u64,
42 output_name: &str,
43 current_output_file_size: &mut u64,
44 generated_output_files: &mut Vec<PathBuf>,
45 output_file: &mut File,
46 output_file_path: &mut PathBuf,
47) -> io::Result<()> {
48 *output_file_counter += 1;
49 *current_output_file_size = 0;
50 (*output_file, *output_file_path) =
51 create_new_output_file(output_directory, *output_file_counter, output_name)?;
52 generated_output_files.push(output_file_path.to_path_buf());
53 Ok(())
54}
55
56pub fn split_files_into_chunks(
92 target_files: &[PathBuf],
93 target_files_root_path: Option<&Path>,
94 output_directory: &Path,
95 max_output_file_size: u64,
96 output_name: &str,
97) -> io::Result<Vec<PathBuf>> {
98 let mut generated_output_files = Vec::new();
99 let mut output_file_counter: u64 = 1;
100 let mut current_output_file_size: u64 = 0;
101 let mut current_target_file_name: String;
102 let mut page_format: PageFormat;
103
104 let (mut output_file, mut output_file_path) =
106 create_new_output_file(output_directory, output_file_counter, output_name)?;
107 generated_output_files.push(output_file_path.clone());
108
109 for target_file_path in target_files {
110 current_target_file_name = match target_file_path.to_str() {
111 Some(name) => name.to_string(),
112 None => {
113 return Err(io::Error::new(
114 io::ErrorKind::InvalidData,
115 "Target File path contains invalid UTF-8 characters",
116 ));
117 }
118 };
119
120 let file = match File::open(target_file_path) {
121 Ok(file) => file,
122 Err(e) => {
123 eprintln!("Failed to open file {:?}: {}", target_file_path, e);
124 continue;
125 }
126 };
127
128 page_format = PageFormat::new(current_target_file_name, target_files_root_path);
129 check_max_output_file_size(&page_format, max_output_file_size)?;
130
131 if current_output_file_size + page_format.header_size + page_format.footer_size
132 > max_output_file_size
133 {
134 next_output_file(
135 output_directory,
136 &mut output_file_counter,
137 output_name,
138 &mut current_output_file_size,
139 &mut generated_output_files,
140 &mut output_file,
141 &mut output_file_path.clone(),
142 )?;
143 }
144
145 write!(output_file, "{}", page_format.header)?;
146 current_output_file_size += page_format.header_size;
147
148 let reader = BufReader::new(file);
149 for line_result in reader.lines() {
150 if line_result.is_err() {
151 eprintln!("Skipping non-text file: {:?}", target_file_path);
152 break;
153 }
154 let line = line_result.unwrap();
155 let line_size = line.as_bytes().len() as u64 + 1; if current_output_file_size + line_size + page_format.footer_size > max_output_file_size
158 {
159 write!(output_file, "{}", page_format.footer)?;
160
161 next_output_file(
162 output_directory,
163 &mut output_file_counter,
164 output_name,
165 &mut current_output_file_size,
166 &mut generated_output_files,
167 &mut output_file,
168 &mut output_file_path,
169 )?;
170
171 page_format.increment_page_number();
172 check_max_output_file_size(&page_format, max_output_file_size)?;
173 write!(output_file, "{}", page_format.header)?;
174 current_output_file_size += page_format.header_size;
175 }
176
177 writeln!(output_file, "{}", line)?;
178 current_output_file_size += line_size;
179 }
180 write!(output_file, "{}", page_format.footer)?;
181 }
182 Ok(generated_output_files)
183}
184
#[cfg(test)]
mod split_tests {
    use super::*;
    use std::fs::{self, File};
    use std::io::Write;
    use std::path::{Path, PathBuf};
    use tempfile::tempdir;

    /// Writes `count` one-line text files named `test_file_<i>.txt` into `dir`
    /// and returns their paths in creation order.
    fn write_text_fixtures(dir: &Path, count: usize) -> io::Result<Vec<PathBuf>> {
        (0..count)
            .map(|index| -> io::Result<PathBuf> {
                let fixture_path = dir.join(format!("test_file_{}.txt", index));
                let mut fixture = File::create(&fixture_path)?;
                writeln!(fixture, "Test data for file {}", index)?;
                Ok(fixture_path)
            })
            .collect()
    }

    /// Asserts that every generated chunk contains both marker strings the
    /// tests expect and is no longer than `size_limit` bytes.
    fn assert_chunks_are_well_formed(chunks: Vec<PathBuf>, size_limit: u64) -> io::Result<()> {
        for chunk_path in chunks {
            let chunk_text = fs::read_to_string(chunk_path)?;
            assert!(chunk_text.contains("// START OF CODE BLOCK"));
            assert!(chunk_text.contains("// END OF CODE BLOCK"));
            assert!(chunk_text.len() as u64 <= size_limit);
        }
        Ok(())
    }

    /// With a small limit, each of the five inputs ends up in its own chunk.
    #[test]
    fn test_split_files_into_small_chunks() -> io::Result<()> {
        const SIZE_LIMIT: u64 = 200;
        const FIXTURE_COUNT: usize = 5;

        let workspace = tempdir()?;
        let inputs = write_text_fixtures(workspace.path(), FIXTURE_COUNT)?;

        let chunks = split_files_into_chunks(
            &inputs,
            Some(workspace.path()),
            workspace.path(),
            SIZE_LIMIT,
            "output",
        )?;

        assert!(!chunks.is_empty());
        assert_eq!(chunks.len(), FIXTURE_COUNT);
        assert_chunks_are_well_formed(chunks, SIZE_LIMIT)
    }

    /// With a generous limit, all five inputs share a single chunk.
    #[test]
    fn test_split_files_into_single_large_chunk() -> io::Result<()> {
        const SIZE_LIMIT: u64 = 3000;
        const FIXTURE_COUNT: usize = 5;

        let workspace = tempdir()?;
        let inputs = write_text_fixtures(workspace.path(), FIXTURE_COUNT)?;

        let chunks =
            split_files_into_chunks(&inputs, None, workspace.path(), SIZE_LIMIT, "output")?;

        assert!(!chunks.is_empty());
        assert_eq!(chunks.len(), 1);
        assert_chunks_are_well_formed(chunks, SIZE_LIMIT)
    }

    /// A limit of ten bytes makes the split fail with an error.
    #[test]
    fn test_split_files_with_insufficient_size() -> io::Result<()> {
        let workspace = tempdir()?;

        let input_path = workspace.path().join("test_file.txt");
        writeln!(File::create(&input_path)?, "Test data for file")?;

        let outcome =
            split_files_into_chunks(&[input_path], None, workspace.path(), 10, "output");

        assert!(outcome.is_err());
        Ok(())
    }

    /// A file with non-UTF-8 bytes does not fail the split: one chunk is still
    /// produced and it still carries both markers.
    #[test]
    fn test_split_binary_files_error() -> io::Result<()> {
        const SIZE_LIMIT: u64 = 300;

        let workspace = tempdir()?;

        let binary_path = workspace.path().join("test_file.bin");
        File::create(&binary_path)?.write_all(&[0, 159, 146, 150])?;

        let chunks =
            split_files_into_chunks(&[binary_path], None, workspace.path(), SIZE_LIMIT, "output")?;

        assert_eq!(chunks.len(), 1);
        assert_chunks_are_well_formed(chunks, SIZE_LIMIT)
    }
}