1use std::fs::File;
7use std::io::{BufReader, Read, Seek};
8use std::path::Path;
9
10use thiserror::Error;
11use zip::ZipArchive;
12
13use crate::dat::{DatReader, ParsedLine};
14
15#[derive(Error, Debug)]
17pub enum ZipError {
18 #[error("I/O error: {0}")]
19 Io(#[from] std::io::Error),
20
21 #[error("ZIP error: {0}")]
22 Zip(#[from] zip::result::ZipError),
23
24 #[error("DAT file not found in archive: {0}")]
25 DatFileNotFound(String),
26}
27
28pub struct ZipExtractor<R: Read + Seek> {
30 archive: ZipArchive<R>,
31}
32
33impl ZipExtractor<BufReader<File>> {
34 pub fn open<P: AsRef<Path>>(path: P) -> Result<Self, ZipError> {
36 let file = File::open(path)?;
37 let reader = BufReader::new(file);
38 let archive = ZipArchive::new(reader)?;
39 Ok(Self { archive })
40 }
41
42 pub fn stats(&mut self) -> Result<ArchiveStats, ZipError> {
44 let dat_files = self.list_dat_files();
45 let total_files = self.archive.len();
46 let mut total_size = 0u64;
47
48 for i in 0..self.archive.len() {
49 let file = self.archive.by_index(i)?;
50 total_size += file.size();
51 }
52
53 Ok(ArchiveStats {
54 total_files,
55 dat_files,
56 total_size_bytes: total_size,
57 })
58 }
59
60 pub fn count_all_records(
62 &mut self,
63 ) -> Result<std::collections::HashMap<String, usize>, crate::ParseError> {
64 let dat_files = self.list_dat_files();
65 let mut counts = std::collections::HashMap::new();
66
67 for dat_file in dat_files {
68 let count = self.process_dat_streaming(&dat_file, |_| true)?;
69 counts.insert(dat_file, count);
70 }
71
72 Ok(counts)
73 }
74}
75
76impl<R: Read + Seek> ZipExtractor<R> {
77 pub fn new(archive: ZipArchive<R>) -> Self {
79 Self { archive }
80 }
81
82 pub fn list_dat_files(&mut self) -> Vec<String> {
84 let mut files = Vec::new();
85 for i in 0..self.archive.len() {
86 if let Ok(file) = self.archive.by_index(i) {
87 let name = file.name().to_string();
88 if name.to_uppercase().ends_with(".DAT") {
89 files.push(name);
90 }
91 }
92 }
93 files
94 }
95
96 pub fn list_files(&mut self) -> Vec<String> {
98 let mut files = Vec::new();
99 for i in 0..self.archive.len() {
100 if let Ok(file) = self.archive.by_index(i) {
101 files.push(file.name().to_string());
102 }
103 }
104 files
105 }
106
107 pub fn file_size(&mut self, name: &str) -> Result<u64, ZipError> {
109 let file = self.archive.by_name(name)?;
110 Ok(file.size())
111 }
112
113 fn find_file_index(&mut self, name: &str) -> Option<usize> {
115 for i in 0..self.archive.len() {
117 if let Ok(file) = self.archive.by_index(i) {
118 if file.name() == name {
119 return Some(i);
120 }
121 }
122 }
123
124 let name_upper = name.to_uppercase();
126 for i in 0..self.archive.len() {
127 if let Ok(file) = self.archive.by_index(i) {
128 if file.name().to_uppercase() == name_upper {
129 return Some(i);
130 }
131 }
132 }
133
134 None
135 }
136
137 pub fn stream_dat(&mut self, name: &str) -> Result<impl Read + '_, ZipError> {
140 let index = self
142 .find_file_index(name)
143 .ok_or_else(|| ZipError::DatFileNotFound(name.to_string()))?;
144
145 self.archive.by_index(index).map_err(ZipError::Zip)
146 }
147
148 pub fn process_dat_streaming<F>(
151 &mut self,
152 dat_name: &str,
153 mut callback: F,
154 ) -> Result<usize, crate::ParseError>
155 where
156 F: FnMut(ParsedLine) -> bool,
157 {
158 let reader = self.stream_dat(dat_name)?;
159 let mut dat_reader = DatReader::new(reader);
160 let mut count = 0;
161
162 while let Some(line) = dat_reader.next_line()? {
163 count += 1;
164 if !callback(line) {
165 break;
166 }
167 }
168
169 Ok(count)
170 }
171
172 pub fn get_file_creation_date(&mut self) -> Option<String> {
180 let index = self.find_file_index("counts")?;
182 let mut file = self.archive.by_index(index).ok()?;
183
184 let mut contents = String::new();
185 file.read_to_string(&mut contents).ok()?;
186
187 for line in contents.lines() {
189 if line.starts_with("File Creation Date:") {
190 let date_str = line.trim_start_matches("File Creation Date:").trim();
191 return Some(date_str.to_string());
192 }
193 }
194
195 None
196 }
197}
198
199impl From<ZipError> for crate::ParseError {
200 fn from(err: ZipError) -> Self {
201 match err {
202 ZipError::Io(e) => crate::ParseError::Io(e),
203 ZipError::Zip(e) => crate::ParseError::Zip(e),
204 ZipError::DatFileNotFound(name) => crate::ParseError::InvalidFormat {
205 line: 0,
206 message: format!("DAT file not found: {}", name),
207 },
208 }
209 }
210}
211
/// Summary information about a ZIP archive.
#[derive(Clone, Debug)]
pub struct ArchiveStats {
    /// Number of entries in the archive.
    pub total_files: usize,
    /// Names of the entries whose name ends in `.dat` (case-insensitive).
    pub dat_files: Vec<String>,
    /// Sum of the sizes reported for all entries, in bytes.
    pub total_size_bytes: u64,
}
219
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::{Cursor, Write};

    /// Builds an in-memory ZIP archive (stored, no compression) holding the
    /// given `(name, contents)` entries in order.
    fn build_zip(entries: &[(&str, &str)]) -> Vec<u8> {
        let mut buf = Vec::new();
        {
            let mut writer = zip::ZipWriter::new(Cursor::new(&mut buf));
            let options = zip::write::SimpleFileOptions::default()
                .compression_method(zip::CompressionMethod::Stored);

            for (name, contents) in entries {
                writer.start_file(*name, options).unwrap();
                writer.write_all(contents.as_bytes()).unwrap();
            }

            writer.finish().unwrap();
        }
        buf
    }

    /// Wraps raw archive bytes in a ready-to-use extractor.
    fn extractor_for(data: Vec<u8>) -> ZipExtractor<Cursor<Vec<u8>>> {
        ZipExtractor::new(ZipArchive::new(Cursor::new(data)).unwrap())
    }

    /// Archive with two conventionally named DAT files.
    fn create_test_zip() -> Vec<u8> {
        build_zip(&[
            (
                "HD.dat",
                concat!("HD|1|||TEST|A|HA|\n", "HD|2|||TEST2|A|HA|\n"),
            ),
            ("EN.dat", "EN|1|||TEST|L||John||\n"),
        ])
    }

    /// Archive whose DAT entries use unusual casing, plus one non-DAT file.
    fn create_zip_with_mixed_case() -> Vec<u8> {
        build_zip(&[
            ("hd.DAT", "HD|1|||LOWERCASE|A|HA|\n"),
            ("en.Dat", "EN|1|||MIXEDCASE|L||Test||\n"),
            ("readme.txt", "Not a DAT file\n"),
        ])
    }

    /// Archive containing a `counts` entry with a creation-date line.
    fn create_zip_with_counts() -> Vec<u8> {
        build_zip(&[
            (
                "counts",
                concat!(
                    "File Creation Date: Sun Jan 18 12:01:25 EST 2026\n",
                    " 1669550 /home/pubacc/scripts/licweekzipdata/AM.dat\n"
                ),
            ),
            ("HD.dat", "HD|1|||TEST|A|HA|\n"),
        ])
    }

    #[test]
    fn test_list_dat_files() {
        let mut extractor = extractor_for(create_test_zip());

        let files = extractor.list_dat_files();
        assert_eq!(files.len(), 2);
        for expected in ["HD.dat", "EN.dat"] {
            assert!(files.contains(&expected.to_string()));
        }
    }

    #[test]
    fn test_stream_dat() {
        let mut extractor = extractor_for(create_test_zip());

        let count = extractor
            .process_dat_streaming("HD.dat", |line| {
                assert_eq!(line.record_type, "HD");
                true
            })
            .unwrap();

        assert_eq!(count, 2);
    }

    #[test]
    fn test_list_all_files() {
        let mut extractor = extractor_for(create_zip_with_mixed_case());

        let files = extractor.list_files();
        assert_eq!(files.len(), 3);
        for expected in ["hd.DAT", "en.Dat", "readme.txt"] {
            assert!(files.contains(&expected.to_string()));
        }
    }

    #[test]
    fn test_list_dat_files_mixed_case() {
        let mut extractor = extractor_for(create_zip_with_mixed_case());

        // Both oddly cased DAT entries are found; readme.txt is excluded.
        let dat_files = extractor.list_dat_files();
        assert_eq!(dat_files.len(), 2);
    }

    #[test]
    fn test_file_size() {
        let mut extractor = extractor_for(create_test_zip());

        let size = extractor.file_size("HD.dat").unwrap();
        assert!(size > 0);
    }

    #[test]
    fn test_file_size_not_found() {
        let mut extractor = extractor_for(create_test_zip());

        assert!(extractor.file_size("nonexistent.dat").is_err());
    }

    #[test]
    fn test_stream_dat_case_insensitive() {
        let mut extractor = extractor_for(create_zip_with_mixed_case());

        // The archive stores "hd.DAT"; the lookup must still resolve "HD.dat".
        let count = extractor
            .process_dat_streaming("HD.dat", |line| {
                assert_eq!(line.record_type, "HD");
                true
            })
            .unwrap();

        assert_eq!(count, 1);
    }

    #[test]
    fn test_stream_dat_not_found() {
        let mut extractor = extractor_for(create_test_zip());

        let result = extractor.stream_dat("NONEXISTENT.dat");
        assert!(result.is_err());

        if let Err(ZipError::DatFileNotFound(name)) = result {
            assert_eq!(name, "NONEXISTENT.dat");
        } else {
            panic!("Expected DatFileNotFound error");
        }
    }

    #[test]
    fn test_process_dat_early_termination() {
        let mut extractor = extractor_for(create_test_zip());

        let mut processed = 0;
        let count = extractor
            .process_dat_streaming("HD.dat", |_line| {
                processed += 1;
                // Ask to stop right after the first record.
                false
            })
            .unwrap();

        // The record that triggered the stop is still counted.
        assert_eq!(count, 1);
        assert_eq!(processed, 1);
    }

    #[test]
    fn test_zip_error_to_parse_error() {
        let parse_err: crate::ParseError =
            ZipError::DatFileNotFound("test.dat".to_string()).into();
        assert!(parse_err.to_string().contains("test.dat"));
    }

    #[test]
    fn test_zip_error_io_conversion() {
        let io_err = std::io::Error::new(std::io::ErrorKind::NotFound, "test error");
        let parse_err: crate::ParseError = ZipError::from(io_err).into();
        assert!(matches!(parse_err, crate::ParseError::Io(_)));
    }

    #[test]
    fn test_get_file_creation_date() {
        let mut extractor = extractor_for(create_zip_with_counts());

        let date = extractor.get_file_creation_date();
        assert!(date.is_some());
        assert_eq!(date.as_deref(), Some("Sun Jan 18 12:01:25 EST 2026"));
    }

    #[test]
    fn test_get_file_creation_date_no_counts_file() {
        // This archive has no `counts` entry at all.
        let mut extractor = extractor_for(create_test_zip());

        assert!(extractor.get_file_creation_date().is_none());
    }
}