1use scribe_core::{Result, ScribeError, bytes_to_human};
7use std::path::{Path, PathBuf};
8use std::time::{SystemTime, UNIX_EPOCH};
9use std::fs;
10use std::cell::RefCell;
11use serde::{Serialize, Deserialize};
12
/// Snapshot of the filesystem metadata collected for a single path.
///
/// Field descriptions below reflect how `MetadataExtractor` fills the struct in;
/// values built by hand or deserialized from elsewhere may follow other rules.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileMetadata {
    /// Path the metadata was read for.
    pub path: PathBuf,
    /// Size in bytes as reported by the filesystem.
    pub size: u64,
    /// Human-readable rendering of `size` (produced by `bytes_to_human`).
    pub size_human: String,
    /// Creation time in whole seconds since the Unix epoch; `None` when the
    /// platform cannot report it or the time lies before the epoch.
    pub created: Option<u64>,
    /// Last-modification time in whole seconds since the Unix epoch, if available.
    pub modified: Option<u64>,
    /// Last-access time in whole seconds since the Unix epoch, if available.
    pub accessed: Option<u64>,
    /// Read-only flag derived from the entry's permissions.
    pub readonly: bool,
    /// Hidden flag: a leading `.` in the file name on Unix, the hidden attribute
    /// bit on Windows.
    pub hidden: bool,
    /// Executable flag: any execute permission bit on Unix, a known executable
    /// extension on Windows.
    pub executable: bool,
    /// `true` when the path itself is a symbolic link (links are not followed).
    pub symlink: bool,
    /// Link destination when `symlink` is set and the link could be read.
    pub symlink_target: Option<PathBuf>,
    /// Raw Unix mode bits; `0` on Windows.
    pub permissions: u32,
    /// Classification of the entry (regular file, directory, ...).
    pub file_type: FileSystemType,
    /// Inode number (Unix only; `None` on Windows).
    pub inode: Option<u64>,
    /// Hard-link count (Unix only; `None` on Windows).
    pub links: Option<u64>,
    /// Owning user id (Unix only; `None` on Windows).
    pub uid: Option<u32>,
    /// Owning group id (Unix only; `None` on Windows).
    pub gid: Option<u32>,
}
34
/// Kind of filesystem entry a path refers to.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum FileSystemType {
    /// Ordinary file.
    RegularFile,
    /// Symbolic link (the link itself, not its target).
    SymbolicLink,
    /// Directory.
    Directory,
    /// Named pipe (only detected on Unix).
    FIFO,
    /// Socket (only detected on Unix).
    Socket,
    /// Character device (only detected on Unix).
    CharacterDevice,
    /// Block device (only detected on Unix).
    BlockDevice,
    /// Anything that matched none of the categories above.
    Unknown,
}
47
/// Aggregate size statistics over a set of files.
///
/// The derived `Default` (all zeros, empty `total_size_human`) is what
/// `MetadataExtractor::calculate_size_stats` returns for an empty input.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct SizeStats {
    /// Sum of all file sizes in bytes.
    pub total_size: u64,
    /// Human-readable rendering of `total_size`.
    pub total_size_human: String,
    /// Number of files that contributed to the statistics.
    pub file_count: usize,
    /// Mean size in bytes (integer division, rounded down).
    pub average_size: u64,
    /// Median size in bytes; for an even count, the rounded-down mean of the
    /// two middle values.
    pub median_size: u64,
    /// Smallest size in bytes.
    pub min_size: u64,
    /// Largest size in bytes.
    pub max_size: u64,
    /// Number of files per size bucket.
    pub size_distribution: SizeDistribution,
}
60
/// File counts per size bucket.
///
/// Bucket bounds are the ones applied by `MetadataExtractor::calculate_size_stats`
/// (inclusive, in bytes).
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct SizeDistribution {
    /// Files of at most 1024 bytes.
    pub tiny: usize,
    /// Files of 1025 to 10240 bytes.
    pub small: usize,
    /// Files of 10241 to 102400 bytes.
    pub medium: usize,
    /// Files of 102401 to 1048576 bytes.
    pub large: usize,
    /// Files larger than 1048576 bytes.
    pub huge: usize,
}
70
/// Reads filesystem metadata for paths, optionally memoising the results.
pub struct MetadataExtractor {
    /// Previously extracted metadata keyed by path. Wrapped in a `RefCell` so
    /// entries can be added and cleared through `&self`.
    cache: RefCell<std::collections::HashMap<PathBuf, FileMetadata>>,
    /// When `false`, `extract_metadata` neither reads from nor writes to `cache`.
    cache_enabled: bool,
}
76
77impl Default for FileMetadata {
78 fn default() -> Self {
79 Self {
80 path: PathBuf::new(),
81 size: 0,
82 size_human: "0 B".to_string(),
83 created: None,
84 modified: None,
85 accessed: None,
86 readonly: false,
87 hidden: false,
88 executable: false,
89 symlink: false,
90 symlink_target: None,
91 permissions: 0,
92 file_type: FileSystemType::Unknown,
93 inode: None,
94 links: None,
95 uid: None,
96 gid: None,
97 }
98 }
99}
100
101impl MetadataExtractor {
102 pub fn new() -> Self {
104 Self {
105 cache: RefCell::new(std::collections::HashMap::new()),
106 cache_enabled: true,
107 }
108 }
109
110 pub fn without_cache() -> Self {
112 Self {
113 cache: RefCell::new(std::collections::HashMap::new()),
114 cache_enabled: false,
115 }
116 }
117
118 pub async fn extract_metadata(&self, path: &Path) -> Result<FileMetadata> {
120 if self.cache_enabled {
122 if let Some(cached) = self.cache.borrow().get(path) {
123 return Ok(cached.clone());
124 }
125 }
126
127 let metadata = self.extract_metadata_uncached(path).await?;
128
129 if self.cache_enabled {
131 self.cache.borrow_mut().insert(path.to_path_buf(), metadata.clone());
132 }
133
134 Ok(metadata)
135 }
136
137 async fn extract_metadata_uncached(&self, path: &Path) -> Result<FileMetadata> {
139 let std_metadata = fs::symlink_metadata(path)
140 .map_err(|e| ScribeError::io(format!("Failed to read metadata for {}: {}", path.display(), e), e))?;
141
142 let size = std_metadata.len();
143 let size_human = bytes_to_human(size);
144
145 let created = system_time_to_timestamp(std_metadata.created().ok());
147 let modified = system_time_to_timestamp(std_metadata.modified().ok());
148 let accessed = system_time_to_timestamp(std_metadata.accessed().ok());
149
150 let file_type = classify_file_type(&std_metadata);
152
153 let (symlink, symlink_target) = if std_metadata.file_type().is_symlink() {
155 let target = fs::read_link(path).ok();
156 (true, target)
157 } else {
158 (false, None)
159 };
160
161 let (permissions, readonly, hidden, executable, inode, links, uid, gid) =
163 extract_platform_metadata(path, &std_metadata)?;
164
165 Ok(FileMetadata {
166 path: path.to_path_buf(),
167 size,
168 size_human,
169 created,
170 modified,
171 accessed,
172 readonly,
173 hidden,
174 executable,
175 symlink,
176 symlink_target,
177 permissions,
178 file_type,
179 inode,
180 links,
181 uid,
182 gid,
183 })
184 }
185
186 pub async fn extract_metadata_batch(&self, paths: &[PathBuf]) -> Vec<Result<FileMetadata>> {
188 let mut results = Vec::with_capacity(paths.len());
191 for path in paths {
192 results.push(self.extract_metadata(path).await);
193 }
194 results
195 }
196
197 pub fn calculate_size_stats(&self, files: &[FileMetadata]) -> SizeStats {
199 if files.is_empty() {
200 return SizeStats::default();
201 }
202
203 let mut sizes: Vec<u64> = files.iter().map(|f| f.size).collect();
204 sizes.sort_unstable();
205
206 let total_size = sizes.iter().sum();
207 let file_count = files.len();
208 let average_size = total_size / file_count as u64;
209 let median_size = if file_count % 2 == 0 {
210 (sizes[file_count / 2 - 1] + sizes[file_count / 2]) / 2
211 } else {
212 sizes[file_count / 2]
213 };
214
215 let min_size = sizes[0];
216 let max_size = sizes[sizes.len() - 1];
217
218 let mut distribution = SizeDistribution::default();
220 for &size in &sizes {
221 match size {
222 0..=1024 => distribution.tiny += 1,
223 1025..=10240 => distribution.small += 1,
224 10241..=102400 => distribution.medium += 1,
225 102401..=1048576 => distribution.large += 1,
226 _ => distribution.huge += 1,
227 }
228 }
229
230 SizeStats {
231 total_size,
232 total_size_human: bytes_to_human(total_size),
233 file_count,
234 average_size,
235 median_size,
236 min_size,
237 max_size,
238 size_distribution: distribution,
239 }
240 }
241
242 pub fn clear_cache(&self) {
244 self.cache.borrow_mut().clear();
245 }
246
247 pub fn cache_stats(&self) -> (usize, usize) {
249 let cache = self.cache.borrow();
250 (cache.len(), cache.capacity())
251 }
252
253 pub fn is_likely_text_file(&self, metadata: &FileMetadata) -> bool {
255 if metadata.size > 10 * 1024 * 1024 { return false;
258 }
259
260 matches!(metadata.file_type,
262 FileSystemType::RegularFile | FileSystemType::SymbolicLink)
263 }
264
265 pub fn is_recently_modified(&self, metadata: &FileMetadata, hours: u64) -> bool {
267 if let Some(modified) = metadata.modified {
268 let now = SystemTime::now()
269 .duration_since(UNIX_EPOCH)
270 .unwrap()
271 .as_secs();
272 let threshold = hours * 3600;
273
274 now.saturating_sub(modified) <= threshold
275 } else {
276 false
277 }
278 }
279}
280
281impl Default for MetadataExtractor {
282 fn default() -> Self {
283 Self::new()
284 }
285}
286
/// Converts an optional `SystemTime` to whole seconds since the Unix epoch.
///
/// Yields `None` when no time is given or when it lies before the epoch;
/// sub-second precision is discarded.
fn system_time_to_timestamp(time: Option<SystemTime>) -> Option<u64> {
    let since_epoch = time?.duration_since(UNIX_EPOCH).ok()?;
    Some(since_epoch.as_secs())
}
292
293fn classify_file_type(metadata: &fs::Metadata) -> FileSystemType {
295 let file_type = metadata.file_type();
296
297 if file_type.is_file() {
298 FileSystemType::RegularFile
299 } else if file_type.is_dir() {
300 FileSystemType::Directory
301 } else if file_type.is_symlink() {
302 FileSystemType::SymbolicLink
303 } else {
304 #[cfg(unix)]
306 {
307 use std::os::unix::fs::FileTypeExt;
308 if file_type.is_fifo() {
309 return FileSystemType::FIFO;
310 } else if file_type.is_socket() {
311 return FileSystemType::Socket;
312 } else if file_type.is_char_device() {
313 return FileSystemType::CharacterDevice;
314 } else if file_type.is_block_device() {
315 return FileSystemType::BlockDevice;
316 }
317 }
318
319 FileSystemType::Unknown
320 }
321}
322
323#[cfg(unix)]
325fn extract_platform_metadata(path: &Path, metadata: &fs::Metadata) -> Result<(u32, bool, bool, bool, Option<u64>, Option<u64>, Option<u32>, Option<u32>)> {
326 use std::os::unix::fs::{MetadataExt, PermissionsExt};
327
328 let permissions = metadata.permissions().mode();
329 let readonly = !metadata.permissions().readonly();
330
331 let hidden = path.file_name()
333 .and_then(|name| name.to_str())
334 .map_or(false, |name| name.starts_with('.'));
335
336 let executable = permissions & 0o111 != 0;
338
339 let inode = Some(metadata.ino());
340 let links = Some(metadata.nlink());
341 let uid = Some(metadata.uid());
342 let gid = Some(metadata.gid());
343
344 Ok((permissions, readonly, hidden, executable, inode, links, uid, gid))
345}
346
/// Windows implementation: derives the platform-specific fields from `metadata`.
///
/// Returns, in order: `(permissions, readonly, hidden, executable, inode,
/// links, uid, gid)` where
///
/// * `permissions` is always `0`,
/// * `readonly` comes from the file's permissions,
/// * `hidden` reflects bit `0x2` of the file attributes,
/// * `executable` is `true` when the extension (case-insensitively) is one of
///   `exe`, `bat`, `cmd`, `com` or `scr`,
/// * `inode`, `links`, `uid` and `gid` are always `None` on this platform.
///
/// # Errors
///
/// This implementation never fails; the `Result` keeps the signature aligned
/// with the other platform variants.
#[cfg(windows)]
fn extract_platform_metadata(path: &Path, metadata: &fs::Metadata) -> Result<(u32, bool, bool, bool, Option<u64>, Option<u64>, Option<u32>, Option<u32>)> {
    use std::os::windows::fs::MetadataExt;

    // Attribute bit tested for the hidden flag.
    const HIDDEN_ATTRIBUTE: u32 = 0x2;
    // Extensions treated as executable.
    const EXECUTABLE_EXTENSIONS: [&str; 5] = ["exe", "bat", "cmd", "com", "scr"];

    let permissions = 0;
    let readonly = metadata.permissions().readonly();
    let hidden = metadata.file_attributes() & HIDDEN_ATTRIBUTE != 0;

    let executable = match path.extension().and_then(|ext| ext.to_str()) {
        Some(ext) => EXECUTABLE_EXTENSIONS.contains(&ext.to_lowercase().as_str()),
        None => false,
    };

    Ok((permissions, readonly, hidden, executable, None, None, None, None))
}
373
374impl SizeStats {
375 pub fn from_sizes(sizes: &[u64]) -> Self {
377 let mut extractor = MetadataExtractor::new();
378 let fake_metadata: Vec<FileMetadata> = sizes.iter()
379 .enumerate()
380 .map(|(i, &size)| FileMetadata {
381 path: PathBuf::from(format!("file_{}", i)),
382 size,
383 size_human: bytes_to_human(size),
384 ..Default::default()
385 })
386 .collect();
387
388 extractor.calculate_size_stats(&fake_metadata)
389 }
390
391 pub fn summary(&self) -> String {
393 format!(
394 "Files: {}, Total: {}, Avg: {}, Range: {} - {}",
395 self.file_count,
396 self.total_size_human,
397 bytes_to_human(self.average_size),
398 bytes_to_human(self.min_size),
399 bytes_to_human(self.max_size)
400 )
401 }
402
403 pub fn distribution_summary(&self) -> String {
405 format!(
406 "Tiny: {}, Small: {}, Medium: {}, Large: {}, Huge: {}",
407 self.size_distribution.tiny,
408 self.size_distribution.small,
409 self.size_distribution.medium,
410 self.size_distribution.large,
411 self.size_distribution.huge
412 )
413 }
414}
415
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// A freshly written regular file reports its path, size, type and mtime.
    #[tokio::test]
    async fn test_metadata_extraction() {
        let temp_dir = TempDir::new().unwrap();
        let test_file = temp_dir.path().join("test.txt");

        let content = "Hello, world! This is a test file.";
        fs::write(&test_file, content).unwrap();

        let extractor = MetadataExtractor::new();
        let metadata = extractor.extract_metadata(&test_file).await.unwrap();

        assert_eq!(metadata.path, test_file);
        assert_eq!(metadata.size, content.len() as u64);
        assert!(!metadata.size_human.is_empty());
        assert!(metadata.modified.is_some());
        assert_eq!(metadata.file_type, FileSystemType::RegularFile);
        assert!(!metadata.symlink);
    }

    /// Symlinks are reported as links together with their target (Unix only).
    #[tokio::test]
    async fn test_symlink_detection() {
        let temp_dir = TempDir::new().unwrap();
        let original_file = temp_dir.path().join("original.txt");
        let symlink_file = temp_dir.path().join("link.txt");

        fs::write(&original_file, "original content").unwrap();

        #[cfg(unix)]
        {
            std::os::unix::fs::symlink(&original_file, &symlink_file).unwrap();

            let extractor = MetadataExtractor::new();
            let metadata = extractor.extract_metadata(&symlink_file).await.unwrap();

            assert!(metadata.symlink);
            assert_eq!(metadata.symlink_target, Some(original_file));
        }
    }

    /// Batch extraction yields one successful result per input path.
    #[tokio::test]
    async fn test_batch_metadata_extraction() {
        let temp_dir = TempDir::new().unwrap();
        let mut file_paths = Vec::new();

        for i in 0..5 {
            let file_path = temp_dir.path().join(format!("test_{}.txt", i));
            fs::write(&file_path, format!("Content for file {}", i)).unwrap();
            file_paths.push(file_path);
        }

        let extractor = MetadataExtractor::new();
        let results = extractor.extract_metadata_batch(&file_paths).await;

        assert_eq!(results.len(), 5);
        for result in results {
            assert!(result.is_ok());
            let metadata = result.unwrap();
            assert_eq!(metadata.file_type, FileSystemType::RegularFile);
            assert!(metadata.size > 0);
        }
    }

    /// Statistics computed from real files match the sizes written to disk.
    #[tokio::test]
    async fn test_size_statistics() {
        let temp_dir = TempDir::new().unwrap();
        let extractor = MetadataExtractor::new();
        let mut files = Vec::new();

        let sizes = [100, 500, 1500, 5000, 50000];
        for (i, &size) in sizes.iter().enumerate() {
            let file_path = temp_dir.path().join(format!("test_{}.txt", i));
            let content = "x".repeat(size);
            fs::write(&file_path, content).unwrap();

            let metadata = extractor.extract_metadata(&file_path).await.unwrap();
            files.push(metadata);
        }

        let stats = extractor.calculate_size_stats(&files);

        assert_eq!(stats.file_count, 5);
        assert_eq!(stats.total_size, sizes.iter().sum::<usize>() as u64);
        assert_eq!(stats.min_size, 100);
        assert_eq!(stats.max_size, 50000);

        assert_eq!(stats.size_distribution.tiny, 2);
        assert_eq!(stats.size_distribution.small, 2);
        assert_eq!(stats.size_distribution.medium, 1);
        assert_eq!(stats.size_distribution.large, 0);
        assert_eq!(stats.size_distribution.huge, 0);
    }

    /// `from_sizes` reports count, total, mean, median and range.
    #[test]
    fn test_size_stats_from_sizes() {
        let sizes = [1000, 2000, 3000, 4000, 5000];
        let stats = SizeStats::from_sizes(&sizes);

        assert_eq!(stats.file_count, 5);
        assert_eq!(stats.total_size, 15000);
        assert_eq!(stats.average_size, 3000);
        assert_eq!(stats.median_size, 3000);
        assert_eq!(stats.min_size, 1000);
        assert_eq!(stats.max_size, 5000);
    }

    /// A single size still produces consistent statistics.
    #[test]
    fn test_size_stats_single_file() {
        let sizes = [1000];
        let stats = SizeStats::from_sizes(&sizes);

        assert_eq!(stats.file_count, 1);
        assert_eq!(stats.total_size, 1000);
    }

    /// One size per bucket lands in exactly that bucket.
    #[test]
    fn test_size_distribution() {
        let sizes = [500, 5000, 50000, 500000, 5000000];
        let stats = SizeStats::from_sizes(&sizes);

        assert_eq!(stats.size_distribution.tiny, 1);
        assert_eq!(stats.size_distribution.small, 1);
        assert_eq!(stats.size_distribution.medium, 1);
        assert_eq!(stats.size_distribution.large, 1);
        assert_eq!(stats.size_distribution.huge, 1);
    }

    /// The cache fills on first lookup, serves repeats and can be cleared.
    #[tokio::test]
    async fn test_cache_functionality() {
        let temp_dir = TempDir::new().unwrap();
        let test_file = temp_dir.path().join("test.txt");
        fs::write(&test_file, "test content").unwrap();

        let extractor = MetadataExtractor::new();

        let metadata1 = extractor.extract_metadata(&test_file).await.unwrap();
        let (cache_size, _) = extractor.cache_stats();
        assert_eq!(cache_size, 1);

        let metadata2 = extractor.extract_metadata(&test_file).await.unwrap();
        assert_eq!(metadata1.size, metadata2.size);
        assert_eq!(metadata1.modified, metadata2.modified);

        extractor.clear_cache();
        let (cache_size, _) = extractor.cache_stats();
        assert_eq!(cache_size, 0);
    }

    /// A file written just now counts as recently modified.
    #[tokio::test]
    async fn test_recently_modified() {
        let temp_dir = TempDir::new().unwrap();
        let test_file = temp_dir.path().join("test.txt");
        fs::write(&test_file, "test content").unwrap();

        let extractor = MetadataExtractor::new();
        let metadata = extractor.extract_metadata(&test_file).await.unwrap();

        assert!(extractor.is_recently_modified(&metadata, 1));
        assert!(extractor.is_recently_modified(&metadata, 24));
    }

    /// Directories, regular files and (on Unix) symlinks are classified correctly.
    #[test]
    fn test_file_type_classification() {
        let temp_dir = TempDir::new().unwrap();
        let test_file = temp_dir.path().join("test.txt");
        fs::write(&test_file, "test content").unwrap();

        let dir_metadata = fs::symlink_metadata(temp_dir.path()).unwrap();
        assert_eq!(classify_file_type(&dir_metadata), FileSystemType::Directory);

        let file_metadata = fs::symlink_metadata(&test_file).unwrap();
        assert_eq!(classify_file_type(&file_metadata), FileSystemType::RegularFile);

        #[cfg(unix)]
        {
            let link = temp_dir.path().join("link.txt");
            std::os::unix::fs::symlink(&test_file, &link).unwrap();

            let link_metadata = fs::symlink_metadata(&link).unwrap();
            assert_eq!(classify_file_type(&link_metadata), FileSystemType::SymbolicLink);
        }
    }
}