1use dashmap::DashMap;
7use scribe_core::{bytes_to_human, Result, ScribeError};
8use serde::{Deserialize, Serialize};
9use std::fs;
10use std::path::{Path, PathBuf};
11use std::time::{SystemTime, UNIX_EPOCH};
12
/// Snapshot of one filesystem entry's metadata, as populated by
/// `MetadataExtractor` in this file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileMetadata {
    /// Path that was passed to the extractor.
    pub path: PathBuf,
    /// Size in bytes as reported by `symlink_metadata` (symlinks are not followed).
    pub size: u64,
    /// Rendering of `size` produced by `bytes_to_human`.
    pub size_human: String,
    /// Creation time in whole seconds since the Unix epoch; `None` when the
    /// platform does not report it or it predates the epoch.
    pub created: Option<u64>,
    /// Last-modification time in whole seconds since the Unix epoch, same `None` rules.
    pub modified: Option<u64>,
    /// Last-access time in whole seconds since the Unix epoch, same `None` rules.
    pub accessed: Option<u64>,
    /// Read-only flag derived from the entry's permissions.
    pub readonly: bool,
    /// Unix: the file name starts with `.`. Windows: attribute bit `0x2` is set.
    pub hidden: bool,
    /// Unix: any execute bit (`0o111`) is set. Windows: the extension is one of
    /// `exe`, `bat`, `cmd`, `com`, `scr` (case-insensitive).
    pub executable: bool,
    /// Whether the entry itself is a symbolic link.
    pub symlink: bool,
    /// Link target from `read_link`; `None` for non-links or when reading the link fails.
    pub symlink_target: Option<PathBuf>,
    /// Unix: raw mode bits. Windows: always `0`.
    pub permissions: u32,
    /// Kind of filesystem entry.
    pub file_type: FileSystemType,
    /// Inode number (`Some` on Unix, `None` on Windows).
    pub inode: Option<u64>,
    /// Hard-link count (`Some` on Unix, `None` on Windows).
    pub links: Option<u64>,
    /// Owning user id (`Some` on Unix, `None` on Windows).
    pub uid: Option<u32>,
    /// Owning group id (`Some` on Unix, `None` on Windows).
    pub gid: Option<u32>,
}
34
/// Kind of filesystem entry, as assigned by `classify_file_type`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum FileSystemType {
    /// Ordinary file.
    RegularFile,
    /// Symbolic link (the link itself, not its target).
    SymbolicLink,
    /// Directory.
    Directory,
    /// Named pipe (only detected on Unix).
    FIFO,
    /// Socket (only detected on Unix).
    Socket,
    /// Character device (only detected on Unix).
    CharacterDevice,
    /// Block device (only detected on Unix).
    BlockDevice,
    /// Anything that matched none of the above; also the `FileMetadata` default.
    Unknown,
}
47
/// Aggregate size statistics over a set of files.
///
/// The derived `Default` (all zeros, empty string) is what
/// `MetadataExtractor::calculate_size_stats` returns for an empty input.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct SizeStats {
    /// Sum of all file sizes in bytes.
    pub total_size: u64,
    /// Rendering of `total_size` produced by `bytes_to_human`.
    pub total_size_human: String,
    /// Number of files that were aggregated.
    pub file_count: usize,
    /// Integer mean: `total_size / file_count`.
    pub average_size: u64,
    /// Middle value of the sorted sizes; for an even count, the integer mean
    /// of the two middle values.
    pub median_size: u64,
    /// Smallest file size in bytes.
    pub min_size: u64,
    /// Largest file size in bytes.
    pub max_size: u64,
    /// Per-bucket file counts.
    pub size_distribution: SizeDistribution,
}
60
/// File counts per size bucket, using the bounds applied by
/// `MetadataExtractor::calculate_size_stats`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct SizeDistribution {
    /// Files of 0 to 1024 bytes.
    pub tiny: usize,
    /// Files of 1025 to 10240 bytes.
    pub small: usize,
    /// Files of 10241 to 102400 bytes.
    pub medium: usize,
    /// Files of 102401 to 1048576 bytes.
    pub large: usize,
    /// Files larger than 1048576 bytes.
    pub huge: usize,
}
70
/// Reads `FileMetadata` from the filesystem, optionally memoizing results by path.
pub struct MetadataExtractor {
    /// Previously extracted metadata, keyed by the path that was queried.
    cache: DashMap<PathBuf, FileMetadata>,
    /// When `false`, the cache is neither consulted nor filled.
    cache_enabled: bool,
}
76
77impl Default for FileMetadata {
78 fn default() -> Self {
79 Self {
80 path: PathBuf::new(),
81 size: 0,
82 size_human: "0 B".to_string(),
83 created: None,
84 modified: None,
85 accessed: None,
86 readonly: false,
87 hidden: false,
88 executable: false,
89 symlink: false,
90 symlink_target: None,
91 permissions: 0,
92 file_type: FileSystemType::Unknown,
93 inode: None,
94 links: None,
95 uid: None,
96 gid: None,
97 }
98 }
99}
100
101impl MetadataExtractor {
102 pub fn new() -> Self {
104 Self {
105 cache: DashMap::new(),
106 cache_enabled: true,
107 }
108 }
109
110 pub fn without_cache() -> Self {
112 Self {
113 cache: DashMap::new(),
114 cache_enabled: false,
115 }
116 }
117
118 pub async fn extract_metadata(&self, path: &Path) -> Result<FileMetadata> {
120 if self.cache_enabled {
122 if let Some(cached) = self.cache.get(path) {
123 return Ok(cached.clone());
124 }
125 }
126
127 let metadata = self.extract_metadata_uncached(path).await?;
128
129 if self.cache_enabled {
131 self.cache.insert(path.to_path_buf(), metadata.clone());
132 }
133
134 Ok(metadata)
135 }
136
137 async fn extract_metadata_uncached(&self, path: &Path) -> Result<FileMetadata> {
139 let std_metadata = tokio::fs::symlink_metadata(path).await.map_err(|e| {
140 ScribeError::io(
141 format!("Failed to read metadata for {}: {}", path.display(), e),
142 e,
143 )
144 })?;
145
146 let size = std_metadata.len();
147 let size_human = bytes_to_human(size);
148
149 let created = system_time_to_timestamp(std_metadata.created().ok());
151 let modified = system_time_to_timestamp(std_metadata.modified().ok());
152 let accessed = system_time_to_timestamp(std_metadata.accessed().ok());
153
154 let file_type = classify_file_type(&std_metadata);
156
157 let (symlink, symlink_target) = if std_metadata.file_type().is_symlink() {
159 let target = tokio::fs::read_link(path).await.ok();
160 (true, target)
161 } else {
162 (false, None)
163 };
164
165 let (permissions, readonly, hidden, executable, inode, links, uid, gid) =
167 extract_platform_metadata(path, &std_metadata)?;
168
169 Ok(FileMetadata {
170 path: path.to_path_buf(),
171 size,
172 size_human,
173 created,
174 modified,
175 accessed,
176 readonly,
177 hidden,
178 executable,
179 symlink,
180 symlink_target,
181 permissions,
182 file_type,
183 inode,
184 links,
185 uid,
186 gid,
187 })
188 }
189
190 pub async fn extract_metadata_batch(&self, paths: &[PathBuf]) -> Vec<Result<FileMetadata>> {
192 let mut results = Vec::with_capacity(paths.len());
195 for path in paths {
196 results.push(self.extract_metadata(path).await);
197 }
198 results
199 }
200
201 pub fn calculate_size_stats(&self, files: &[FileMetadata]) -> SizeStats {
203 if files.is_empty() {
204 return SizeStats::default();
205 }
206
207 let mut sizes: Vec<u64> = files.iter().map(|f| f.size).collect();
208 sizes.sort_unstable();
209
210 let total_size = sizes.iter().sum();
211 let file_count = files.len();
212 let average_size = total_size / file_count as u64;
213 let median_size = if file_count % 2 == 0 {
214 (sizes[file_count / 2 - 1] + sizes[file_count / 2]) / 2
215 } else {
216 sizes[file_count / 2]
217 };
218
219 let min_size = sizes[0];
220 let max_size = sizes[sizes.len() - 1];
221
222 let mut distribution = SizeDistribution::default();
224 for &size in &sizes {
225 match size {
226 0..=1024 => distribution.tiny += 1,
227 1025..=10240 => distribution.small += 1,
228 10241..=102400 => distribution.medium += 1,
229 102401..=1048576 => distribution.large += 1,
230 _ => distribution.huge += 1,
231 }
232 }
233
234 SizeStats {
235 total_size,
236 total_size_human: bytes_to_human(total_size),
237 file_count,
238 average_size,
239 median_size,
240 min_size,
241 max_size,
242 size_distribution: distribution,
243 }
244 }
245
246 pub fn clear_cache(&self) {
248 self.cache.clear();
249 }
250
251 pub fn cache_stats(&self) -> (usize, usize) {
253 (self.cache.len(), self.cache.capacity())
254 }
255
256 pub fn is_likely_text_file(&self, metadata: &FileMetadata) -> bool {
258 if metadata.size > 10 * 1024 * 1024 {
260 return false;
262 }
263
264 matches!(
266 metadata.file_type,
267 FileSystemType::RegularFile | FileSystemType::SymbolicLink
268 )
269 }
270
271 pub fn is_recently_modified(&self, metadata: &FileMetadata, hours: u64) -> bool {
273 if let Some(modified) = metadata.modified {
274 let now = SystemTime::now()
275 .duration_since(UNIX_EPOCH)
276 .unwrap()
277 .as_secs();
278 let threshold = hours * 3600;
279
280 now.saturating_sub(modified) <= threshold
281 } else {
282 false
283 }
284 }
285}
286
287impl Default for MetadataExtractor {
288 fn default() -> Self {
289 Self::new()
290 }
291}
292
/// Converts an optional `SystemTime` into whole seconds since the Unix epoch.
///
/// Yields `None` when no time is supplied or when the time lies before the
/// epoch; sub-second precision is discarded.
fn system_time_to_timestamp(time: Option<SystemTime>) -> Option<u64> {
    let instant = time?;
    match instant.duration_since(UNIX_EPOCH) {
        Ok(elapsed) => Some(elapsed.as_secs()),
        Err(_) => None,
    }
}
298
299fn classify_file_type(metadata: &fs::Metadata) -> FileSystemType {
301 let file_type = metadata.file_type();
302
303 if file_type.is_file() {
304 FileSystemType::RegularFile
305 } else if file_type.is_dir() {
306 FileSystemType::Directory
307 } else if file_type.is_symlink() {
308 FileSystemType::SymbolicLink
309 } else {
310 #[cfg(unix)]
312 {
313 use std::os::unix::fs::FileTypeExt;
314 if file_type.is_fifo() {
315 return FileSystemType::FIFO;
316 } else if file_type.is_socket() {
317 return FileSystemType::Socket;
318 } else if file_type.is_char_device() {
319 return FileSystemType::CharacterDevice;
320 } else if file_type.is_block_device() {
321 return FileSystemType::BlockDevice;
322 }
323 }
324
325 FileSystemType::Unknown
326 }
327}
328
329#[cfg(unix)]
331fn extract_platform_metadata(
332 path: &Path,
333 metadata: &fs::Metadata,
334) -> Result<(
335 u32,
336 bool,
337 bool,
338 bool,
339 Option<u64>,
340 Option<u64>,
341 Option<u32>,
342 Option<u32>,
343)> {
344 use std::os::unix::fs::{MetadataExt, PermissionsExt};
345
346 let permissions = metadata.permissions().mode();
347 let readonly = !metadata.permissions().readonly();
348
349 let hidden = path
351 .file_name()
352 .and_then(|name| name.to_str())
353 .map_or(false, |name| name.starts_with('.'));
354
355 let executable = permissions & 0o111 != 0;
357
358 let inode = Some(metadata.ino());
359 let links = Some(metadata.nlink());
360 let uid = Some(metadata.uid());
361 let gid = Some(metadata.gid());
362
363 Ok((
364 permissions,
365 readonly,
366 hidden,
367 executable,
368 inode,
369 links,
370 uid,
371 gid,
372 ))
373}
374
/// Collects the Windows-specific parts of a file's metadata.
///
/// Returns, in order,
/// `(permissions, readonly, hidden, executable, inode, links, uid, gid)`.
/// `permissions` is always `0` and the last four values are always `None`.
/// `hidden` reflects attribute bit `0x2`, and `executable` is decided purely
/// from the file extension.
#[cfg(windows)]
fn extract_platform_metadata(
    path: &Path,
    metadata: &fs::Metadata,
) -> Result<(
    u32,
    bool,
    bool,
    bool,
    Option<u64>,
    Option<u64>,
    Option<u32>,
    Option<u32>,
)> {
    use std::os::windows::fs::MetadataExt;

    // Attribute bit that marks an entry as hidden.
    const HIDDEN_ATTRIBUTE: u32 = 0x2;

    let readonly = metadata.permissions().readonly();
    let hidden = (metadata.file_attributes() & HIDDEN_ATTRIBUTE) != 0;

    // There is no execute bit to consult, so fall back to well-known extensions.
    let executable = match path.extension().and_then(|ext| ext.to_str()) {
        Some(ext) => {
            let lowered = ext.to_lowercase();
            matches!(lowered.as_str(), "exe" | "bat" | "cmd" | "com" | "scr")
        }
        None => false,
    };

    // No mode bits, inode, link count or owner ids are reported here.
    Ok((0, readonly, hidden, executable, None, None, None, None))
}
426
427impl SizeStats {
428 pub fn from_sizes(sizes: &[u64]) -> Self {
430 let mut extractor = MetadataExtractor::new();
431 let fake_metadata: Vec<FileMetadata> = sizes
432 .iter()
433 .enumerate()
434 .map(|(i, &size)| FileMetadata {
435 path: PathBuf::from(format!("file_{}", i)),
436 size,
437 size_human: bytes_to_human(size),
438 ..Default::default()
439 })
440 .collect();
441
442 extractor.calculate_size_stats(&fake_metadata)
443 }
444
445 pub fn summary(&self) -> String {
447 format!(
448 "Files: {}, Total: {}, Avg: {}, Range: {} - {}",
449 self.file_count,
450 self.total_size_human,
451 bytes_to_human(self.average_size),
452 bytes_to_human(self.min_size),
453 bytes_to_human(self.max_size)
454 )
455 }
456
457 pub fn distribution_summary(&self) -> String {
459 format!(
460 "Tiny: {}, Small: {}, Medium: {}, Large: {}, Huge: {}",
461 self.size_distribution.tiny,
462 self.size_distribution.small,
463 self.size_distribution.medium,
464 self.size_distribution.large,
465 self.size_distribution.huge
466 )
467 }
468}
469
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// A freshly written regular file is reported with the right path, size and type.
    #[tokio::test]
    async fn test_metadata_extraction() {
        let temp_dir = TempDir::new().unwrap();
        let test_file = temp_dir.path().join("test.txt");

        let content = "Hello, world! This is a test file.";
        fs::write(&test_file, content).unwrap();

        let extractor = MetadataExtractor::new();
        let metadata = extractor.extract_metadata(&test_file).await.unwrap();

        assert_eq!(metadata.path, test_file);
        assert_eq!(metadata.size, content.len() as u64);
        assert!(!metadata.size_human.is_empty());
        assert!(metadata.modified.is_some());
        assert_eq!(metadata.file_type, FileSystemType::RegularFile);
        assert!(!metadata.symlink);
    }

    /// A symlink is flagged as such and its target is resolved.
    // Creating symlinks portably needs the Unix API, so the whole test is Unix-only.
    #[cfg(unix)]
    #[tokio::test]
    async fn test_symlink_detection() {
        let temp_dir = TempDir::new().unwrap();
        let original_file = temp_dir.path().join("original.txt");
        let symlink_file = temp_dir.path().join("link.txt");

        fs::write(&original_file, "original content").unwrap();
        std::os::unix::fs::symlink(&original_file, &symlink_file).unwrap();

        let extractor = MetadataExtractor::new();
        let metadata = extractor.extract_metadata(&symlink_file).await.unwrap();

        assert!(metadata.symlink);
        assert_eq!(metadata.symlink_target, Some(original_file));
    }

    /// Batch extraction returns one successful result per input path.
    #[tokio::test]
    async fn test_batch_metadata_extraction() {
        let temp_dir = TempDir::new().unwrap();
        let mut file_paths = Vec::new();

        for i in 0..5 {
            let file_path = temp_dir.path().join(format!("test_{}.txt", i));
            fs::write(&file_path, format!("Content for file {}", i)).unwrap();
            file_paths.push(file_path);
        }

        let extractor = MetadataExtractor::new();
        let results = extractor.extract_metadata_batch(&file_paths).await;

        assert_eq!(results.len(), 5);
        for result in results {
            assert!(result.is_ok());
            let metadata = result.unwrap();
            assert_eq!(metadata.file_type, FileSystemType::RegularFile);
            assert!(metadata.size > 0);
        }
    }

    /// Statistics computed from real files match the sizes that were written.
    #[tokio::test]
    async fn test_size_statistics() {
        let temp_dir = TempDir::new().unwrap();
        let extractor = MetadataExtractor::new();
        let mut files = Vec::new();

        let sizes = [100, 500, 1500, 5000, 50000];
        for (i, &size) in sizes.iter().enumerate() {
            let file_path = temp_dir.path().join(format!("test_{}.txt", i));
            let content = "x".repeat(size);
            fs::write(&file_path, content).unwrap();

            let metadata = extractor.extract_metadata(&file_path).await.unwrap();
            files.push(metadata);
        }

        let stats = extractor.calculate_size_stats(&files);

        assert_eq!(stats.file_count, 5);
        assert_eq!(stats.total_size, sizes.iter().sum::<usize>() as u64);
        assert_eq!(stats.min_size, 100);
        assert_eq!(stats.max_size, 50000);

        assert_eq!(stats.size_distribution.tiny, 2);
        assert_eq!(stats.size_distribution.small, 2);
        assert_eq!(stats.size_distribution.medium, 1);
        assert_eq!(stats.size_distribution.large, 0);
        assert_eq!(stats.size_distribution.huge, 0);
    }

    /// `from_sizes` yields the expected count, total, average, median and range.
    #[test]
    fn test_size_stats_from_sizes() {
        let sizes = [1000, 2000, 3000, 4000, 5000];
        let stats = SizeStats::from_sizes(&sizes);

        assert_eq!(stats.file_count, 5);
        assert_eq!(stats.total_size, 15000);
        assert_eq!(stats.average_size, 3000);
        assert_eq!(stats.median_size, 3000);
        assert_eq!(stats.min_size, 1000);
        assert_eq!(stats.max_size, 5000);
    }

    /// One size per bucket lands in exactly that bucket.
    #[test]
    fn test_size_distribution() {
        let sizes = [500, 5000, 50000, 500000, 5000000];
        let stats = SizeStats::from_sizes(&sizes);

        assert_eq!(stats.size_distribution.tiny, 1);
        assert_eq!(stats.size_distribution.small, 1);
        assert_eq!(stats.size_distribution.medium, 1);
        assert_eq!(stats.size_distribution.large, 1);
        assert_eq!(stats.size_distribution.huge, 1);
    }

    /// The cache is filled on first lookup, reused afterwards and emptied by `clear_cache`.
    #[tokio::test]
    async fn test_cache_functionality() {
        let temp_dir = TempDir::new().unwrap();
        let test_file = temp_dir.path().join("test.txt");
        fs::write(&test_file, "test content").unwrap();

        let extractor = MetadataExtractor::new();

        let metadata1 = extractor.extract_metadata(&test_file).await.unwrap();
        let (cache_size, _) = extractor.cache_stats();
        assert_eq!(cache_size, 1);

        let metadata2 = extractor.extract_metadata(&test_file).await.unwrap();
        assert_eq!(metadata1.size, metadata2.size);
        assert_eq!(metadata1.modified, metadata2.modified);

        extractor.clear_cache();
        let (cache_size, _) = extractor.cache_stats();
        assert_eq!(cache_size, 0);
    }

    /// A file written moments ago counts as recently modified.
    #[tokio::test]
    async fn test_recently_modified() {
        let temp_dir = TempDir::new().unwrap();
        let test_file = temp_dir.path().join("test.txt");
        fs::write(&test_file, "test content").unwrap();

        let extractor = MetadataExtractor::new();
        let metadata = extractor.extract_metadata(&test_file).await.unwrap();

        assert!(extractor.is_recently_modified(&metadata, 1));
        assert!(extractor.is_recently_modified(&metadata, 24));
    }

    /// Regular files, directories and (on Unix) symlinks are classified correctly.
    #[test]
    fn test_file_type_classification() {
        let temp_dir = TempDir::new().unwrap();
        let file_path = temp_dir.path().join("regular.txt");
        fs::write(&file_path, "data").unwrap();

        let file_meta = fs::symlink_metadata(&file_path).unwrap();
        assert_eq!(classify_file_type(&file_meta), FileSystemType::RegularFile);

        let dir_meta = fs::symlink_metadata(temp_dir.path()).unwrap();
        assert_eq!(classify_file_type(&dir_meta), FileSystemType::Directory);

        #[cfg(unix)]
        {
            let link_path = temp_dir.path().join("link.txt");
            std::os::unix::fs::symlink(&file_path, &link_path).unwrap();

            let link_meta = fs::symlink_metadata(&link_path).unwrap();
            assert_eq!(classify_file_type(&link_meta), FileSystemType::SymbolicLink);
        }
    }
}