//! dsc 0.1.3
//!
//! dsc is a CLI tool for finding and removing duplicate files on one or
//! multiple file systems, while respecting your gitignore rules.
use anyhow::Result;
use serde::Serialize;
use std::cmp::Reverse;
use std::io;

use crate::file_descriptor::DeviceDescriptor;
use crate::report::report_writer::ReportWriter;
use crate::types::Duplicate;
use num_traits::ToPrimitive;
use rustc_hash::FxHashMap;

/// Report writer that serializes the duplicate report as JSON.
pub struct JSONWriter {}

impl ReportWriter for JSONWriter {
    fn write(&mut self, duplicates: &mut Vec<Duplicate>, write: &mut dyn io::Write) -> Result<()> {
        duplicates.sort_by_key(|duplicate| {
            Reverse(duplicate.file_size * (duplicate.locations.len() as u64 - 1))
        });

        let mut known_descriptors: FxHashMap<DeviceDescriptor, u32> = FxHashMap::default();

        let total_size = duplicates
            .iter()
            .map(|d| d.file_size * (d.locations.len() as u64 - 1))
            .sum();

        let duplicates = duplicates
            .iter()
            .map(|d| {
                let total_size = (d.locations.len() as u64 - 1) * d.file_size;
                let file_size = d.file_size;

                let mut files: FxHashMap<u32, Vec<Vec<String>>> = FxHashMap::default();

                for descriptor in &d.locations {
                    let device_descriptor = descriptor.file_descriptor.device_descriptor;

                    let device = match known_descriptors.get(&device_descriptor) {
                        Some(&id) => id,
                        None => {
                            let id = known_descriptors.len().to_u32().unwrap_or(0);
                            known_descriptors.insert(device_descriptor, id);
                            id
                        }
                    };

                    let paths: Vec<String> = descriptor
                        .paths
                        .iter()
                        .map(|path| path.to_string_lossy().into_owned())
                        .collect();

                    if let Some(list) = files.get_mut(&device) {
                        list.push(paths);
                    } else {
                        let list = vec![paths];
                        files.insert(device, list);
                    }
                }

                DuplicateEntry {
                    total_size,
                    file_size,
                    files,
                }
            })
            .collect();

        let report = DuplicateReport {
            total_size,
            duplicates,
        };

        serde_json::to_writer(write, &report)?;

        Ok(())
    }
}

/// Top-level JSON report: the overall reclaimable size plus one entry
/// per duplicate group.
#[derive(Serialize)]
struct DuplicateReport {
    // Total bytes reclaimable across all duplicate groups.
    total_size: u64,
    // One entry per group of identical files, sorted largest-waste first.
    duplicates: Vec<DuplicateEntry>,
}

/// One duplicate group in the JSON report.
#[derive(Serialize)]
struct DuplicateEntry {
    // Bytes reclaimable from this group (file size times redundant copies).
    total_size: u64,
    // Size in bytes of a single copy of the file.
    file_size: u64,
    // Paths grouped by report-local device id; each inner Vec<String> is the
    // set of hard-linked paths of one file on that device.
    files: FxHashMap<u32, Vec<Vec<String>>>,
}