dsc 0.1.3

dsc is a CLI tool for finding and removing duplicate files on one or more file systems, while respecting your gitignore rules.
use std::cmp::Reverse;
use std::io;

use anyhow::Result;
use num_traits::ToPrimitive;
use rustc_hash::FxHashMap;
use serde::Serialize;

use crate::file_descriptor::DeviceDescriptor;
use crate::report::report_writer::ReportWriter;
use crate::types::Duplicate;

/// Stateless report writer that emits the duplicate list as CSV records
/// through its `ReportWriter` implementation.
pub struct CSVWriter {}

impl ReportWriter for CSVWriter {
    /// Writes one CSV record per path of every duplicate to `write`.
    ///
    /// `duplicates` is sorted in place first, in descending order of
    /// `file_size * (locations.len() - 1)`. The sort is stable, so groups with equal keys
    /// keep their incoming relative order. Each distinct device descriptor is assigned a
    /// small sequential id in order of first appearance, and that id is written in the
    /// `device` column instead of the descriptor itself.
    ///
    /// # Errors
    ///
    /// Returns an error if serializing a record or flushing the CSV writer fails.
    fn write(&mut self, duplicates: &mut Vec<Duplicate>, write: &mut dyn io::Write) -> Result<()> {
        let mut writer = csv::WriterBuilder::new()
            .quote_style(csv::QuoteStyle::NonNumeric)
            .from_writer(write);

        duplicates.sort_by_key(|duplicate| {
            // Saturating arithmetic: an empty `locations` list must not underflow the
            // subtraction, and a huge product must not overflow; both would panic in debug
            // builds and silently wrap (scrambling the order) in release builds.
            let extra_copies = (duplicate.locations.len() as u64).saturating_sub(1);
            Reverse(duplicate.file_size.saturating_mul(extra_copies))
        });

        let mut known_descriptors: FxHashMap<DeviceDescriptor, u32> = FxHashMap::default();

        for (duplicate_id, duplicate) in duplicates.iter().enumerate() {
            let file_size = duplicate.file_size;

            for (file_id, descriptor) in duplicate.locations.iter().enumerate() {
                let device_descriptor = descriptor.file_descriptor.device_descriptor;

                // The id for a not-yet-seen device is the number of devices seen so far.
                // It is computed before `entry` takes its mutable borrow of the map.
                let next_device_id = known_descriptors.len().to_u32().unwrap_or(0);
                let device = *known_descriptors
                    .entry(device_descriptor)
                    .or_insert(next_device_id);

                // A single location may be reachable through several paths; each path
                // gets its own record sharing the same duplicate/identity/device ids.
                for path in &descriptor.paths {
                    let record = DuplicateRecord {
                        duplicate: duplicate_id as u64,
                        identity: file_id as u64,
                        device,
                        file_name: path.to_string_lossy().into_owned(),
                        file_size,
                    };
                    writer.serialize(record)?;
                }
            }
        }

        // Flush explicitly so buffered-write errors are reported instead of being lost on drop.
        writer.flush()?;

        Ok(())
    }
}

/// One row of the CSV report: a single path belonging to a duplicate group.
///
/// NOTE(review): with serde-based serialization the column names and order presumably
/// follow the field declarations below — confirm against the `csv` crate before
/// renaming or reordering fields.
#[derive(Serialize)]
struct DuplicateRecord {
    /// Index of the duplicate group in the sorted `duplicates` list.
    duplicate: u64,
    /// Index of the location within its duplicate group.
    identity: u64,
    /// Sequential id assigned to each distinct device descriptor, in order of first appearance.
    device: u32,
    /// Path of the file, converted lossily to a UTF-8 string.
    file_name: String,
    /// Size copied from the duplicate's `file_size` (unit not visible here; presumably bytes).
    file_size: u64,
}