use std::io;
use chrono::{DateTime, Utc};
use serde::Serialize;
use thiserror::Error;
use crate::duplicates::DuplicateGroup;
/// Errors that can occur while rendering duplicate groups as CSV.
#[derive(Debug, Error)]
pub enum CsvOutputError {
    /// The underlying writer reported an I/O failure.
    #[error("I/O error: {0}")]
    Io(#[from] io::Error),

    /// The `csv` writer reported a failure.
    #[error("CSV error: {0}")]
    Csv(#[from] csv::Error),
}
/// One serialized CSV record: a single file belonging to a duplicate group.
///
/// The fields are serialized in declaration order, so reordering them
/// changes the column layout of the generated CSV.
#[derive(Debug, Serialize)]
struct CsvRow {
    /// 1-based position of the group within the input slice.
    group_id: usize,
    /// Hex string of the group's hash.
    hash: String,
    /// File path, converted lossily to UTF-8.
    path: String,
    /// Size value reported for the row.
    size: u64,
    /// Modification time formatted as an RFC 3339 timestamp.
    modified: String,
}
/// Renders a borrowed slice of duplicate groups as CSV.
pub struct CsvOutput<'a> {
    /// Groups to render; each file of each group becomes one CSV row.
    groups: &'a [DuplicateGroup],
}
impl<'a> CsvOutput<'a> {
    /// Creates a CSV renderer over `groups`.
    ///
    /// The slice is only borrowed; nothing is written until
    /// [`CsvOutput::write_to`] or [`CsvOutput::to_string`] is called.
    #[must_use]
    pub fn new(groups: &'a [DuplicateGroup]) -> Self {
        Self { groups }
    }

    /// Writes one CSV row per file of every duplicate group to `writer`.
    ///
    /// Each row carries the columns `group_id`, `hash`, `path`, `size` and
    /// `modified`:
    ///
    /// * `group_id` is the 1-based position of the group in the input slice.
    /// * `hash` is the group's hash as returned by `hash_hex()`.
    /// * `path` is the file path, converted lossily to UTF-8.
    /// * `size` is the group's size (shared by every file in the group).
    /// * `modified` is the file's modification time as an RFC 3339 UTC
    ///   timestamp.
    ///
    /// The writer is flushed before returning.
    ///
    /// # Errors
    ///
    /// Returns [`CsvOutputError::Csv`] if serializing a row fails and
    /// [`CsvOutputError::Io`] if the final flush fails.
    pub fn write_to<W: io::Write>(&self, writer: W) -> Result<(), CsvOutputError> {
        let mut csv_writer = csv::Writer::from_writer(writer);

        for (idx, group) in self.groups.iter().enumerate() {
            // Group ids are 1-based so they read naturally in a spreadsheet.
            let group_id = idx + 1;
            // Computed once per group; every file row receives its own copy.
            let hash_hex = group.hash_hex();

            for file in &group.files {
                let datetime: DateTime<Utc> = file.modified.into();

                csv_writer.serialize(CsvRow {
                    group_id,
                    hash: hash_hex.clone(),
                    // `into_owned` reuses the allocation when the lossy
                    // conversion already produced an owned string.
                    path: file.path.to_string_lossy().into_owned(),
                    size: group.size,
                    modified: datetime.to_rfc3339(),
                })?;
            }
        }

        // Flush explicitly: a flush performed on drop could not report errors.
        csv_writer.flush()?;
        Ok(())
    }

    /// Renders the groups as CSV and returns the result as a `String`.
    ///
    /// The output is identical to what [`CsvOutput::write_to`] produces.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by [`CsvOutput::write_to`].
    pub fn to_string(&self) -> Result<String, CsvOutputError> {
        let mut buffer = Vec::new();
        self.write_to(&mut buffer)?;

        // Take ownership of the buffer instead of copying it. Should the
        // bytes ever be invalid UTF-8, fall back to the lossy conversion so
        // the result matches `String::from_utf8_lossy` exactly.
        Ok(String::from_utf8(buffer)
            .unwrap_or_else(|err| String::from_utf8_lossy(err.as_bytes()).into_owned()))
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs::File;
    use std::io::Write;
    use tempfile::TempDir;

    /// Creates `name` inside `dir` containing the bytes `content` and returns
    /// the full path of the new file.
    fn create_fixture(dir: &TempDir, name: &str) -> std::path::PathBuf {
        let path = dir.path().join(name);
        File::create(&path).unwrap().write_all(b"content").unwrap();
        path
    }

    /// Builds a single group (all-zero hash, size 7) holding `paths`, all
    /// stamped with the same modification time, and renders it as CSV text.
    fn render_single_group(paths: Vec<std::path::PathBuf>) -> String {
        let stamp = std::time::SystemTime::now();
        let entries = paths
            .into_iter()
            .map(|path| crate::scanner::FileEntry::new(path, 7, stamp))
            .collect::<Vec<_>>();
        let groups = vec![DuplicateGroup::new([0u8; 32], 7, entries, Vec::new())];

        CsvOutput::new(&groups).to_string().unwrap()
    }

    /// The header, group id, hash, file names and size all appear in the output.
    #[test]
    fn test_csv_output_basic() {
        let dir = TempDir::new().unwrap();
        let first = create_fixture(&dir, "file1.txt");
        let second = create_fixture(&dir, "file2.txt");

        let csv_str = render_single_group(vec![first, second]);

        assert!(csv_str.contains("group_id,hash,path,size,modified"));
        assert!(
            csv_str.contains("1,0000000000000000000000000000000000000000000000000000000000000000")
        );
        assert!(csv_str.contains("file1.txt"));
        assert!(csv_str.contains("file2.txt"));
        assert!(csv_str.contains(",7,"));
    }

    /// A path containing commas must be quoted rather than split into fields.
    #[test]
    fn test_csv_output_quoting() {
        let dir = TempDir::new().unwrap();
        let file_with_comma = create_fixture(&dir, "file,with,comma.txt");

        let csv_str = render_single_group(vec![file_with_comma]);

        assert!(csv_str.contains("\""));
        assert!(csv_str.contains("file,with,comma.txt"));
    }
}