use std::io::{Seek, Write};
use std::path::Path;
use sha2::{Digest, Sha256};
use zip::write::SimpleFileOptions;
use zip::ZipWriter;
use crate::error::FingerprintResult;
use crate::models::Fingerprint;
use super::file_names;
use super::signing::DsfSigner;
/// Tunable settings for [`FingerprintWriter`].
#[derive(Clone, Debug)]
pub struct WriteOptions {
    /// Compression setting for archive entries: `0` stores entries
    /// uncompressed, any positive value selects Deflate.
    pub compression_level: u32,
    /// When `true`, the JSON entries (privacy audit and manifest) are
    /// pretty-printed; otherwise they are written in compact form.
    pub pretty: bool,
}

impl Default for WriteOptions {
    /// Deflate-compressed entries (level `6`) with pretty-printed JSON.
    fn default() -> Self {
        let compression_level = 6;
        let pretty = true;
        WriteOptions {
            compression_level,
            pretty,
        }
    }
}
/// Serializes a `Fingerprint` into a ZIP archive, either unsigned or signed
/// with a `DsfSigner`.
pub struct FingerprintWriter {
/// Settings (compression and JSON formatting) applied to every archive
/// this writer produces.
options: WriteOptions,
}
impl FingerprintWriter {
pub fn new() -> Self {
Self {
options: WriteOptions::default(),
}
}
pub fn with_options(options: WriteOptions) -> Self {
Self { options }
}
pub fn write_to_file(&self, fingerprint: &Fingerprint, path: &Path) -> FingerprintResult<()> {
let file = std::fs::File::create(path)?;
self.write(fingerprint, file)
}
pub fn write_to_file_signed(
&self,
fingerprint: &Fingerprint,
path: &Path,
signer: &DsfSigner,
) -> FingerprintResult<()> {
let file = std::fs::File::create(path)?;
self.write_signed(fingerprint, file, signer)
}
pub fn write_signed<W: Write + Seek>(
&self,
fingerprint: &Fingerprint,
writer: W,
signer: &DsfSigner,
) -> FingerprintResult<()> {
let mut zip = ZipWriter::new(writer);
let options = SimpleFileOptions::default().compression_method(
if self.options.compression_level > 0 {
zip::CompressionMethod::Deflated
} else {
zip::CompressionMethod::Stored
},
);
let mut checksums = std::collections::HashMap::new();
if !fingerprint.schema.is_empty() {
let schema_yaml = serde_yaml::to_string(&fingerprint.schema)?;
checksums.insert(
file_names::SCHEMA.to_string(),
compute_checksum(schema_yaml.as_bytes()),
);
zip.start_file(file_names::SCHEMA, options)?;
zip.write_all(schema_yaml.as_bytes())?;
}
if !fingerprint.statistics.is_empty() {
let stats_yaml = serde_yaml::to_string(&fingerprint.statistics)?;
checksums.insert(
file_names::STATISTICS.to_string(),
compute_checksum(stats_yaml.as_bytes()),
);
zip.start_file(file_names::STATISTICS, options)?;
zip.write_all(stats_yaml.as_bytes())?;
}
if let Some(ref correlations) = fingerprint.correlations {
let yaml = serde_yaml::to_string(correlations)?;
checksums.insert(
file_names::CORRELATIONS.to_string(),
compute_checksum(yaml.as_bytes()),
);
zip.start_file(file_names::CORRELATIONS, options)?;
zip.write_all(yaml.as_bytes())?;
}
if let Some(ref integrity) = fingerprint.integrity {
let yaml = serde_yaml::to_string(integrity)?;
checksums.insert(
file_names::INTEGRITY.to_string(),
compute_checksum(yaml.as_bytes()),
);
zip.start_file(file_names::INTEGRITY, options)?;
zip.write_all(yaml.as_bytes())?;
}
if let Some(ref rules) = fingerprint.rules {
let yaml = serde_yaml::to_string(rules)?;
checksums.insert(
file_names::RULES.to_string(),
compute_checksum(yaml.as_bytes()),
);
zip.start_file(file_names::RULES, options)?;
zip.write_all(yaml.as_bytes())?;
}
if let Some(ref anomalies) = fingerprint.anomalies {
let yaml = serde_yaml::to_string(anomalies)?;
checksums.insert(
file_names::ANOMALIES.to_string(),
compute_checksum(yaml.as_bytes()),
);
zip.start_file(file_names::ANOMALIES, options)?;
zip.write_all(yaml.as_bytes())?;
}
if let Some(ref behavioral) = fingerprint.behavioral {
let yaml = serde_yaml::to_string(behavioral)?;
checksums.insert(
file_names::BEHAVIORAL.to_string(),
compute_checksum(yaml.as_bytes()),
);
zip.start_file(file_names::BEHAVIORAL, options)?;
zip.write_all(yaml.as_bytes())?;
}
let audit_json = if self.options.pretty {
serde_json::to_string_pretty(&fingerprint.privacy_audit)?
} else {
serde_json::to_string(&fingerprint.privacy_audit)?
};
checksums.insert(
file_names::PRIVACY_AUDIT.to_string(),
compute_checksum(audit_json.as_bytes()),
);
zip.start_file(file_names::PRIVACY_AUDIT, options)?;
zip.write_all(audit_json.as_bytes())?;
let mut manifest = fingerprint.manifest.clone();
manifest.checksums = checksums;
manifest.signature = None;
let signature = signer.sign_manifest(&manifest);
manifest.signature = Some(signature);
let manifest_json = if self.options.pretty {
serde_json::to_string_pretty(&manifest)?
} else {
serde_json::to_string(&manifest)?
};
zip.start_file(file_names::MANIFEST, options)?;
zip.write_all(manifest_json.as_bytes())?;
zip.finish()?;
Ok(())
}
pub fn write<W: Write + Seek>(
&self,
fingerprint: &Fingerprint,
writer: W,
) -> FingerprintResult<()> {
let mut zip = ZipWriter::new(writer);
let options = SimpleFileOptions::default().compression_method(
if self.options.compression_level > 0 {
zip::CompressionMethod::Deflated
} else {
zip::CompressionMethod::Stored
},
);
let mut checksums = std::collections::HashMap::new();
let mut manifest = fingerprint.manifest.clone();
if !fingerprint.schema.is_empty() {
let schema_yaml = serde_yaml::to_string(&fingerprint.schema)?;
checksums.insert(
file_names::SCHEMA.to_string(),
compute_checksum(schema_yaml.as_bytes()),
);
zip.start_file(file_names::SCHEMA, options)?;
zip.write_all(schema_yaml.as_bytes())?;
}
if !fingerprint.statistics.is_empty() {
let stats_yaml = serde_yaml::to_string(&fingerprint.statistics)?;
checksums.insert(
file_names::STATISTICS.to_string(),
compute_checksum(stats_yaml.as_bytes()),
);
zip.start_file(file_names::STATISTICS, options)?;
zip.write_all(stats_yaml.as_bytes())?;
}
if let Some(ref correlations) = fingerprint.correlations {
let yaml = serde_yaml::to_string(correlations)?;
checksums.insert(
file_names::CORRELATIONS.to_string(),
compute_checksum(yaml.as_bytes()),
);
zip.start_file(file_names::CORRELATIONS, options)?;
zip.write_all(yaml.as_bytes())?;
}
if let Some(ref integrity) = fingerprint.integrity {
let yaml = serde_yaml::to_string(integrity)?;
checksums.insert(
file_names::INTEGRITY.to_string(),
compute_checksum(yaml.as_bytes()),
);
zip.start_file(file_names::INTEGRITY, options)?;
zip.write_all(yaml.as_bytes())?;
}
if let Some(ref rules) = fingerprint.rules {
let yaml = serde_yaml::to_string(rules)?;
checksums.insert(
file_names::RULES.to_string(),
compute_checksum(yaml.as_bytes()),
);
zip.start_file(file_names::RULES, options)?;
zip.write_all(yaml.as_bytes())?;
}
if let Some(ref anomalies) = fingerprint.anomalies {
let yaml = serde_yaml::to_string(anomalies)?;
checksums.insert(
file_names::ANOMALIES.to_string(),
compute_checksum(yaml.as_bytes()),
);
zip.start_file(file_names::ANOMALIES, options)?;
zip.write_all(yaml.as_bytes())?;
}
if let Some(ref behavioral) = fingerprint.behavioral {
let yaml = serde_yaml::to_string(behavioral)?;
checksums.insert(
file_names::BEHAVIORAL.to_string(),
compute_checksum(yaml.as_bytes()),
);
zip.start_file(file_names::BEHAVIORAL, options)?;
zip.write_all(yaml.as_bytes())?;
}
let audit_json = if self.options.pretty {
serde_json::to_string_pretty(&fingerprint.privacy_audit)?
} else {
serde_json::to_string(&fingerprint.privacy_audit)?
};
checksums.insert(
file_names::PRIVACY_AUDIT.to_string(),
compute_checksum(audit_json.as_bytes()),
);
zip.start_file(file_names::PRIVACY_AUDIT, options)?;
zip.write_all(audit_json.as_bytes())?;
manifest.checksums = checksums;
let manifest_json = if self.options.pretty {
serde_json::to_string_pretty(&manifest)?
} else {
serde_json::to_string(&manifest)?
};
zip.start_file(file_names::MANIFEST, options)?;
zip.write_all(manifest_json.as_bytes())?;
zip.finish()?;
Ok(())
}
}
impl Default for FingerprintWriter {
fn default() -> Self {
Self::new()
}
}
/// Returns the SHA-256 digest of `data`, hex-encoded.
fn compute_checksum(data: &[u8]) -> String {
    // One-shot hashing: equivalent to new() + update(data) + finalize().
    let digest = Sha256::digest(data);
    hex::encode(digest)
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::io::reader::FingerprintReader;
    use crate::models::{
        Manifest, PrivacyAudit, PrivacyLevel, PrivacyMetadata, SchemaFingerprint, SourceMetadata,
        StatisticsFingerprint,
    };
    use std::io::Cursor;
    use zip::ZipArchive;

    /// Builds a fingerprint around `source` with freshly constructed schema
    /// and statistics, standard privacy metadata and a fixed privacy audit.
    fn fingerprint_for(source: SourceMetadata) -> Fingerprint {
        let privacy = PrivacyMetadata::from_level(PrivacyLevel::Standard);
        let manifest = Manifest::new(source, privacy);
        Fingerprint::new(
            manifest,
            SchemaFingerprint::new(),
            StatisticsFingerprint::new(),
            PrivacyAudit::new(1.0, 5),
        )
    }

    /// Serializes `fingerprint` with a default writer and returns the bytes.
    fn write_to_bytes(fingerprint: &Fingerprint) -> Vec<u8> {
        let mut sink = Cursor::new(Vec::new());
        FingerprintWriter::new()
            .write(fingerprint, &mut sink)
            .unwrap();
        sink.into_inner()
    }

    /// The writer produces non-empty output that starts with the ZIP magic.
    #[test]
    fn test_write_fingerprint() {
        let fingerprint = fingerprint_for(SourceMetadata::new(
            "Test source",
            vec!["test_table".to_string()],
            100,
        ));

        let bytes = write_to_bytes(&fingerprint);

        assert!(!bytes.is_empty());
        assert_eq!(&bytes[0..2], b"PK");
    }

    /// Empty schema/statistics are left out of the archive and read back as
    /// empty.
    #[test]
    fn round_trip_skips_empty_schema_and_statistics() {
        let fingerprint = fingerprint_for(SourceMetadata::new("test", vec![], 0));
        let bytes = write_to_bytes(&fingerprint);

        // Inspect the raw archive: neither empty section may have an entry.
        let archive = ZipArchive::new(Cursor::new(bytes.clone())).unwrap();
        let names: Vec<&str> = archive.file_names().collect();
        assert!(
            !names.contains(&file_names::SCHEMA),
            "empty schema should NOT be in the ZIP, got names: {names:?}"
        );
        assert!(
            !names.contains(&file_names::STATISTICS),
            "empty statistics should NOT be in the ZIP, got names: {names:?}"
        );

        // Read the archive back through the reader.
        let loaded = FingerprintReader::new().read(Cursor::new(bytes)).unwrap();
        assert!(loaded.schema.is_empty(), "re-read schema should be empty");
        assert!(
            loaded.statistics.is_empty(),
            "re-read statistics should be empty"
        );
    }
}