dataprof_runtime/
profile_report.rs1use dataprof_core::{ColumnProfile, DataSource, ExecutionMetadata};
2use dataprof_metrics::{QualityAssessment, QualityMetrics};
3
4#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
11pub struct ProfileReport {
12 pub id: String,
14 pub timestamp: String,
16 pub data_source: DataSource,
18 pub column_profiles: Vec<ColumnProfile>,
20 #[serde(alias = "scan_info")]
22 pub execution: ExecutionMetadata,
23 #[serde(
25 alias = "data_quality_metrics",
26 skip_serializing_if = "Option::is_none",
27 default,
28 deserialize_with = "deserialize_quality_compat"
29 )]
30 pub quality: Option<QualityAssessment>,
31}
32
33impl ProfileReport {
34 pub fn new(
36 data_source: DataSource,
37 column_profiles: Vec<ColumnProfile>,
38 execution: ExecutionMetadata,
39 quality: Option<QualityAssessment>,
40 ) -> Self {
41 Self {
42 id: uuid::Uuid::new_v4().to_string(),
43 timestamp: chrono::Utc::now().to_rfc3339(),
44 data_source,
45 column_profiles,
46 execution,
47 quality,
48 }
49 }
50
51 pub fn with_id(mut self, id: impl Into<String>) -> Self {
53 self.id = id.into();
54 self
55 }
56
57 pub fn with_timestamp(mut self, timestamp: impl Into<String>) -> Self {
59 self.timestamp = timestamp.into();
60 self
61 }
62
63 pub fn quality_score(&self) -> Option<f64> {
66 self.quality.as_ref().map(|q| q.score())
67 }
68
69 pub fn source_identifier(&self) -> String {
71 self.data_source.identifier()
72 }
73}
74
75fn deserialize_quality_compat<'de, D>(
78 deserializer: D,
79) -> Result<Option<QualityAssessment>, D::Error>
80where
81 D: serde::Deserializer<'de>,
82{
83 use serde::Deserialize;
84
85 let value: Option<serde_json::Value> = Option::deserialize(deserializer)?;
86 match value {
87 None => Ok(None),
88 Some(v) => {
89 if v.get("metrics").is_some() && v.get("confidence").is_some() {
90 let assessment: QualityAssessment =
91 serde_json::from_value(v).map_err(serde::de::Error::custom)?;
92 Ok(Some(assessment))
93 } else {
94 let metrics: QualityMetrics =
95 serde_json::from_value(v).map_err(serde::de::Error::custom)?;
96 Ok(Some(QualityAssessment::exact(metrics)))
97 }
98 }
99 }
100}
101
#[cfg(test)]
mod tests {
    use super::*;
    use dataprof_core::FileFormat;
    use dataprof_metrics::MetricConfidence;
    use serde_json::json;

    /// Builds the CSV-file report shared by the round-trip tests, with the
    /// given quality assessment attached.
    fn sample_report(quality: Option<QualityAssessment>) -> ProfileReport {
        let data_source = DataSource::File {
            path: "test.csv".to_string(),
            format: FileFormat::Csv,
            size_bytes: 1024,
            modified_at: None,
            parquet_metadata: None,
        };
        let execution = ExecutionMetadata::new(100, 5, 50);

        ProfileReport::new(data_source, vec![], execution, quality)
    }

    /// Serializes the report to a JSON string and parses it back.
    fn json_roundtrip(report: &ProfileReport) -> ProfileReport {
        let json = serde_json::to_string(report).unwrap();
        serde_json::from_str(&json).unwrap()
    }

    #[test]
    fn test_profile_report_json_roundtrip() {
        let report = sample_report(Some(QualityAssessment::exact(QualityMetrics::empty())));

        let deserialized = json_roundtrip(&report);

        assert_eq!(deserialized.id, report.id);
        assert_eq!(deserialized.timestamp, report.timestamp);
        assert_eq!(deserialized.source_identifier(), "test.csv");
        assert_eq!(deserialized.execution.rows_processed, 100);
        assert!(deserialized.quality.is_some());
    }

    #[test]
    fn test_profile_report_without_quality() {
        let report = sample_report(None);

        let deserialized = json_roundtrip(&report);

        assert!(deserialized.quality.is_none());
        assert_eq!(deserialized.execution.rows_processed, 100);
    }

    /// A report written with the older key names (`scan_info`,
    /// `data_quality_metrics`) and a bare metrics object must still load.
    #[test]
    fn test_profile_report_deserializes_legacy_quality_metrics() {
        let legacy = json!({
            "id": "legacy-report",
            "timestamp": "2026-05-22T10:00:00Z",
            "data_source": {
                "type": "file",
                "path": "test.csv",
                "format": "csv",
                "size_bytes": 42
            },
            "column_profiles": [],
            "scan_info": {
                "rows_processed": 10,
                "columns_detected": 2,
                "scan_time_ms": 5,
                "error_count": 0,
                "source_exhausted": true,
                "sampling_applied": false
            },
            "data_quality_metrics": {
                "completeness": {
                    "missing_values_ratio": 0.0,
                    "complete_records_ratio": 100.0,
                    "null_columns": []
                }
            }
        });

        let report: ProfileReport = serde_json::from_value(legacy).unwrap();

        assert_eq!(report.id, "legacy-report");
        assert_eq!(report.execution.rows_processed, 10);

        let quality = report
            .quality
            .expect("expected legacy quality to deserialize");
        assert!(matches!(quality.confidence, MetricConfidence::Exact));
        assert!((quality.score() - 100.0).abs() < 0.01);
    }
}