1use serde::{Deserialize, Serialize};
4
5#[derive(Debug, Clone, Serialize, Deserialize)]
7pub struct ColumnProfile {
8 pub name: String,
9 pub data_type: DataType,
10 pub null_count: usize,
11 pub null_percentage: f64,
12 pub unique_count: usize,
13 pub unique_percentage: f64,
14 pub distinct_values: Vec<String>,
15 pub top_values: Vec<ValueFrequency>,
16 pub length_stats: Option<LengthStats>,
17 pub numeric_stats: Option<NumericStats>,
18 pub date_stats: Option<DateStats>,
19 pub text_stats: Option<TextStats>,
20 pub quality_score: f64,
21}
22
23#[derive(Debug, Clone, Serialize, Deserialize)]
25pub enum DataType {
26 String,
27 Integer,
28 Float,
29 Boolean,
30 Date,
31 DateTime,
32 Email,
33 Url,
34 Phone,
35 Unknown,
36}
37
38#[derive(Debug, Clone, Serialize, Deserialize)]
40pub struct ValueFrequency {
41 pub value: String,
42 pub count: usize,
43 pub percentage: f64,
44}
45
46#[derive(Debug, Clone, Serialize, Deserialize)]
48pub struct LengthStats {
49 pub min_length: usize,
50 pub max_length: usize,
51 pub avg_length: f64,
52 pub median_length: usize,
53 pub std_dev_length: f64,
54}
55
56#[derive(Debug, Clone, Serialize, Deserialize)]
58pub struct NumericStats {
59 pub min: f64,
60 pub max: f64,
61 pub mean: f64,
62 pub median: f64,
63 pub mode: Vec<String>,
64 pub std_dev: f64,
65 pub variance: f64,
66 pub q1: f64,
67 pub q3: f64,
68 pub iqr: f64,
69 pub skewness: f64,
70 pub kurtosis: f64,
71}
72
73#[derive(Debug, Clone, Serialize, Deserialize)]
75pub struct DateStats {
76 pub min_date: String,
77 pub max_date: String,
78 pub date_range_days: i64,
79 pub most_common_year: u32,
80 pub most_common_month: u32,
81 pub most_common_day_of_week: String,
82}
83
84#[derive(Debug, Clone, Serialize, Deserialize)]
86pub struct TextStats {
87 pub avg_word_count: f64,
88 pub max_word_count: usize,
89 pub min_word_count: usize,
90 pub contains_numbers: bool,
91 pub contains_special_chars: bool,
92 pub all_uppercase: usize,
93 pub all_lowercase: usize,
94 pub title_case: usize,
95 pub mixed_case: usize,
96}
97
98#[derive(Debug, Clone, Serialize, Deserialize)]
100pub struct DataProfile {
101 pub file_path: String,
102 pub total_rows: usize,
103 pub total_columns: usize,
104 pub total_cells: usize,
105 pub null_cells: usize,
106 pub null_percentage: f64,
107 pub duplicate_rows: usize,
108 pub duplicate_percentage: f64,
109 pub columns: Vec<ColumnProfile>,
110 pub data_quality_score: f64,
111 pub recommendations: Vec<String>,
112 pub profiling_timestamp: String,
113}