1use std::fmt;
8
9use crate::table::{CellValue, Table};
10
/// Coarse classification of the values observed in a single table column.
///
/// The kind is derived from the non-missing cells only; missing cells never
/// influence it (they are counted separately in the column summary).
///
/// `Hash` is derived alongside `Eq` so a kind can be used as a `HashMap` /
/// `HashSet` key, e.g. to group or count columns per kind.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum ColumnKind {
    /// Every non-missing cell is an integer.
    Integer,
    /// Every non-missing cell is numeric and at least one of them is a float
    /// (integers may be mixed in).
    Float,
    /// Every non-missing cell is a boolean.
    Boolean,
    /// Every non-missing cell is text.
    Text,
    /// Non-missing cells span more than one of the numeric / boolean / text
    /// categories.
    Mixed,
    /// The column has no non-missing cells at all.
    MissingOnly,
}
28
29impl fmt::Display for ColumnKind {
30 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
31 let s = match self {
32 ColumnKind::Integer => "integer",
33 ColumnKind::Float => "float",
34 ColumnKind::Boolean => "boolean",
35 ColumnKind::Text => "text",
36 ColumnKind::Mixed => "mixed",
37 ColumnKind::MissingOnly => "missing-only",
38 };
39 f.write_str(s)
40 }
41}
42
43#[derive(Debug, Clone)]
45pub struct ColumnSummary {
46 pub name: String,
48 pub kind: ColumnKind,
50 pub missing: usize,
52}
53
54#[derive(Debug, Clone)]
56pub struct SchemaSummary {
57 pub rows: usize,
59 pub columns: usize,
61 per_column: Vec<ColumnSummary>,
62}
63
64impl SchemaSummary {
65 pub fn column_summaries(&self) -> &[ColumnSummary] {
67 &self.per_column
68 }
69}
70
71impl fmt::Display for SchemaSummary {
72 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
73 writeln!(f, "Table: {} rows x {} columns", self.rows, self.columns)?;
74 for col in &self.per_column {
75 writeln!(
76 f,
77 " - {} ({}, {} missing)",
78 col.name, col.kind, col.missing
79 )?;
80 }
81 Ok(())
82 }
83}
84
85pub(crate) fn summarize(table: &Table) -> SchemaSummary {
87 let headers = table.headers();
88 let rows = table.rows();
89
90 let per_column = headers
91 .iter()
92 .enumerate()
93 .map(|(c, name)| {
94 let mut missing = 0usize;
95 let mut has_int = false;
96 let mut has_float = false;
97 let mut has_bool = false;
98 let mut has_text = false;
99
100 for row in rows {
101 match &row[c] {
102 CellValue::Missing => missing += 1,
103 CellValue::Int(_) => has_int = true,
104 CellValue::Float(_) => has_float = true,
105 CellValue::Bool(_) => has_bool = true,
106 CellValue::Text(_) => has_text = true,
107 }
108 }
109
110 let kind = infer_kind(has_int, has_float, has_bool, has_text);
111 ColumnSummary {
112 name: name.clone(),
113 kind,
114 missing,
115 }
116 })
117 .collect();
118
119 SchemaSummary {
120 rows: rows.len(),
121 columns: headers.len(),
122 per_column,
123 }
124}
125
126fn infer_kind(has_int: bool, has_float: bool, has_bool: bool, has_text: bool) -> ColumnKind {
127 let numeric = has_int || has_float;
128 let categories = [numeric, has_bool, has_text].iter().filter(|&&b| b).count();
129
130 match categories {
131 0 => ColumnKind::MissingOnly,
132 1 => {
133 if has_bool {
134 ColumnKind::Boolean
135 } else if has_text {
136 ColumnKind::Text
137 } else if has_float {
138 ColumnKind::Float
139 } else {
140 ColumnKind::Integer
141 }
142 }
143 _ => ColumnKind::Mixed,
144 }
145}