Skip to main content

dataprof_core/
classification.rs

1/// Inferred column data type.
2#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
3pub enum DataType {
4    /// Text or string values.
5    String,
6    /// Identifier values that should be treated as semantic strings.
7    Identifier,
8    /// Whole numbers in the i64 range.
9    Integer,
10    /// Floating-point numbers.
11    Float,
12    /// Date or datetime values.
13    Date,
14    /// Boolean values.
15    Boolean,
16}
17
18/// Semantic category for a detected pattern.
19#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
20#[serde(rename_all = "snake_case")]
21pub enum PatternCategory {
22    /// Email addresses, phone numbers.
23    Contact,
24    /// UUIDs, fiscal codes, tax IDs.
25    Identifier,
26    /// IPv4, IPv6, MAC addresses, URLs.
27    Network,
28    /// Coordinates and postal codes.
29    Geographic,
30    /// IBANs, credit cards, SWIFT/BIC.
31    Financial,
32    /// Unix or Windows file paths.
33    FilePath,
34    /// Uncategorized patterns.
35    Other,
36}
37
38impl std::fmt::Display for PatternCategory {
39    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
40        match self {
41            Self::Contact => write!(f, "contact"),
42            Self::Identifier => write!(f, "identifier"),
43            Self::Network => write!(f, "network"),
44            Self::Geographic => write!(f, "geographic"),
45            Self::Financial => write!(f, "financial"),
46            Self::FilePath => write!(f, "file_path"),
47            Self::Other => write!(f, "other"),
48        }
49    }
50}