csv_sniffer/
field_type.rs

1use std::fmt;
2
3use csv::StringRecord;
4
5bitflags! {
6    /// Possible guesses for the field type. Implementged as a bitflag struct (see
7    /// [`bitflags`](https://docs.rs/bitflags/)).
8    #[derive(Default)]
9    pub(crate) struct TypeGuesses: u32 {
10        const BOOLEAN   = 0b00000001;
11        const UNSIGNED  = 0b00000010;
12        const SIGNED    = 0b00000100;
13        const FLOAT     = 0b00001000;
14        const TEXT      = 0b00010000;
15    }
16}
17
18impl TypeGuesses {
19    /// Compute the 'best-fitting' `Type` among the guesses of this struct. 'Best-fitting' in this
20    /// case means the narrowest definition: `Type::Boolean` being the narrowest, and `Type::Text`
21    /// being the widest (since everything can be a text field).
22    pub(crate) fn best(&self) -> Type {
23        // if all values are some sort of boolean (0 or 1, or 'true' and 'false'), guess boolean
24        if self.contains(TypeGuesses::BOOLEAN) {
25            Type::Boolean
26        }
27        // if all values are integer and > 0, guess unsigned
28        else if self.contains(TypeGuesses::UNSIGNED) {
29            Type::Unsigned
30        }
31        // if all values are integer, but some < 0, guess signed
32        else if self.contains(TypeGuesses::SIGNED) {
33            Type::Signed
34        }
35        // if all values are numeric, but non-integer, guess float
36        else if self.contains(TypeGuesses::FLOAT) {
37            Type::Float
38        }
39        // doesn't fit anything else, it's a text field
40        else {
41            Type::Text
42        }
43    }
44    /// Returns `true` if `other` is 'allowed' in the types represented by `self`. For example,
45    /// if `self` is TypesGuesses::SIGNED | TypesGuesses::FLOAT | TypeGuesses::TEXT, and `other` is
46    /// TypesGuesses::TEXT, then `allows` returns `false` (since self is more restrictive than
47    /// other).
48    pub(crate) fn allows(&self, other: &TypeGuesses) -> bool {
49        !(*self - *other).is_empty()
50    }
51}
52
53pub(crate) fn infer_types(s: &str) -> TypeGuesses {
54    if s.is_empty() {
55        // empty fields can be of any type; or rather, of no known type
56        return TypeGuesses::all();
57    }
58    let mut guesses = TypeGuesses::default();
59    guesses |= TypeGuesses::TEXT;
60    if s.parse::<u64>().is_ok() {
61        guesses |= TypeGuesses::UNSIGNED;
62    }
63    if s.parse::<i64>().is_ok() {
64        guesses |= TypeGuesses::SIGNED;
65    }
66    if s.parse::<bool>().is_ok() {
67        guesses |= TypeGuesses::BOOLEAN;
68    }
69    if s.parse::<f64>().is_ok() {
70        guesses |= TypeGuesses::FLOAT;
71    }
72    guesses
73}
74
75pub(crate) fn infer_record_types(record: &StringRecord) -> Vec<TypeGuesses> {
76    record.iter().map(infer_types).collect()
77}
78
/// The valid field types for fields in a CSV record.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Type {
    /// Unsigned integer (integer >= 0)
    Unsigned,
    /// Signed integer
    Signed,
    /// Text (any field can be treated as text)
    Text,
    /// Boolean (true / false).
    ///
    /// NOTE(review): type inference in this file only recognizes the literals `true` / `false`
    /// (via `str::parse::<bool>`); `0` / `1` are inferred as integers. Confirm whether `0` / `1`
    /// are meant to count as boolean.
    Boolean,
    /// Floating-point
    Float,
}
93pub(crate) fn get_best_types(guesses: Vec<TypeGuesses>) -> Vec<Type> {
94    guesses.iter().map(|guess| guess.best()).collect()
95}
96impl fmt::Display for Type {
97    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
98        write!(
99            f,
100            "{}",
101            match *self {
102                Type::Unsigned => "Unsigned",
103                Type::Signed => "Signed",
104                Type::Text => "Text",
105                Type::Boolean => "Boolean",
106                Type::Float => "Float",
107            }
108        )
109    }
110}