1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
use std::fmt;
use csv::StringRecord;
bitflags! {
    /// Set of candidate types for a single field. Implemented as a bitflag struct (see
    /// [`bitflags`](https://docs.rs/bitflags/)), so several candidates can be held at once.
    #[derive(Default)]
    pub(crate) struct TypeGuesses: u32 {
        // One bit per candidate type.
        const BOOLEAN = 1 << 0;
        const UNSIGNED = 1 << 1;
        const SIGNED = 1 << 2;
        const FLOAT = 1 << 3;
        const TEXT = 1 << 4;
    }
}
impl TypeGuesses {
    /// Compute the 'best-fitting' [`Type`] among the guesses of this struct.
    ///
    /// 'Best-fitting' means the narrowest candidate whose flag is still set, checked in the
    /// order `Boolean`, `Unsigned`, `Signed`, `Float`. `Type::Text` is the fallback when none of
    /// those flags is set (since everything can be a text field).
    pub(crate) fn best(&self) -> Type {
        if self.contains(TypeGuesses::BOOLEAN) {
            // every value seen could be a boolean
            Type::Boolean
        } else if self.contains(TypeGuesses::UNSIGNED) {
            // every value is an integer >= 0
            Type::Unsigned
        } else if self.contains(TypeGuesses::SIGNED) {
            // every value is an integer, but at least one did not fit the unsigned guess
            Type::Signed
        } else if self.contains(TypeGuesses::FLOAT) {
            // every value is numeric, but at least one is not an integer
            Type::Float
        } else {
            // doesn't fit anything narrower: it's a text field
            Type::Text
        }
    }

    /// Returns `true` if `other` is 'allowed' in the types represented by `self`, i.e. every
    /// candidate type still set in `self` is also a candidate in `other`.
    ///
    /// For example, if `self` is `TypeGuesses::SIGNED | TypeGuesses::FLOAT | TypeGuesses::TEXT`
    /// and `other` is `TypeGuesses::TEXT`, then `allows` returns `false` (since `self` is more
    /// restrictive than `other`: it requires `SIGNED` and `FLOAT`, which `other` cannot satisfy).
    pub(crate) fn allows(&self, other: &TypeGuesses) -> bool {
        // The set difference holds the candidates `self` has that `other` lacks; `other` is
        // allowed exactly when nothing is left over. (The previous version negated this check,
        // which contradicted the documented example.)
        (*self - *other).is_empty()
    }
}
/// Guess which types the single field `s` could be.
///
/// An empty field carries no information, so every flag is returned. Otherwise the result always
/// contains `TEXT`, plus one flag for each of `u64`, `i64`, `bool` and `f64` that `str::parse`
/// accepts for `s`. Note that `bool` parsing only accepts the literals `true` and `false`.
pub(crate) fn infer_types(s: &str) -> TypeGuesses {
    // empty fields can be of any type; or rather, of no known type
    if s.is_empty() {
        return TypeGuesses::all();
    }

    // Pair each parse attempt with the flag it unlocks.
    let attempts = [
        (s.parse::<u64>().is_ok(), TypeGuesses::UNSIGNED),
        (s.parse::<i64>().is_ok(), TypeGuesses::SIGNED),
        (s.parse::<bool>().is_ok(), TypeGuesses::BOOLEAN),
        (s.parse::<f64>().is_ok(), TypeGuesses::FLOAT),
    ];

    // Every non-empty field is at least text; add the flag of each successful parse.
    attempts
        .iter()
        .filter(|attempt| attempt.0)
        .fold(TypeGuesses::TEXT, |found, attempt| found | attempt.1)
}
pub(crate) fn infer_record_types(record: &StringRecord) -> Vec<TypeGuesses> {
record.iter().map(infer_types).collect()
}
/// The valid field types for fields in a CSV record.
///
/// The enum is fieldless, so it is `Copy` and can be compared, hashed and used as a map or set
/// key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Type {
    /// Unsigned integer (integer >= 0)
    Unsigned,
    /// Signed integer
    Signed,
    /// Text (any field can be text)
    Text,
    /// Boolean (`true` / `false`).
    ///
    /// NOTE(review): `infer_types` in this file only tags the literals `true` / `false` as
    /// boolean; `0` / `1` are inferred as integers.
    Boolean,
    /// Floating-point
    Float,
}
pub(crate) fn get_best_types(guesses: Vec<TypeGuesses>) -> Vec<Type> {
guesses.iter().map(|guess| guess.best()).collect()
}
/// Formats a `Type` as its variant name (e.g. `Type::Float` prints as `Float`).
impl fmt::Display for Type {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Pick the label first, then emit it in one write.
        let label = match *self {
            Type::Boolean => "Boolean",
            Type::Unsigned => "Unsigned",
            Type::Signed => "Signed",
            Type::Float => "Float",
            Type::Text => "Text",
        };
        f.write_str(label)
    }
}