1use std::borrow::Cow;
2
3use crate::{
4 proto::schema::{
5 self, field_data::Field, scalar_field::Data as ScalarData,
6 vector_field::Data as VectorData, DataType, ScalarField, VectorField,
7 },
8 schema::FieldSchema,
9 value::{Value, ValueVec},
10};
11
12pub trait HasDataType {
13 fn data_type() -> DataType;
14}
15
/// Generates one `HasDataType` impl per `type, variant` pair.
///
/// Input is a comma-separated, non-empty list of alternating entries:
/// a Rust type followed by the expression its `data_type()` should return.
/// A trailing comma is not accepted.
macro_rules! impl_has_data_type {
    ( $( $rust_ty:ty, $variant:expr ),+ ) => {
        $(
            impl HasDataType for $rust_ty {
                fn data_type() -> DataType {
                    $variant
                }
            }
        )+
    };
}
25
26impl_has_data_type! {
27 bool, DataType::Bool,
28 i8, DataType::Int8,
29 i16, DataType::Int16,
30 i32, DataType::Int32,
31 i64, DataType::Int64,
32 f32, DataType::Float,
33 f64, DataType::Double,
34 String, DataType::String,
35 Cow<'_, str>, DataType::String,
36 Vec<f32>, DataType::FloatVector,
37 Vec<u8>, DataType::BinaryVector,
38 Cow<'_, [f32]>, DataType::FloatVector,
39 Cow<'_, [u8]>, DataType::BinaryVector
40}
41
42#[derive(Debug, Clone)]
43pub struct FieldColumn {
44 pub name: String,
45 pub dtype: DataType,
46 pub value: ValueVec,
47 pub dim: i64,
48 pub max_length: i32,
49}
50
51impl From<schema::FieldData> for FieldColumn {
52 fn from(fd: schema::FieldData) -> Self {
53 let (dim, max_length) = fd
54 .field
55 .as_ref()
56 .map(get_dim_max_length)
57 .unwrap_or((None, None));
58
59 let value: ValueVec = fd.field.map(Into::into).unwrap_or(ValueVec::None);
60 let dtype = DataType::from_i32(fd.r#type).unwrap_or(DataType::None);
61
62 FieldColumn {
63 name: fd.field_name,
64 dtype,
65 dim: dim.unwrap_or_else(|| match dtype {
66 DataType::None => 0,
67 DataType::Bool
68 | DataType::Int8
69 | DataType::Int16
70 | DataType::Int32
71 | DataType::Int64
72 | DataType::Float
73 | DataType::Double
74 | DataType::String
75 | DataType::VarChar => 1,
76 DataType::BinaryVector => 256,
77 DataType::FloatVector => 128,
78 }),
79 max_length: max_length.unwrap_or(0),
80 value,
81 }
82 }
83}
84
85impl FieldColumn {
86 pub fn new<V: Into<ValueVec>>(schm: &FieldSchema, v: V) -> FieldColumn {
87 FieldColumn {
88 name: schm.name.clone(),
89 dtype: schm.dtype,
90 value: v.into(),
91 dim: schm.dim,
92 max_length: schm.max_length,
93 }
94 }
95
96 pub fn get(&self, idx: usize) -> Option<Value<'_>> {
97 Some(match &self.value {
98 ValueVec::None => Value::None,
99 ValueVec::Bool(v) => Value::Bool(*v.get(idx)?),
100 ValueVec::Int(v) => match self.dtype {
101 DataType::Int8 => Value::Int8(*v.get(idx)? as _),
102 DataType::Int16 => Value::Int16(*v.get(idx)? as _),
103 DataType::Int32 => Value::Int32(*v.get(idx)?),
104 _ => unreachable!(),
105 },
106 ValueVec::Long(v) => Value::Long(*v.get(idx)?),
107 ValueVec::Float(v) => match self.dtype {
108 DataType::Float => Value::Float(*v.get(idx)?),
109 DataType::FloatVector => {
110 let dim = self.dim as usize;
111 Value::FloatArray(Cow::Borrowed(&v[idx * dim..idx * dim + dim]))
112 }
113 _ => unreachable!(),
114 },
115 ValueVec::Double(v) => Value::Double(*v.get(idx)?),
116 ValueVec::Binary(v) => {
117 let dim = (self.dim / 8) as usize;
118 Value::Binary(Cow::Borrowed(&v[idx * dim..idx * dim + dim]))
119 }
120 ValueVec::String(v) => Value::String(Cow::Borrowed(v.get(idx)?.as_ref())),
121 })
122 }
123
124 pub fn push(&mut self, val: Value) {
125 match (&mut self.value, val) {
126 (ValueVec::None, Value::None) => (),
127 (ValueVec::Bool(vec), Value::Bool(i)) => vec.push(i),
128 (ValueVec::Int(vec), Value::Int8(i)) => vec.push(i as _),
129 (ValueVec::Int(vec), Value::Int16(i)) => vec.push(i as _),
130 (ValueVec::Int(vec), Value::Int32(i)) => vec.push(i),
131 (ValueVec::Long(vec), Value::Long(i)) => vec.push(i),
132 (ValueVec::Float(vec), Value::Float(i)) => vec.push(i),
133 (ValueVec::Double(vec), Value::Double(i)) => vec.push(i),
134 (ValueVec::String(vec), Value::String(i)) => vec.push(i.to_string()),
135 (ValueVec::Binary(vec), Value::Binary(i)) => vec.extend_from_slice(i.as_ref()),
136 (ValueVec::Float(vec), Value::FloatArray(i)) => vec.extend_from_slice(i.as_ref()),
137 _ => panic!("column type mismatch"),
138 }
139 }
140
141 #[inline]
142 pub fn len(&self) -> usize {
143 self.value.len() / self.dim as usize
144 }
145
146 pub fn copy_with_metadata(&self) -> Self {
147 Self {
148 dim: self.dim,
149 dtype: self.dtype,
150 max_length: self.max_length,
151 name: self.name.clone(),
152 value: match &self.value {
153 ValueVec::None => ValueVec::None,
154 ValueVec::Bool(_) => ValueVec::Bool(Vec::new()),
155 ValueVec::Int(_) => ValueVec::Int(Vec::new()),
156 ValueVec::Long(_) => ValueVec::Long(Vec::new()),
157 ValueVec::Float(_) => ValueVec::Float(Vec::new()),
158 ValueVec::Double(_) => ValueVec::Double(Vec::new()),
159 ValueVec::String(_) => ValueVec::String(Vec::new()),
160 ValueVec::Binary(_) => ValueVec::Binary(Vec::new()),
161 },
162 }
163 }
164}
165
166impl From<FieldColumn> for schema::FieldData {
167 fn from(this: FieldColumn) -> schema::FieldData {
168 schema::FieldData {
169 field_name: this.name.to_string(),
170 field_id: 0,
171 r#type: this.dtype as _,
172 field: Some(match this.value {
173 ValueVec::None => Field::Scalars(ScalarField { data: None }),
174 ValueVec::Bool(v) => Field::Scalars(ScalarField {
175 data: Some(ScalarData::BoolData(schema::BoolArray { data: v })),
176 }),
177 ValueVec::Int(v) => Field::Scalars(ScalarField {
178 data: Some(ScalarData::IntData(schema::IntArray { data: v })),
179 }),
180 ValueVec::Long(v) => Field::Scalars(ScalarField {
181 data: Some(ScalarData::LongData(schema::LongArray { data: v })),
182 }),
183 ValueVec::Float(v) => match this.dtype {
184 DataType::Float => Field::Scalars(ScalarField {
185 data: Some(ScalarData::FloatData(schema::FloatArray { data: v })),
186 }),
187 DataType::FloatVector => Field::Vectors(VectorField {
188 data: Some(VectorData::FloatVector(schema::FloatArray { data: v })),
189 dim: this.dim,
190 }),
191 _ => unimplemented!(),
192 },
193 ValueVec::Double(v) => Field::Scalars(ScalarField {
194 data: Some(ScalarData::DoubleData(schema::DoubleArray { data: v })),
195 }),
196 ValueVec::String(v) => Field::Scalars(ScalarField {
197 data: Some(ScalarData::StringData(schema::StringArray { data: v })),
198 }),
199 ValueVec::Binary(v) => Field::Vectors(VectorField {
200 data: Some(VectorData::BinaryVector(v)),
201 dim: this.dim,
202 }),
203 }),
204 }
205 }
206}
207
208pub trait FromField: Sized {
209 fn from_field(field: Field) -> Option<Self>;
210}
211
/// Generates `FromField` impls from `Type[pattern => result]` entries.
///
/// Each entry supplies a single `match` arm (without trailing comma) inside
/// the square brackets. The generated `from_field` matches the incoming
/// `Field` against that arm and returns `None` for anything else. Entries are
/// comma-separated; at least one is required and a trailing comma is not
/// accepted.
macro_rules! impl_from_field {
    ( $( $target:ty [ $($arm:tt)* ] ),+ ) => {
        $(
            impl FromField for $target {
                fn from_field(field: Field) -> Option<Self> {
                    match field {
                        $($arm)*,
                        _ => None
                    }
                }
            }
        )+
    };
}
225
226impl_from_field! {
227 Vec<bool>[Field::Scalars(ScalarField {data: Some(ScalarData::BoolData(schema::BoolArray { data }))}) => Some(data)],
228 Vec<i8>[Field::Scalars(ScalarField {data: Some(ScalarData::IntData(schema::IntArray { data }))}) => Some(data.into_iter().map(|x|x as _).collect())],
229 Vec<i16>[Field::Scalars(ScalarField {data: Some(ScalarData::IntData(schema::IntArray { data }))}) => Some(data.into_iter().map(|x|x as _).collect())],
230 Vec<i32>[Field::Scalars(ScalarField {data: Some(ScalarData::IntData(schema::IntArray { data }))}) => Some(data)],
231 Vec<i64>[Field::Scalars(ScalarField {data: Some(ScalarData::LongData(schema::LongArray { data }))}) => Some(data)],
232 Vec<String>[Field::Scalars(ScalarField {data: Some(ScalarData::StringData(schema::StringArray { data }))}) => Some(data)],
233 Vec<f64>[Field::Scalars(ScalarField {data: Some(ScalarData::DoubleData(schema::DoubleArray { data }))}) => Some(data)],
234 Vec<u8>[Field::Vectors(VectorField {data: Some(VectorData::BinaryVector(data)), ..}) => Some(data)]
235}
236
237impl FromField for Vec<f32> {
238 fn from_field(field: Field) -> Option<Self> {
239 match field {
240 Field::Scalars(ScalarField {
241 data: Some(ScalarData::FloatData(schema::FloatArray { data })),
242 }) => Some(data),
243
244 Field::Vectors(VectorField {
245 data: Some(VectorData::FloatVector(schema::FloatArray { data })),
246 ..
247 }) => Some(data),
248
249 _ => None,
250 }
251 }
252}
253
254fn get_dim_max_length(field: &Field) -> (Option<i64>, Option<i32>) {
255 let dim = match field {
256 Field::Scalars(ScalarField { data: Some(_) }) => 1i64,
257 Field::Vectors(VectorField { dim, .. }) => *dim,
258 _ => 0i64,
259 };
260
261 (Some(dim), None) }