// milvus/data.rs

1use std::borrow::Cow;
2
3use crate::{
4    proto::schema::{
5        self, field_data::Field, scalar_field::Data as ScalarData,
6        vector_field::Data as VectorData, DataType, ScalarField, VectorField,
7    },
8    schema::FieldSchema,
9    value::{Value, ValueVec},
10};
11
12pub trait HasDataType {
13    fn data_type() -> DataType;
14}
15
/// Generates a [`HasDataType`] implementation for each `type, data-type`
/// pair of a comma-separated list.
///
/// Every pair expands to an `impl` whose `data_type()` returns the given
/// expression unchanged.
macro_rules! impl_has_data_type {
    ( $( $ty:ty, $dtype:expr ),+ ) => {
        $(
            impl HasDataType for $ty {
                fn data_type() -> DataType {
                    $dtype
                }
            }
        )+
    };
}
25
26impl_has_data_type! {
27    bool, DataType::Bool,
28    i8, DataType::Int8,
29    i16, DataType::Int16,
30    i32, DataType::Int32,
31    i64, DataType::Int64,
32    f32, DataType::Float,
33    f64, DataType::Double,
34    String, DataType::String,
35    Cow<'_, str>, DataType::String,
36    Vec<f32>, DataType::FloatVector,
37    Vec<u8>, DataType::BinaryVector,
38    Cow<'_, [f32]>, DataType::FloatVector,
39    Cow<'_, [u8]>, DataType::BinaryVector
40}
41
42#[derive(Debug, Clone)]
43pub struct FieldColumn {
44    pub name: String,
45    pub dtype: DataType,
46    pub value: ValueVec,
47    pub dim: i64,
48    pub max_length: i32,
49}
50
51impl From<schema::FieldData> for FieldColumn {
52    fn from(fd: schema::FieldData) -> Self {
53        let (dim, max_length) = fd
54            .field
55            .as_ref()
56            .map(get_dim_max_length)
57            .unwrap_or((None, None));
58
59        let value: ValueVec = fd.field.map(Into::into).unwrap_or(ValueVec::None);
60        let dtype = DataType::from_i32(fd.r#type).unwrap_or(DataType::None);
61
62        FieldColumn {
63            name: fd.field_name,
64            dtype,
65            dim: dim.unwrap_or_else(|| match dtype {
66                DataType::None => 0,
67                DataType::Bool
68                | DataType::Int8
69                | DataType::Int16
70                | DataType::Int32
71                | DataType::Int64
72                | DataType::Float
73                | DataType::Double
74                | DataType::String
75                | DataType::VarChar => 1,
76                DataType::BinaryVector => 256,
77                DataType::FloatVector => 128,
78            }),
79            max_length: max_length.unwrap_or(0),
80            value,
81        }
82    }
83}
84
85impl FieldColumn {
86    pub fn new<V: Into<ValueVec>>(schm: &FieldSchema, v: V) -> FieldColumn {
87        FieldColumn {
88            name: schm.name.clone(),
89            dtype: schm.dtype,
90            value: v.into(),
91            dim: schm.dim,
92            max_length: schm.max_length,
93        }
94    }
95
96    pub fn get(&self, idx: usize) -> Option<Value<'_>> {
97        Some(match &self.value {
98            ValueVec::None => Value::None,
99            ValueVec::Bool(v) => Value::Bool(*v.get(idx)?),
100            ValueVec::Int(v) => match self.dtype {
101                DataType::Int8 => Value::Int8(*v.get(idx)? as _),
102                DataType::Int16 => Value::Int16(*v.get(idx)? as _),
103                DataType::Int32 => Value::Int32(*v.get(idx)?),
104                _ => unreachable!(),
105            },
106            ValueVec::Long(v) => Value::Long(*v.get(idx)?),
107            ValueVec::Float(v) => match self.dtype {
108                DataType::Float => Value::Float(*v.get(idx)?),
109                DataType::FloatVector => {
110                    let dim = self.dim as usize;
111                    Value::FloatArray(Cow::Borrowed(&v[idx * dim..idx * dim + dim]))
112                }
113                _ => unreachable!(),
114            },
115            ValueVec::Double(v) => Value::Double(*v.get(idx)?),
116            ValueVec::Binary(v) => {
117                let dim = (self.dim / 8) as usize;
118                Value::Binary(Cow::Borrowed(&v[idx * dim..idx * dim + dim]))
119            }
120            ValueVec::String(v) => Value::String(Cow::Borrowed(v.get(idx)?.as_ref())),
121        })
122    }
123
124    pub fn push(&mut self, val: Value) {
125        match (&mut self.value, val) {
126            (ValueVec::None, Value::None) => (),
127            (ValueVec::Bool(vec), Value::Bool(i)) => vec.push(i),
128            (ValueVec::Int(vec), Value::Int8(i)) => vec.push(i as _),
129            (ValueVec::Int(vec), Value::Int16(i)) => vec.push(i as _),
130            (ValueVec::Int(vec), Value::Int32(i)) => vec.push(i),
131            (ValueVec::Long(vec), Value::Long(i)) => vec.push(i),
132            (ValueVec::Float(vec), Value::Float(i)) => vec.push(i),
133            (ValueVec::Double(vec), Value::Double(i)) => vec.push(i),
134            (ValueVec::String(vec), Value::String(i)) => vec.push(i.to_string()),
135            (ValueVec::Binary(vec), Value::Binary(i)) => vec.extend_from_slice(i.as_ref()),
136            (ValueVec::Float(vec), Value::FloatArray(i)) => vec.extend_from_slice(i.as_ref()),
137            _ => panic!("column type mismatch"),
138        }
139    }
140
141    #[inline]
142    pub fn len(&self) -> usize {
143        self.value.len() / self.dim as usize
144    }
145
146    pub fn copy_with_metadata(&self) -> Self {
147        Self {
148            dim: self.dim,
149            dtype: self.dtype,
150            max_length: self.max_length,
151            name: self.name.clone(),
152            value: match &self.value {
153                ValueVec::None => ValueVec::None,
154                ValueVec::Bool(_) => ValueVec::Bool(Vec::new()),
155                ValueVec::Int(_) => ValueVec::Int(Vec::new()),
156                ValueVec::Long(_) => ValueVec::Long(Vec::new()),
157                ValueVec::Float(_) => ValueVec::Float(Vec::new()),
158                ValueVec::Double(_) => ValueVec::Double(Vec::new()),
159                ValueVec::String(_) => ValueVec::String(Vec::new()),
160                ValueVec::Binary(_) => ValueVec::Binary(Vec::new()),
161            },
162        }
163    }
164}
165
166impl From<FieldColumn> for schema::FieldData {
167    fn from(this: FieldColumn) -> schema::FieldData {
168        schema::FieldData {
169            field_name: this.name.to_string(),
170            field_id: 0,
171            r#type: this.dtype as _,
172            field: Some(match this.value {
173                ValueVec::None => Field::Scalars(ScalarField { data: None }),
174                ValueVec::Bool(v) => Field::Scalars(ScalarField {
175                    data: Some(ScalarData::BoolData(schema::BoolArray { data: v })),
176                }),
177                ValueVec::Int(v) => Field::Scalars(ScalarField {
178                    data: Some(ScalarData::IntData(schema::IntArray { data: v })),
179                }),
180                ValueVec::Long(v) => Field::Scalars(ScalarField {
181                    data: Some(ScalarData::LongData(schema::LongArray { data: v })),
182                }),
183                ValueVec::Float(v) => match this.dtype {
184                    DataType::Float => Field::Scalars(ScalarField {
185                        data: Some(ScalarData::FloatData(schema::FloatArray { data: v })),
186                    }),
187                    DataType::FloatVector => Field::Vectors(VectorField {
188                        data: Some(VectorData::FloatVector(schema::FloatArray { data: v })),
189                        dim: this.dim,
190                    }),
191                    _ => unimplemented!(),
192                },
193                ValueVec::Double(v) => Field::Scalars(ScalarField {
194                    data: Some(ScalarData::DoubleData(schema::DoubleArray { data: v })),
195                }),
196                ValueVec::String(v) => Field::Scalars(ScalarField {
197                    data: Some(ScalarData::StringData(schema::StringArray { data: v })),
198                }),
199                ValueVec::Binary(v) => Field::Vectors(VectorField {
200                    data: Some(VectorData::BinaryVector(v)),
201                    dim: this.dim,
202                }),
203            }),
204        }
205    }
206}
207
208pub trait FromField: Sized {
209    fn from_field(field: Field) -> Option<Self>;
210}
211
/// Generates [`FromField`] implementations from `Type[pattern => result]`
/// entries.
///
/// The bracketed tokens are pasted in as the first arm of a `match` on the
/// incoming field; any other payload falls through to `None`.
macro_rules! impl_from_field {
    ( $( $target:ty [ $($arm:tt)* ] ),+ ) => {
        $(
            impl FromField for $target {
                fn from_field(field: Field) -> Option<Self> {
                    match field {
                        $($arm)*,
                        _ => None,
                    }
                }
            }
        )+
    };
}
225
226impl_from_field! {
227    Vec<bool>[Field::Scalars(ScalarField {data: Some(ScalarData::BoolData(schema::BoolArray { data }))}) => Some(data)],
228    Vec<i8>[Field::Scalars(ScalarField {data: Some(ScalarData::IntData(schema::IntArray { data }))}) => Some(data.into_iter().map(|x|x as _).collect())],
229    Vec<i16>[Field::Scalars(ScalarField {data: Some(ScalarData::IntData(schema::IntArray { data }))}) => Some(data.into_iter().map(|x|x as _).collect())],
230    Vec<i32>[Field::Scalars(ScalarField {data: Some(ScalarData::IntData(schema::IntArray { data }))}) => Some(data)],
231    Vec<i64>[Field::Scalars(ScalarField {data: Some(ScalarData::LongData(schema::LongArray { data }))}) => Some(data)],
232    Vec<String>[Field::Scalars(ScalarField {data: Some(ScalarData::StringData(schema::StringArray { data }))}) => Some(data)],
233    Vec<f64>[Field::Scalars(ScalarField {data: Some(ScalarData::DoubleData(schema::DoubleArray { data }))}) => Some(data)],
234    Vec<u8>[Field::Vectors(VectorField {data: Some(VectorData::BinaryVector(data)), ..}) => Some(data)]
235}
236
237impl FromField for Vec<f32> {
238    fn from_field(field: Field) -> Option<Self> {
239        match field {
240            Field::Scalars(ScalarField {
241                data: Some(ScalarData::FloatData(schema::FloatArray { data })),
242            }) => Some(data),
243
244            Field::Vectors(VectorField {
245                data: Some(VectorData::FloatVector(schema::FloatArray { data })),
246                ..
247            }) => Some(data),
248
249            _ => None,
250        }
251    }
252}
253
254fn get_dim_max_length(field: &Field) -> (Option<i64>, Option<i32>) {
255    let dim = match field {
256        Field::Scalars(ScalarField { data: Some(_) }) => 1i64,
257        Field::Vectors(VectorField { dim, .. }) => *dim,
258        _ => 0i64,
259    };
260
261    (Some(dim), None) // no idea how to get max_length
262}