// milli_core/update/new/document_change.rs

1use bumpalo::Bump;
2use heed::RoTxn;
3use serde_json::Value;
4
5use super::document::{
6    Document as _, DocumentFromDb, DocumentFromVersions, MergedDocument, Versions,
7};
8use super::vector_document::{
9    MergedVectorDocument, VectorDocumentFromDb, VectorDocumentFromVersions,
10};
11use crate::attribute_patterns::PatternMatch;
12use crate::documents::FieldIdMapper;
13use crate::vector::EmbeddingConfigs;
14use crate::{DocumentId, Index, InternalError, Result};
15
16pub enum DocumentChange<'doc> {
17    Deletion(Deletion<'doc>),
18    Update(Update<'doc>),
19    Insertion(Insertion<'doc>),
20}
21
22pub struct Deletion<'doc> {
23    docid: DocumentId,
24    external_document_id: &'doc str,
25}
26
27pub struct Update<'doc> {
28    docid: DocumentId,
29    external_document_id: &'doc str,
30    new: Versions<'doc>,
31    from_scratch: bool,
32}
33
34pub struct Insertion<'doc> {
35    docid: DocumentId,
36    external_document_id: &'doc str,
37    new: Versions<'doc>,
38}
39
40impl<'doc> DocumentChange<'doc> {
41    pub fn docid(&self) -> DocumentId {
42        match &self {
43            Self::Deletion(inner) => inner.docid(),
44            Self::Update(inner) => inner.docid(),
45            Self::Insertion(inner) => inner.docid(),
46        }
47    }
48
49    pub fn external_docid(&self) -> &'doc str {
50        match self {
51            DocumentChange::Deletion(deletion) => deletion.external_document_id(),
52            DocumentChange::Update(update) => update.external_document_id(),
53            DocumentChange::Insertion(insertion) => insertion.external_document_id(),
54        }
55    }
56}
57
58impl<'doc> Deletion<'doc> {
59    pub fn create(docid: DocumentId, external_document_id: &'doc str) -> Self {
60        Self { docid, external_document_id }
61    }
62
63    pub fn docid(&self) -> DocumentId {
64        self.docid
65    }
66
67    pub fn external_document_id(&self) -> &'doc str {
68        self.external_document_id
69    }
70
71    pub fn current<'a, Mapper: FieldIdMapper>(
72        &self,
73        rtxn: &'a RoTxn,
74        index: &'a Index,
75        mapper: &'a Mapper,
76    ) -> Result<DocumentFromDb<'a, Mapper>> {
77        Ok(DocumentFromDb::new(self.docid, rtxn, index, mapper)?.ok_or(
78            crate::error::UserError::UnknownInternalDocumentId { document_id: self.docid },
79        )?)
80    }
81}
82
83impl<'doc> Insertion<'doc> {
84    pub fn create(docid: DocumentId, external_document_id: &'doc str, new: Versions<'doc>) -> Self {
85        Insertion { docid, external_document_id, new }
86    }
87
88    pub fn docid(&self) -> DocumentId {
89        self.docid
90    }
91
92    pub fn external_document_id(&self) -> &'doc str {
93        self.external_document_id
94    }
95    pub fn inserted(&self) -> DocumentFromVersions<'_, 'doc> {
96        DocumentFromVersions::new(&self.new)
97    }
98
99    pub fn inserted_vectors(
100        &self,
101        doc_alloc: &'doc Bump,
102        embedders: &'doc EmbeddingConfigs,
103    ) -> Result<Option<VectorDocumentFromVersions<'doc>>> {
104        VectorDocumentFromVersions::new(self.external_document_id, &self.new, doc_alloc, embedders)
105    }
106}
107
108impl<'doc> Update<'doc> {
109    pub fn create(
110        docid: DocumentId,
111        external_document_id: &'doc str,
112        new: Versions<'doc>,
113        from_scratch: bool,
114    ) -> Self {
115        Update { docid, new, external_document_id, from_scratch }
116    }
117
118    pub fn docid(&self) -> DocumentId {
119        self.docid
120    }
121
122    pub fn external_document_id(&self) -> &'doc str {
123        self.external_document_id
124    }
125    pub fn current<'a, Mapper: FieldIdMapper>(
126        &self,
127        rtxn: &'a RoTxn,
128        index: &'a Index,
129        mapper: &'a Mapper,
130    ) -> Result<DocumentFromDb<'a, Mapper>> {
131        Ok(DocumentFromDb::new(self.docid, rtxn, index, mapper)?.ok_or(
132            crate::error::UserError::UnknownInternalDocumentId { document_id: self.docid },
133        )?)
134    }
135
136    pub fn current_vectors<'a, Mapper: FieldIdMapper>(
137        &self,
138        rtxn: &'a RoTxn,
139        index: &'a Index,
140        mapper: &'a Mapper,
141        doc_alloc: &'a Bump,
142    ) -> Result<VectorDocumentFromDb<'a>> {
143        Ok(VectorDocumentFromDb::new(self.docid, index, rtxn, mapper, doc_alloc)?.ok_or(
144            crate::error::UserError::UnknownInternalDocumentId { document_id: self.docid },
145        )?)
146    }
147
148    pub fn only_changed_fields(&self) -> DocumentFromVersions<'_, 'doc> {
149        DocumentFromVersions::new(&self.new)
150    }
151
152    pub fn merged<'t, Mapper: FieldIdMapper>(
153        &self,
154        rtxn: &'t RoTxn,
155        index: &'t Index,
156        mapper: &'t Mapper,
157    ) -> Result<MergedDocument<'_, 'doc, 't, Mapper>> {
158        if self.from_scratch {
159            Ok(MergedDocument::without_db(DocumentFromVersions::new(&self.new)))
160        } else {
161            MergedDocument::with_db(
162                self.docid,
163                rtxn,
164                index,
165                mapper,
166                DocumentFromVersions::new(&self.new),
167            )
168        }
169    }
170
171    /// Returns whether the updated version of the document is different from the current version for the subset of fields selected by `selector`.
172    ///
173    /// `true` if at least one top-level-field that is exactly a selected field or a parent of a selected field changed.
174    /// Otherwise `false`.
175    ///
176    /// - Note: `_geo` and `_vectors` are not taken into account by this function.
177    pub fn has_changed_for_fields<'t, Mapper: FieldIdMapper>(
178        &self,
179        selector: &mut impl FnMut(&str) -> PatternMatch,
180        rtxn: &'t RoTxn,
181        index: &'t Index,
182        mapper: &'t Mapper,
183    ) -> Result<bool> {
184        let mut changed = false;
185        let mut cached_current = None;
186        let mut updated_selected_field_count = 0;
187
188        for entry in self.only_changed_fields().iter_top_level_fields() {
189            let (key, updated_value) = entry?;
190
191            if selector(key) == PatternMatch::NoMatch {
192                continue;
193            }
194
195            updated_selected_field_count += 1;
196            let current = match cached_current {
197                Some(current) => current,
198                None => self.current(rtxn, index, mapper)?,
199            };
200            let current_value = current.top_level_field(key)?;
201            let Some(current_value) = current_value else {
202                changed = true;
203                break;
204            };
205
206            if current_value.get() != updated_value.get() {
207                changed = true;
208                break;
209            }
210            cached_current = Some(current);
211        }
212
213        if !self.from_scratch {
214            // no field deletion or update, so fields that don't appear in `updated` cannot have changed
215            return Ok(changed);
216        }
217
218        if changed {
219            return Ok(true);
220        }
221
222        // we saw all updated fields, and set `changed` if any field wasn't in `current`.
223        // so if there are as many fields in `current` as in `updated`, then nothing changed.
224        // If there is any more fields in `current`, then they are missing in `updated`.
225        let has_deleted_fields = {
226            let current = match cached_current {
227                Some(current) => current,
228                None => self.current(rtxn, index, mapper)?,
229            };
230
231            let mut current_selected_field_count = 0;
232            for entry in current.iter_top_level_fields() {
233                let (key, _) = entry?;
234
235                if selector(key) == PatternMatch::NoMatch {
236                    continue;
237                }
238                current_selected_field_count += 1;
239            }
240
241            current_selected_field_count != updated_selected_field_count
242        };
243
244        Ok(has_deleted_fields)
245    }
246
247    /// Returns `true` if the geo fields have changed.
248    pub fn has_changed_for_geo_fields<'t, Mapper: FieldIdMapper>(
249        &self,
250        rtxn: &'t RoTxn,
251        index: &'t Index,
252        mapper: &'t Mapper,
253    ) -> Result<bool> {
254        let current = self.current(rtxn, index, mapper)?;
255        let current_geo = current.geo_field()?;
256        let updated_geo = self.only_changed_fields().geo_field()?;
257        match (current_geo, updated_geo) {
258            (Some(current_geo), Some(updated_geo)) => {
259                let current: Value =
260                    serde_json::from_str(current_geo.get()).map_err(InternalError::SerdeJson)?;
261                let updated: Value =
262                    serde_json::from_str(updated_geo.get()).map_err(InternalError::SerdeJson)?;
263                Ok(current != updated)
264            }
265            (None, None) => Ok(false),
266            _ => Ok(true),
267        }
268    }
269
270    pub fn only_changed_vectors(
271        &self,
272        doc_alloc: &'doc Bump,
273        embedders: &'doc EmbeddingConfigs,
274    ) -> Result<Option<VectorDocumentFromVersions<'doc>>> {
275        VectorDocumentFromVersions::new(self.external_document_id, &self.new, doc_alloc, embedders)
276    }
277
278    pub fn merged_vectors<Mapper: FieldIdMapper>(
279        &self,
280        rtxn: &'doc RoTxn,
281        index: &'doc Index,
282        mapper: &'doc Mapper,
283        doc_alloc: &'doc Bump,
284        embedders: &'doc EmbeddingConfigs,
285    ) -> Result<Option<MergedVectorDocument<'doc>>> {
286        if self.from_scratch {
287            MergedVectorDocument::without_db(
288                self.external_document_id,
289                &self.new,
290                doc_alloc,
291                embedders,
292            )
293        } else {
294            MergedVectorDocument::with_db(
295                self.docid,
296                self.external_document_id,
297                index,
298                rtxn,
299                mapper,
300                &self.new,
301                doc_alloc,
302                embedders,
303            )
304        }
305    }
306}