1use bumpalo::Bump;
2use heed::RoTxn;
3use serde_json::Value;
4
5use super::document::{
6 Document as _, DocumentFromDb, DocumentFromVersions, MergedDocument, Versions,
7};
8use super::vector_document::{
9 MergedVectorDocument, VectorDocumentFromDb, VectorDocumentFromVersions,
10};
11use crate::attribute_patterns::PatternMatch;
12use crate::documents::FieldIdMapper;
13use crate::vector::EmbeddingConfigs;
14use crate::{DocumentId, Index, InternalError, Result};
15
/// A change to apply to a single document.
///
/// Every variant carries the internal and external ids of the document it targets;
/// see [`DocumentChange::docid`] and [`DocumentChange::external_docid`].
pub enum DocumentChange<'doc> {
    /// The document is deleted; only its ids are carried.
    Deletion(Deletion<'doc>),
    /// The document is updated with new versions.
    Update(Update<'doc>),
    /// The document is inserted with new versions.
    Insertion(Insertion<'doc>),
}
21
/// The deletion of a document.
pub struct Deletion<'doc> {
    /// Internal id of the deleted document.
    docid: DocumentId,
    /// External id of the deleted document.
    external_document_id: &'doc str,
}
26
/// The update of a document.
pub struct Update<'doc> {
    /// Internal id of the updated document.
    docid: DocumentId,
    /// External id of the updated document.
    external_document_id: &'doc str,
    /// The new versions carried by this update.
    new: Versions<'doc>,
    /// When `true`, `merged` and `merged_vectors` build their result without reading the
    /// database, and `has_changed_for_fields` additionally checks whether selected fields of
    /// the stored document are missing from `new`.
    from_scratch: bool,
}
33
/// The insertion of a document.
pub struct Insertion<'doc> {
    /// Internal id of the inserted document.
    docid: DocumentId,
    /// External id of the inserted document.
    external_document_id: &'doc str,
    /// The versions making up the inserted document.
    new: Versions<'doc>,
}
39
40impl<'doc> DocumentChange<'doc> {
41 pub fn docid(&self) -> DocumentId {
42 match &self {
43 Self::Deletion(inner) => inner.docid(),
44 Self::Update(inner) => inner.docid(),
45 Self::Insertion(inner) => inner.docid(),
46 }
47 }
48
49 pub fn external_docid(&self) -> &'doc str {
50 match self {
51 DocumentChange::Deletion(deletion) => deletion.external_document_id(),
52 DocumentChange::Update(update) => update.external_document_id(),
53 DocumentChange::Insertion(insertion) => insertion.external_document_id(),
54 }
55 }
56}
57
58impl<'doc> Deletion<'doc> {
59 pub fn create(docid: DocumentId, external_document_id: &'doc str) -> Self {
60 Self { docid, external_document_id }
61 }
62
63 pub fn docid(&self) -> DocumentId {
64 self.docid
65 }
66
67 pub fn external_document_id(&self) -> &'doc str {
68 self.external_document_id
69 }
70
71 pub fn current<'a, Mapper: FieldIdMapper>(
72 &self,
73 rtxn: &'a RoTxn,
74 index: &'a Index,
75 mapper: &'a Mapper,
76 ) -> Result<DocumentFromDb<'a, Mapper>> {
77 Ok(DocumentFromDb::new(self.docid, rtxn, index, mapper)?.ok_or(
78 crate::error::UserError::UnknownInternalDocumentId { document_id: self.docid },
79 )?)
80 }
81}
82
83impl<'doc> Insertion<'doc> {
84 pub fn create(docid: DocumentId, external_document_id: &'doc str, new: Versions<'doc>) -> Self {
85 Insertion { docid, external_document_id, new }
86 }
87
88 pub fn docid(&self) -> DocumentId {
89 self.docid
90 }
91
92 pub fn external_document_id(&self) -> &'doc str {
93 self.external_document_id
94 }
95 pub fn inserted(&self) -> DocumentFromVersions<'_, 'doc> {
96 DocumentFromVersions::new(&self.new)
97 }
98
99 pub fn inserted_vectors(
100 &self,
101 doc_alloc: &'doc Bump,
102 embedders: &'doc EmbeddingConfigs,
103 ) -> Result<Option<VectorDocumentFromVersions<'doc>>> {
104 VectorDocumentFromVersions::new(self.external_document_id, &self.new, doc_alloc, embedders)
105 }
106}
107
108impl<'doc> Update<'doc> {
109 pub fn create(
110 docid: DocumentId,
111 external_document_id: &'doc str,
112 new: Versions<'doc>,
113 from_scratch: bool,
114 ) -> Self {
115 Update { docid, new, external_document_id, from_scratch }
116 }
117
118 pub fn docid(&self) -> DocumentId {
119 self.docid
120 }
121
122 pub fn external_document_id(&self) -> &'doc str {
123 self.external_document_id
124 }
125 pub fn current<'a, Mapper: FieldIdMapper>(
126 &self,
127 rtxn: &'a RoTxn,
128 index: &'a Index,
129 mapper: &'a Mapper,
130 ) -> Result<DocumentFromDb<'a, Mapper>> {
131 Ok(DocumentFromDb::new(self.docid, rtxn, index, mapper)?.ok_or(
132 crate::error::UserError::UnknownInternalDocumentId { document_id: self.docid },
133 )?)
134 }
135
136 pub fn current_vectors<'a, Mapper: FieldIdMapper>(
137 &self,
138 rtxn: &'a RoTxn,
139 index: &'a Index,
140 mapper: &'a Mapper,
141 doc_alloc: &'a Bump,
142 ) -> Result<VectorDocumentFromDb<'a>> {
143 Ok(VectorDocumentFromDb::new(self.docid, index, rtxn, mapper, doc_alloc)?.ok_or(
144 crate::error::UserError::UnknownInternalDocumentId { document_id: self.docid },
145 )?)
146 }
147
148 pub fn only_changed_fields(&self) -> DocumentFromVersions<'_, 'doc> {
149 DocumentFromVersions::new(&self.new)
150 }
151
152 pub fn merged<'t, Mapper: FieldIdMapper>(
153 &self,
154 rtxn: &'t RoTxn,
155 index: &'t Index,
156 mapper: &'t Mapper,
157 ) -> Result<MergedDocument<'_, 'doc, 't, Mapper>> {
158 if self.from_scratch {
159 Ok(MergedDocument::without_db(DocumentFromVersions::new(&self.new)))
160 } else {
161 MergedDocument::with_db(
162 self.docid,
163 rtxn,
164 index,
165 mapper,
166 DocumentFromVersions::new(&self.new),
167 )
168 }
169 }
170
171 pub fn has_changed_for_fields<'t, Mapper: FieldIdMapper>(
178 &self,
179 selector: &mut impl FnMut(&str) -> PatternMatch,
180 rtxn: &'t RoTxn,
181 index: &'t Index,
182 mapper: &'t Mapper,
183 ) -> Result<bool> {
184 let mut changed = false;
185 let mut cached_current = None;
186 let mut updated_selected_field_count = 0;
187
188 for entry in self.only_changed_fields().iter_top_level_fields() {
189 let (key, updated_value) = entry?;
190
191 if selector(key) == PatternMatch::NoMatch {
192 continue;
193 }
194
195 updated_selected_field_count += 1;
196 let current = match cached_current {
197 Some(current) => current,
198 None => self.current(rtxn, index, mapper)?,
199 };
200 let current_value = current.top_level_field(key)?;
201 let Some(current_value) = current_value else {
202 changed = true;
203 break;
204 };
205
206 if current_value.get() != updated_value.get() {
207 changed = true;
208 break;
209 }
210 cached_current = Some(current);
211 }
212
213 if !self.from_scratch {
214 return Ok(changed);
216 }
217
218 if changed {
219 return Ok(true);
220 }
221
222 let has_deleted_fields = {
226 let current = match cached_current {
227 Some(current) => current,
228 None => self.current(rtxn, index, mapper)?,
229 };
230
231 let mut current_selected_field_count = 0;
232 for entry in current.iter_top_level_fields() {
233 let (key, _) = entry?;
234
235 if selector(key) == PatternMatch::NoMatch {
236 continue;
237 }
238 current_selected_field_count += 1;
239 }
240
241 current_selected_field_count != updated_selected_field_count
242 };
243
244 Ok(has_deleted_fields)
245 }
246
247 pub fn has_changed_for_geo_fields<'t, Mapper: FieldIdMapper>(
249 &self,
250 rtxn: &'t RoTxn,
251 index: &'t Index,
252 mapper: &'t Mapper,
253 ) -> Result<bool> {
254 let current = self.current(rtxn, index, mapper)?;
255 let current_geo = current.geo_field()?;
256 let updated_geo = self.only_changed_fields().geo_field()?;
257 match (current_geo, updated_geo) {
258 (Some(current_geo), Some(updated_geo)) => {
259 let current: Value =
260 serde_json::from_str(current_geo.get()).map_err(InternalError::SerdeJson)?;
261 let updated: Value =
262 serde_json::from_str(updated_geo.get()).map_err(InternalError::SerdeJson)?;
263 Ok(current != updated)
264 }
265 (None, None) => Ok(false),
266 _ => Ok(true),
267 }
268 }
269
270 pub fn only_changed_vectors(
271 &self,
272 doc_alloc: &'doc Bump,
273 embedders: &'doc EmbeddingConfigs,
274 ) -> Result<Option<VectorDocumentFromVersions<'doc>>> {
275 VectorDocumentFromVersions::new(self.external_document_id, &self.new, doc_alloc, embedders)
276 }
277
278 pub fn merged_vectors<Mapper: FieldIdMapper>(
279 &self,
280 rtxn: &'doc RoTxn,
281 index: &'doc Index,
282 mapper: &'doc Mapper,
283 doc_alloc: &'doc Bump,
284 embedders: &'doc EmbeddingConfigs,
285 ) -> Result<Option<MergedVectorDocument<'doc>>> {
286 if self.from_scratch {
287 MergedVectorDocument::without_db(
288 self.external_document_id,
289 &self.new,
290 doc_alloc,
291 embedders,
292 )
293 } else {
294 MergedVectorDocument::with_db(
295 self.docid,
296 self.external_document_id,
297 index,
298 rtxn,
299 mapper,
300 &self.new,
301 doc_alloc,
302 embedders,
303 )
304 }
305 }
306}