1use std::iter;
2use std::ops::ControlFlow;
3use std::result::Result as StdResult;
4
5use bumpalo::Bump;
6use serde_json::value::RawValue;
7use serde_json::Value;
8
9use crate::fields_ids_map::MutFieldIdMapper;
10use crate::update::new::indexer::de::{match_component, DeOrBumpStr};
11use crate::update::new::KvReaderFieldId;
12use crate::{FieldId, InternalError, Object, Result, UserError};
13
/// Separator between the levels of a nested primary key path (e.g. `a.b`).
const PRIMARY_KEY_SPLIT_SYMBOL: char = '.';

/// Name of the default primary key field.
// NOTE(review): presumably used when no primary key is specified; the usage is outside this file.
pub const DEFAULT_PRIMARY_KEY: &str = "id";
19
/// Read-only mapping between field names and their [`FieldId`].
pub trait FieldIdMapper {
    /// Looks up the id associated with the field called `name`.
    ///
    /// Returns `None` when the mapper has no id for that name.
    fn id(&self, name: &str) -> Option<FieldId>;

    /// Looks up the name of the field identified by `id`.
    ///
    /// Returns `None` when the mapper has no name for that id.
    fn name(&self, id: FieldId) -> Option<&str>;
}
29
30impl<T> FieldIdMapper for &T
31where
32 T: FieldIdMapper,
33{
34 fn id(&self, name: &str) -> Option<FieldId> {
35 T::id(self, name)
36 }
37
38 fn name(&self, id: FieldId) -> Option<&str> {
39 T::name(self, id)
40 }
41}
42
/// A primary key path, borrowed from the string it was built from.
#[derive(Debug, Clone, Copy)]
pub enum PrimaryKey<'a> {
    /// A key whose path contains no split symbol; `field_id` is the id of the field `name`.
    Flat { name: &'a str, field_id: FieldId },
    /// A key whose path contains at least one split symbol (e.g. `a.b`); no field id is stored.
    Nested { name: &'a str },
}
50
/// Reasons why no single valid document id could be extracted from a document.
pub enum DocumentIdExtractionError {
    /// A candidate id was found but failed validation; carries the validation error.
    InvalidDocumentId(UserError),
    /// No value was found for the primary key.
    MissingDocumentId,
    /// Several values matched a nested primary key; carries how many had been collected.
    TooManyDocumentIds(usize),
}
56
57impl<'a> PrimaryKey<'a> {
58 pub fn new(path: &'a str, fields: &impl FieldIdMapper) -> Option<Self> {
59 Some(if path.contains(PRIMARY_KEY_SPLIT_SYMBOL) {
60 Self::Nested { name: path }
61 } else {
62 let field_id = fields.id(path)?;
63 Self::Flat { name: path, field_id }
64 })
65 }
66
67 pub fn new_or_insert(
68 path: &'a str,
69 fields: &mut impl MutFieldIdMapper,
70 ) -> StdResult<Self, UserError> {
71 Ok(if path.contains(PRIMARY_KEY_SPLIT_SYMBOL) {
72 Self::Nested { name: path }
73 } else {
74 let field_id = fields.insert(path).ok_or(UserError::AttributeLimitReached)?;
75 Self::Flat { name: path, field_id }
76 })
77 }
78
79 pub fn name(&self) -> &'a str {
80 match self {
81 PrimaryKey::Flat { name, .. } => name,
82 PrimaryKey::Nested { name } => name,
83 }
84 }
85
86 pub fn document_id(
87 &self,
88 document: &obkv::KvReader<FieldId>,
89 fields: &impl FieldIdMapper,
90 ) -> Result<StdResult<String, DocumentIdExtractionError>> {
91 match self {
92 PrimaryKey::Flat { name: _, field_id } => match document.get(*field_id) {
93 Some(document_id_bytes) => {
94 let document_id = serde_json::from_slice(document_id_bytes)
95 .map_err(InternalError::SerdeJson)?;
96 match validate_document_id_value(document_id) {
97 Ok(document_id) => Ok(Ok(document_id)),
98 Err(user_error) => {
99 Ok(Err(DocumentIdExtractionError::InvalidDocumentId(user_error)))
100 }
101 }
102 }
103 None => Ok(Err(DocumentIdExtractionError::MissingDocumentId)),
104 },
105 nested @ PrimaryKey::Nested { .. } => {
106 let mut matching_documents_ids = Vec::new();
107 for (first_level_name, right) in nested.possible_level_names() {
108 if let Some(field_id) = fields.id(first_level_name) {
109 if let Some(value_bytes) = document.get(field_id) {
110 let object = serde_json::from_slice(value_bytes)
111 .map_err(InternalError::SerdeJson)?;
112 fetch_matching_values(object, right, &mut matching_documents_ids);
113
114 if matching_documents_ids.len() >= 2 {
115 return Ok(Err(DocumentIdExtractionError::TooManyDocumentIds(
116 matching_documents_ids.len(),
117 )));
118 }
119 }
120 }
121 }
122
123 match matching_documents_ids.pop() {
124 Some(document_id) => match validate_document_id_value(document_id) {
125 Ok(document_id) => Ok(Ok(document_id)),
126 Err(user_error) => {
127 Ok(Err(DocumentIdExtractionError::InvalidDocumentId(user_error)))
128 }
129 },
130 None => Ok(Err(DocumentIdExtractionError::MissingDocumentId)),
131 }
132 }
133 }
134 }
135
136 pub fn extract_docid_from_db<'pl, 'bump: 'pl, Mapper: FieldIdMapper>(
137 &self,
138 document: &'pl KvReaderFieldId,
139 db_fields_ids_map: &Mapper,
140 indexer: &'bump Bump,
141 ) -> Result<DeOrBumpStr<'pl, 'bump>> {
142 use serde::Deserializer as _;
143
144 match self {
145 PrimaryKey::Flat { name: _, field_id } => {
146 let Some(document_id) = document.get(*field_id) else {
147 return Err(InternalError::DocumentsError(
148 crate::documents::Error::InvalidDocumentFormat,
149 )
150 .into());
151 };
152
153 let document_id: &RawValue =
154 serde_json::from_slice(document_id).map_err(InternalError::SerdeJson)?;
155
156 let document_id = document_id
157 .deserialize_any(crate::update::new::indexer::de::DocumentIdVisitor(indexer))
158 .map_err(InternalError::SerdeJson)?;
159
160 let external_document_id = match document_id {
161 Ok(document_id) => Ok(document_id),
162 Err(_) => Err(InternalError::DocumentsError(
163 crate::documents::Error::InvalidDocumentFormat,
164 )),
165 }?;
166
167 Ok(external_document_id)
168 }
169 nested @ PrimaryKey::Nested { name: _ } => {
170 let mut docid = None;
171 for (first_level, right) in nested.possible_level_names() {
172 let Some(fid) = db_fields_ids_map.id(first_level) else { continue };
173
174 let Some(value) = document.get(fid) else { continue };
175 let value: &RawValue =
176 serde_json::from_slice(value).map_err(InternalError::SerdeJson)?;
177 match match_component(first_level, right, value, indexer, &mut docid) {
178 ControlFlow::Continue(()) => continue,
179 ControlFlow::Break(Ok(_)) => {
180 return Err(InternalError::DocumentsError(
181 crate::documents::Error::InvalidDocumentFormat,
182 )
183 .into())
184 }
185 ControlFlow::Break(Err(err)) => {
186 return Err(InternalError::SerdeJson(err).into())
187 }
188 }
189 }
190 Ok(docid.ok_or(InternalError::DocumentsError(
191 crate::documents::Error::InvalidDocumentFormat,
192 ))?)
193 }
194 }
195 }
196
197 pub fn extract_fields_and_docid<'pl, 'bump: 'pl, Mapper: MutFieldIdMapper>(
198 &self,
199 document: &'pl RawValue,
200 new_fields_ids_map: &mut Mapper,
201 indexer: &'bump Bump,
202 ) -> Result<DeOrBumpStr<'pl, 'bump>> {
203 use serde::Deserializer as _;
204 let res = document
205 .deserialize_map(crate::update::new::indexer::de::FieldAndDocidExtractor::new(
206 new_fields_ids_map,
207 self,
208 indexer,
209 ))
210 .map_err(UserError::SerdeJson)??;
211
212 let external_document_id = match res {
213 Ok(document_id) => Ok(document_id),
214 Err(DocumentIdExtractionError::InvalidDocumentId(e)) => Err(e),
215 Err(DocumentIdExtractionError::MissingDocumentId) => {
216 Err(UserError::MissingDocumentId {
217 primary_key: self.name().to_string(),
218 document: serde_json::from_str(document.get()).unwrap(),
219 })
220 }
221 Err(DocumentIdExtractionError::TooManyDocumentIds(_)) => {
222 Err(UserError::TooManyDocumentIds {
223 primary_key: self.name().to_string(),
224 document: serde_json::from_str(document.get()).unwrap(),
225 })
226 }
227 }?;
228
229 Ok(external_document_id)
230 }
231
232 pub fn possible_level_names(&self) -> impl Iterator<Item = (&'a str, &'a str)> + '_ {
235 let name = self.name();
236 name.match_indices(PRIMARY_KEY_SPLIT_SYMBOL)
237 .map(move |(i, _)| (&name[..i], &name[i + PRIMARY_KEY_SPLIT_SYMBOL.len_utf8()..]))
238 .chain(iter::once((name, "")))
239 }
240}
241
242fn fetch_matching_values(value: Value, selector: &str, output: &mut Vec<Value>) {
243 match value {
244 Value::Object(object) => fetch_matching_values_in_object(object, selector, "", output),
245 otherwise => output.push(otherwise),
246 }
247}
248
249fn fetch_matching_values_in_object(
250 object: Object,
251 selector: &str,
252 base_key: &str,
253 output: &mut Vec<Value>,
254) {
255 for (key, value) in object {
256 let base_key = if base_key.is_empty() {
257 key.to_string()
258 } else {
259 format!("{}{}{}", base_key, PRIMARY_KEY_SPLIT_SYMBOL, key)
260 };
261
262 if starts_with(selector, &base_key) {
263 match value {
264 Value::Object(object) => {
265 fetch_matching_values_in_object(object, selector, &base_key, output)
266 }
267 value => output.push(value),
268 }
269 }
270 }
271}
272
273fn starts_with(selector: &str, key: &str) -> bool {
274 selector.strip_prefix(key).is_some_and(|tail| {
275 tail.chars().next().map(|c| c == PRIMARY_KEY_SPLIT_SYMBOL).unwrap_or(true)
276 })
277}
278
/// Returns `document_id` unchanged when it is a valid document id, `None` otherwise.
///
/// A valid id is between 1 and 511 bytes long and made only of ASCII letters,
/// ASCII digits, `-` and `_`.
pub fn validate_document_id_str(document_id: &str) -> Option<&str> {
    let length_ok = (1..512).contains(&document_id.len());
    let charset_ok = document_id
        .chars()
        .all(|c| matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_'));

    (length_ok && charset_ok).then_some(document_id)
}
291
292pub fn validate_document_id_value(document_id: Value) -> StdResult<String, UserError> {
293 match document_id {
294 Value::String(string) => match validate_document_id_str(&string) {
295 Some(s) if s.len() == string.len() => Ok(string),
296 Some(s) => Ok(s.to_string()),
297 None => Err(UserError::InvalidDocumentId { document_id: Value::String(string) }),
298 },
299 Value::Number(number) if !number.is_f64() => Ok(number.to_string()),
301 content => Err(UserError::InvalidDocumentId { document_id: content }),
302 }
303}