1#![doc = include_str!("../README.md")]
2#![warn(clippy::all, clippy::pedantic)]
3#![allow(clippy::missing_errors_doc, clippy::cast_sign_loss)]
4
5mod stores;
10
11pub use stores::{
12 artifact::{ArtifactKind, ArtifactStore},
13 asset::AssetStore,
14 cache::{CacheCompression, CacheDependency, CacheRead, CacheStore, CacheWrite},
15 content::ContentStore,
16 secrets::SecretsStore,
17};
18
19use anyhow::bail;
20use flexbuffers::Reader;
21use ordinary_config::{ContentDefinition, ModelConfig, StorageLimits};
22use ordinary_types::{Field, Kind, TimeUnit};
23use saferlmdb::{
24 ConstAccessor, Environment, ReadTransaction, Stat, WriteAccessor, WriteTransaction,
25};
26use std::{path::Path, str::FromStr, sync::Arc};
27
28pub use bytes;
29use bytes::{BufMut, Bytes, BytesMut};
30
31use crate::stores::model::ModelStore;
32pub use saferlmdb;
33use serde_json::Value;
34use tantivy::{
35 TantivyDocument, Term,
36 schema::{FAST, INDEXED, STORED, SchemaBuilder, TEXT},
37};
38use uuid::Uuid;
39
40fn build_schema_for_fields(
41 builder: &mut SchemaBuilder,
42 fields: &Vec<Field>,
43 is_model: bool,
44) -> bool {
45 let mut has_searchable = false;
46 let mut first_content_indexed = None;
47
48 for field in fields {
49 if !is_model && first_content_indexed.is_none() && field.indexed == Some(true) {
50 first_content_indexed = Some(field.idx);
51 }
52
53 if field.searchable == Some(true) {
54 has_searchable = true;
55
56 match &field.kind {
57 Kind::Bool => {
58 builder.add_bool_field(&field.idx.to_string(), INDEXED);
59 }
60 Kind::F32 | Kind::F64 => {
61 builder.add_f64_field(&field.idx.to_string(), INDEXED);
62 }
63 Kind::U8 | Kind::U16 | Kind::U32 | Kind::U64 => {
64 builder.add_u64_field(&field.idx.to_string(), INDEXED);
65 }
66 Kind::I8 | Kind::I16 | Kind::I32 | Kind::I64 => {
67 builder.add_i64_field(&field.idx.to_string(), INDEXED);
68 }
69 Kind::String | Kind::Markdown | Kind::Url => {
70 builder.add_text_field(&field.idx.to_string(), TEXT);
71 }
72 Kind::Json => {
73 builder.add_json_field(&field.idx.to_string(), TEXT);
74 }
75 Kind::Timestamp { unit } => match unit {
76 TimeUnit::Seconds => {
77 builder.add_i64_field(&field.idx.to_string(), INDEXED);
78 }
79 },
80 _ => {
81 tracing::error!("kind '{:?}' is not currently searchable", field.kind);
82 }
83 }
84 }
85 }
86
87 if has_searchable {
88 if is_model {
89 builder.add_bytes_field("0", FAST | STORED);
91 } else if let Some(idx) = first_content_indexed {
92 builder.add_bytes_field(&idx.to_string(), FAST | STORED);
94 }
95 }
96
97 has_searchable
98}
99
100fn add_field_to_doc_flexbuffer(
101 doc: &mut TantivyDocument,
102 field: &Field,
103 tantivy_field: tantivy::schema::Field,
104 reader: &Reader<&[u8]>,
105) {
106 match &field.kind {
107 Kind::Uuid => {
108 doc.add_bytes(tantivy_field, reader.as_blob().0);
109 }
110 Kind::Bool => {
111 doc.add_bool(tantivy_field, reader.as_bool());
112 }
113 Kind::F32 | Kind::F64 => {
114 doc.add_f64(tantivy_field, reader.as_f64());
115 }
116 Kind::U8 | Kind::U16 | Kind::U32 | Kind::U64 => {
117 doc.add_u64(tantivy_field, reader.as_u64());
118 }
119 Kind::I8 | Kind::I16 | Kind::I32 | Kind::I64 => {
120 doc.add_i64(tantivy_field, reader.as_i64());
121 }
122 Kind::String | Kind::Markdown | Kind::Json | Kind::Url => {
123 doc.add_text(tantivy_field, reader.as_str());
124 }
125 Kind::Timestamp { unit } => match unit {
126 TimeUnit::Seconds => {
127 doc.add_i64(tantivy_field, reader.as_i64());
128 }
129 },
130 _ => {
131 tracing::error!("kind '{:?}' is not currently searchable", field.kind);
132 }
133 }
134}
135
136fn add_field_to_doc_json(
137 doc: &mut TantivyDocument,
138 kind: &Kind,
139 tantivy_field: tantivy::schema::Field,
140 value: &Value,
141) -> anyhow::Result<()> {
142 match kind {
143 Kind::Uuid => {
144 let bytes = match value {
145 Value::Array(v) => {
146 let mut uuid_bytes = [0u8; 16];
147
148 for (i, value) in v.iter().enumerate() {
149 if i < 16
150 && let Some(val) = value.as_u64()
151 {
152 let byte = u8::try_from(val)?;
153 uuid_bytes[i] = byte;
154 }
155 }
156 uuid_bytes
157 }
158 Value::String(v) => match Uuid::from_str(v.as_str()) {
159 Ok(uuid) => *uuid.as_bytes(),
160 Err(err) => {
161 tracing::error!("{err}");
162 [0u8; 16]
163 }
164 },
165 _ => [0u8; 16],
166 };
167
168 doc.add_bytes(tantivy_field, &bytes[..]);
169 }
170 Kind::Bool => {
171 doc.add_bool(tantivy_field, value.as_bool().unwrap_or(false));
172 }
173 Kind::F32 | Kind::F64 => {
174 doc.add_f64(tantivy_field, value.as_f64().unwrap_or(0.0));
175 }
176 Kind::U8 | Kind::U16 | Kind::U32 | Kind::U64 => {
177 doc.add_u64(tantivy_field, value.as_u64().unwrap_or(0));
178 }
179 Kind::I8 | Kind::I16 | Kind::I32 | Kind::I64 => {
180 doc.add_i64(tantivy_field, value.as_i64().unwrap_or(0));
181 }
182 Kind::String | Kind::Markdown | Kind::Json | Kind::Url => {
183 doc.add_text(tantivy_field, value.as_str().unwrap_or(""));
184 }
185 Kind::Timestamp { unit } => match unit {
186 TimeUnit::Seconds => {
187 doc.add_i64(tantivy_field, value.as_i64().unwrap_or(0));
188 }
189 },
190 _ => {
191 tracing::error!("kind '{:?}' is not currently searchable", kind);
192 }
193 }
194
195 Ok(())
196}
197
198fn get_term(kind: &Kind, field: tantivy::schema::Field, value: &[u8]) -> anyhow::Result<Term> {
199 match kind {
200 Kind::Uuid => Ok(Term::from_field_bytes(field, value)),
201 Kind::Bool => Ok(Term::from_field_bool(
202 field,
203 if value.len() == 1 {
204 value[0] == 1
205 } else {
206 false
207 },
208 )),
209 Kind::F32 => Ok(Term::from_field_f64(
210 field,
211 f32::from_be_bytes(value[..].try_into()?).into(),
212 )),
213 Kind::F64 => Ok(Term::from_field_f64(
214 field,
215 f64::from_be_bytes(value[..].try_into()?),
216 )),
217 Kind::U8 => Ok(Term::from_field_u64(
218 field,
219 u8::from_be_bytes(value[..].try_into()?).into(),
220 )),
221 Kind::U16 => Ok(Term::from_field_u64(
222 field,
223 u16::from_be_bytes(value[..].try_into()?).into(),
224 )),
225 Kind::U32 => Ok(Term::from_field_u64(
226 field,
227 u32::from_be_bytes(value[..].try_into()?).into(),
228 )),
229 Kind::U64 => Ok(Term::from_field_u64(
230 field,
231 u64::from_be_bytes(value[..].try_into()?),
232 )),
233 Kind::I8 => Ok(Term::from_field_i64(
234 field,
235 i8::from_be_bytes(value[..].try_into()?).into(),
236 )),
237 Kind::I16 => Ok(Term::from_field_i64(
238 field,
239 i16::from_be_bytes(value[..].try_into()?).into(),
240 )),
241 Kind::I32 => Ok(Term::from_field_i64(
242 field,
243 i32::from_be_bytes(value[..].try_into()?).into(),
244 )),
245 Kind::I64 => Ok(Term::from_field_i64(
246 field,
247 i64::from_be_bytes(value[..].try_into()?),
248 )),
249 Kind::String | Kind::Markdown | Kind::Url => {
250 Ok(Term::from_field_text(field, std::str::from_utf8(value)?))
251 }
252 Kind::Json => Ok(Term::from_field_json_path(
253 field,
254 std::str::from_utf8(value)?,
255 true,
256 )),
257 Kind::Timestamp { unit } => match unit {
258 TimeUnit::Seconds => Ok(Term::from_field_i64(
259 field,
260 i64::from_be_bytes(value[..].try_into()?),
261 )),
262 },
263 _ => bail!("cannot search kind {kind:?}"),
264 }
265}
266
267fn field_to_bytes(field: &Field, reader: &Reader<&[u8]>) -> Bytes {
268 let mut out = BytesMut::new();
269
270 match &field.kind {
271 Kind::Uuid => {
272 out.put(reader.as_blob().0);
273 }
274 Kind::Bool => {
275 if reader.as_bool() {
276 out.put_u8(1);
277 } else {
278 out.put_u8(0);
279 }
280 }
281 Kind::F32 => out.put_f32(reader.as_f32()),
282 Kind::F64 => out.put_f64(reader.as_f64()),
283 Kind::U8 => out.put_u8(reader.as_u8()),
284 Kind::U16 => out.put_u16(reader.as_u16()),
285 Kind::U32 => out.put_u32(reader.as_u32()),
286 Kind::U64 => out.put_u64(reader.as_u64()),
287 Kind::I8 => out.put_i8(reader.as_i8()),
288 Kind::I16 => out.put_i16(reader.as_i16()),
289 Kind::I32 => out.put_i32(reader.as_i32()),
290 Kind::I64 => out.put_i64(reader.as_i64()),
291 Kind::String | Kind::Markdown | Kind::Json | Kind::Url => {
292 out.put(reader.as_str().as_bytes());
293 }
294 Kind::Timestamp { unit } => match unit {
295 TimeUnit::Seconds => out.put_i64(reader.as_i64()),
296 },
297 _ => {
298 tracing::error!(
299 "kind '{:?}' does not support encrypted/compressed",
300 field.kind
301 );
302 }
303 }
304
305 out.into()
306}
307
308fn push_field_from_bytes(
309 field: &Field,
310 bytes: &[u8],
311 dest: &mut flexbuffers::VectorBuilder,
312) -> anyhow::Result<()> {
313 match &field.kind {
314 Kind::Uuid => dest.push(flexbuffers::Blob(bytes)),
315 Kind::Bool => dest.push(if bytes.len() == 1 {
316 bytes[0] == 1
317 } else {
318 false
319 }),
320 Kind::F32 => dest.push(f32::from_be_bytes(bytes.try_into()?)),
321 Kind::F64 => dest.push(f64::from_be_bytes(bytes.try_into()?)),
322 Kind::U8 => dest.push(u8::from_be_bytes(bytes.try_into()?)),
323 Kind::U16 => dest.push(u16::from_be_bytes(bytes.try_into()?)),
324 Kind::U32 => dest.push(u32::from_be_bytes(bytes.try_into()?)),
325 Kind::U64 => dest.push(u64::from_be_bytes(bytes.try_into()?)),
326 Kind::I8 => dest.push(i8::from_be_bytes(bytes.try_into()?)),
327 Kind::I16 => dest.push(i16::from_be_bytes(bytes.try_into()?)),
328 Kind::I32 => dest.push(i32::from_be_bytes(bytes.try_into()?)),
329 Kind::I64 => dest.push(i64::from_be_bytes(bytes.try_into()?)),
330 Kind::String | Kind::Markdown | Kind::Json | Kind::Url => {
331 dest.push(std::str::from_utf8(bytes)?);
332 }
333 Kind::Timestamp { unit } => match unit {
334 TimeUnit::Seconds => dest.push(i64::from_be_bytes(bytes.try_into()?)),
335 },
336 _ => {
337 tracing::error!(
338 "kind '{:?}' does not support encrypted/compressed",
339 field.kind
340 );
341 }
342 }
343
344 Ok(())
345}
346
/// Borrowed handle to a transaction that is either read-only or read-write.
///
/// Lets code accept both transaction types through a single parameter.
enum Transaction<'a> {
    /// Shared reference to a [`ReadTransaction`].
    Read(&'a ReadTransaction<'a>),
    /// Shared reference to a [`WriteTransaction`].
    Write(&'a WriteTransaction<'a>),
}
351
/// Borrowed handle to a database accessor that is either read-only or writable.
///
/// Lets code accept both accessor types through a single parameter.
enum Accessor<'a> {
    /// Shared reference to a [`ConstAccessor`].
    Const(&'a ConstAccessor<'a>),
    /// Shared reference to a [`WriteAccessor`].
    Write(&'a WriteAccessor<'a>),
}
356
/// Recursive tree of reference descriptors.
///
/// Each entry pairs a `(u8, u8, Option<[u8; 16]>)` key with a nested
/// [`RefDepth`] holding that entry's children; an empty vector is a leaf.
///
/// NOTE(review): the meaning of the three key components is not visible from
/// this definition. Presumably the two `u8`s identify a model/field pair and
/// the optional 16 bytes a UUID — confirm against the code that builds and
/// walks this structure.
#[derive(Clone, Debug)]
#[allow(clippy::type_complexity)]
pub struct RefDepth(pub Vec<((u8, u8, Option<[u8; 16]>), RefDepth)>);
362
/// Comparison operator used when querying stored values.
#[derive(Clone, Debug)]
pub enum QueryExpression {
    /// Greater than or equal to.
    Gte,
    /// Strictly greater than.
    Gt,
    /// Less than or equal to.
    Lte,
    /// Strictly less than.
    Lt,
    /// Equal to.
    Eq,
    /// Begins with the given prefix.
    BeginsWith,
}

impl QueryExpression {
    /// Returns the single-byte code of this operator.
    ///
    /// The codes are fixed per variant and deliberately do not follow the
    /// declaration order of the enum: `Gte = 0`, `Lte = 1`, `Eq = 2`, `Gt = 3`,
    /// `Lt = 4`, `BeginsWith = 5`.
    #[must_use]
    pub fn as_byte(&self) -> u8 {
        match *self {
            Self::Gte => 0,
            Self::Gt => 3,
            Self::Lte => 1,
            Self::Lt => 4,
            Self::Eq => 2,
            Self::BeginsWith => 5,
        }
    }
}
387
388pub struct Storage {
389 env: Arc<Environment>,
391
392 pub content: ContentStore,
393 pub artifact: ArtifactStore,
394 pub asset: AssetStore,
395 pub cache: CacheStore,
396 pub secrets: SecretsStore,
397 pub model: ModelStore,
398}
399
400impl Storage {
402 #[allow(clippy::too_many_lines)]
403 pub fn new(
404 limits: StorageLimits,
405 model_configs: Vec<ModelConfig>,
406 content_defs: Vec<ContentDefinition>,
407 encryption_key: [u8; 32],
408 env: &Arc<Environment>,
409 search_dir: impl AsRef<Path>,
410 log_size: bool,
411 ) -> anyhow::Result<Self> {
412 Ok(Self {
413 env: env.clone(),
414
415 model: ModelStore::new(
416 limits.model,
417 model_configs,
418 encryption_key,
419 env,
420 &search_dir,
421 log_size,
422 )?,
423 content: ContentStore::new(limits.content, content_defs, env, &search_dir, log_size)?,
424 artifact: ArtifactStore::new(limits.artifact, env, log_size)?,
425 asset: AssetStore::new(limits.assets, env, log_size)?,
426 cache: CacheStore::new(limits.cache, env, log_size)?,
427 secrets: SecretsStore::new(env, encryption_key)?,
428 })
429 }
430
431 pub fn stat(&self) -> anyhow::Result<Stat> {
432 let stat = self.env.stat()?;
433 Ok(stat)
434 }
435
436 pub fn write_txn(&self) -> anyhow::Result<WriteTransaction<'_>> {
437 let txn = WriteTransaction::new(self.env.clone())?;
438 Ok(txn)
439 }
440}