#![doc = include_str!("../README.md")]
#![warn(clippy::all, clippy::pedantic)]
#![allow(clippy::missing_errors_doc, clippy::cast_sign_loss)]
mod stores;
pub use stores::{
artifact::{ArtifactKind, ArtifactStore},
asset::AssetStore,
cache::{CacheCompression, CacheDependency, CacheRead, CacheStore, CacheWrite},
content::ContentStore,
secrets::SecretsStore,
};
use anyhow::bail;
use flexbuffers::Reader;
use ordinary_config::{ContentDefinition, ModelConfig, StorageLimits};
use ordinary_types::{Field, Kind, TimeUnit};
use saferlmdb::{
ConstAccessor, Environment, ReadTransaction, Stat, WriteAccessor, WriteTransaction,
};
use std::{path::Path, str::FromStr, sync::Arc};
pub use bytes;
use bytes::{BufMut, Bytes, BytesMut};
use crate::stores::model::ModelStore;
pub use saferlmdb;
use serde_json::Value;
use tantivy::{
TantivyDocument, Term,
schema::{FAST, INDEXED, STORED, SchemaBuilder, TEXT},
};
use uuid::Uuid;
/// Registers the searchable `fields` on a tantivy schema `builder`.
///
/// Each field flagged `searchable` is added under the name produced by
/// `field.idx.to_string()`, using a tantivy field type picked from its
/// [`Kind`]. Kinds without a mapping are logged and skipped, yet they still
/// count towards the return value.
///
/// If anything was searchable, one extra fast, stored bytes field is added:
/// `"0"` when `is_model` is true, otherwise the `idx` of the first field
/// flagged `indexed` (nothing is added when no field is indexed).
///
/// Returns `true` when at least one field is flagged `searchable`.
fn build_schema_for_fields(
    builder: &mut SchemaBuilder,
    fields: &[Field],
    is_model: bool,
) -> bool {
    let mut has_searchable = false;

    for field in fields.iter().filter(|f| f.searchable == Some(true)) {
        has_searchable = true;

        // Schema field names are the stringified field index; build it once
        // instead of once per match arm.
        let name = field.idx.to_string();

        match &field.kind {
            Kind::Bool => {
                builder.add_bool_field(&name, INDEXED);
            }
            Kind::F32 | Kind::F64 => {
                builder.add_f64_field(&name, INDEXED);
            }
            Kind::U8 | Kind::U16 | Kind::U32 | Kind::U64 => {
                builder.add_u64_field(&name, INDEXED);
            }
            Kind::I8 | Kind::I16 | Kind::I32 | Kind::I64 => {
                builder.add_i64_field(&name, INDEXED);
            }
            Kind::String | Kind::Markdown | Kind::Url => {
                builder.add_text_field(&name, TEXT);
            }
            Kind::Json => {
                builder.add_json_field(&name, TEXT);
            }
            // Kept as a nested exhaustive match so that a new `TimeUnit`
            // variant becomes a compile error here rather than a silent skip.
            Kind::Timestamp { unit } => match unit {
                TimeUnit::Seconds => {
                    builder.add_i64_field(&name, INDEXED);
                }
            },
            _ => {
                tracing::error!("kind '{:?}' is not currently searchable", field.kind);
            }
        }
    }

    if !has_searchable {
        return false;
    }

    if is_model {
        builder.add_bytes_field("0", FAST | STORED);
    } else if let Some(key_field) = fields.iter().find(|f| f.indexed == Some(true)) {
        // NOTE(review): if this first indexed field is also searchable, its
        // name was already registered in the loop above — confirm that
        // configurations never do this, or that the schema builder tolerates it.
        builder.add_bytes_field(&key_field.idx.to_string(), FAST | STORED);
    }

    true
}
/// Copies one flexbuffer-encoded value into a tantivy document.
///
/// The accessor called on `reader` and the `add_*` method called on `doc` are
/// both selected from `field.kind`. Kinds without a mapping are logged and
/// leave `doc` untouched.
fn add_field_to_doc_flexbuffer(
    doc: &mut TantivyDocument,
    field: &Field,
    tantivy_field: tantivy::schema::Field,
    reader: &Reader<&[u8]>,
) {
    let kind = &field.kind;
    match kind {
        Kind::Uuid => doc.add_bytes(tantivy_field, reader.as_blob().0),
        Kind::Bool => doc.add_bool(tantivy_field, reader.as_bool()),
        // Every float width is widened to `f64`, every integer width to 64 bits.
        Kind::F32 | Kind::F64 => doc.add_f64(tantivy_field, reader.as_f64()),
        Kind::U8 | Kind::U16 | Kind::U32 | Kind::U64 => {
            doc.add_u64(tantivy_field, reader.as_u64());
        }
        Kind::I8 | Kind::I16 | Kind::I32 | Kind::I64 => {
            doc.add_i64(tantivy_field, reader.as_i64());
        }
        // NOTE(review): `build_schema_for_fields` registers `Kind::Json` as a
        // JSON field, while a plain text value is added here — confirm the
        // indexer accepts that combination.
        Kind::String | Kind::Markdown | Kind::Json | Kind::Url => {
            doc.add_text(tantivy_field, reader.as_str());
        }
        Kind::Timestamp { unit } => match unit {
            TimeUnit::Seconds => doc.add_i64(tantivy_field, reader.as_i64()),
        },
        _ => {
            tracing::error!("kind '{:?}' is not currently searchable", field.kind);
        }
    }
}
/// Copies one JSON value into a tantivy document according to `kind`.
///
/// Values whose JSON type does not match `kind` fall back to a zero value
/// (`false`, `0`, `0.0`, `""`, or an all-zero UUID) instead of failing.
/// A UUID may be given either as an array of byte values or as a string; a
/// string that does not parse is logged and stored as all zeroes. Kinds
/// without a mapping are logged and leave `doc` untouched.
///
/// The only error path is a UUID array element (within the first 16) that is
/// an unsigned integer too large for a `u8`.
fn add_field_to_doc_json(
    doc: &mut TantivyDocument,
    kind: &Kind,
    tantivy_field: tantivy::schema::Field,
    value: &Value,
) -> anyhow::Result<()> {
    match kind {
        Kind::Uuid => {
            let mut uuid_bytes = [0u8; 16];
            match value {
                Value::Array(items) => {
                    // `zip` stops after 16 slots, so surplus elements are
                    // ignored; elements that are not unsigned integers leave
                    // their slot at zero.
                    for (slot, item) in uuid_bytes.iter_mut().zip(items) {
                        if let Some(raw) = item.as_u64() {
                            *slot = u8::try_from(raw)?;
                        }
                    }
                }
                Value::String(text) => match Uuid::from_str(text) {
                    Ok(uuid) => {
                        uuid_bytes = *uuid.as_bytes();
                    }
                    Err(err) => {
                        tracing::error!("{err}");
                    }
                },
                _ => {}
            }
            doc.add_bytes(tantivy_field, &uuid_bytes[..]);
        }
        Kind::Bool => doc.add_bool(tantivy_field, value.as_bool().unwrap_or_default()),
        Kind::F32 | Kind::F64 => doc.add_f64(tantivy_field, value.as_f64().unwrap_or_default()),
        Kind::U8 | Kind::U16 | Kind::U32 | Kind::U64 => {
            doc.add_u64(tantivy_field, value.as_u64().unwrap_or_default());
        }
        Kind::I8 | Kind::I16 | Kind::I32 | Kind::I64 => {
            doc.add_i64(tantivy_field, value.as_i64().unwrap_or_default());
        }
        Kind::String | Kind::Markdown | Kind::Json | Kind::Url => {
            doc.add_text(tantivy_field, value.as_str().unwrap_or_default());
        }
        Kind::Timestamp { unit } => match unit {
            TimeUnit::Seconds => doc.add_i64(tantivy_field, value.as_i64().unwrap_or_default()),
        },
        _ => {
            tracing::error!("kind '{:?}' is not currently searchable", kind);
        }
    }
    Ok(())
}
/// Builds the tantivy [`Term`] for `field` from a raw byte `value`.
///
/// Numeric kinds and timestamps are decoded from big-endian bytes and must be
/// exactly as wide as the kind; text kinds must be valid UTF-8; a UUID is
/// used as raw bytes. A bool is `true` only when `value` is exactly the single
/// byte `1`. For `Kind::Json` the bytes are interpreted as a JSON path with
/// dot expansion enabled.
///
/// Fails when the byte length or UTF-8 decoding is wrong, or when `kind` has
/// no term mapping.
fn get_term(kind: &Kind, field: tantivy::schema::Field, value: &[u8]) -> anyhow::Result<Term> {
    let term = match kind {
        Kind::Uuid => Term::from_field_bytes(field, value),
        Kind::Bool => Term::from_field_bool(field, matches!(value, [1])),

        // Floats are always indexed as `f64`.
        Kind::F32 => {
            Term::from_field_f64(field, f64::from(f32::from_be_bytes(value.try_into()?)))
        }
        Kind::F64 => Term::from_field_f64(field, f64::from_be_bytes(value.try_into()?)),

        // Unsigned integers are always indexed as `u64`.
        Kind::U8 => Term::from_field_u64(field, u64::from(u8::from_be_bytes(value.try_into()?))),
        Kind::U16 => {
            Term::from_field_u64(field, u64::from(u16::from_be_bytes(value.try_into()?)))
        }
        Kind::U32 => {
            Term::from_field_u64(field, u64::from(u32::from_be_bytes(value.try_into()?)))
        }
        Kind::U64 => Term::from_field_u64(field, u64::from_be_bytes(value.try_into()?)),

        // Signed integers are always indexed as `i64`.
        Kind::I8 => Term::from_field_i64(field, i64::from(i8::from_be_bytes(value.try_into()?))),
        Kind::I16 => {
            Term::from_field_i64(field, i64::from(i16::from_be_bytes(value.try_into()?)))
        }
        Kind::I32 => {
            Term::from_field_i64(field, i64::from(i32::from_be_bytes(value.try_into()?)))
        }
        Kind::I64 => Term::from_field_i64(field, i64::from_be_bytes(value.try_into()?)),

        Kind::String | Kind::Markdown | Kind::Url => {
            Term::from_field_text(field, std::str::from_utf8(value)?)
        }
        Kind::Json => Term::from_field_json_path(field, std::str::from_utf8(value)?, true),

        Kind::Timestamp { unit } => match unit {
            TimeUnit::Seconds => Term::from_field_i64(field, i64::from_be_bytes(value.try_into()?)),
        },

        _ => bail!("cannot search kind {kind:?}"),
    };
    Ok(term)
}
/// Serialises one flexbuffer value into a standalone byte buffer.
///
/// Numbers and timestamps are written with the `BufMut::put_*` integer/float
/// writers at the width of their kind, a bool becomes a single `0`/`1` byte,
/// text is written as its UTF-8 bytes and a UUID as its raw blob. Kinds
/// without a mapping are logged and produce an empty buffer.
fn field_to_bytes(field: &Field, reader: &Reader<&[u8]>) -> Bytes {
    let mut buf = BytesMut::new();
    match &field.kind {
        Kind::Uuid => buf.put_slice(reader.as_blob().0),
        Kind::Bool => buf.put_u8(u8::from(reader.as_bool())),

        Kind::U8 => buf.put_u8(reader.as_u8()),
        Kind::U16 => buf.put_u16(reader.as_u16()),
        Kind::U32 => buf.put_u32(reader.as_u32()),
        Kind::U64 => buf.put_u64(reader.as_u64()),

        Kind::I8 => buf.put_i8(reader.as_i8()),
        Kind::I16 => buf.put_i16(reader.as_i16()),
        Kind::I32 => buf.put_i32(reader.as_i32()),
        Kind::I64 => buf.put_i64(reader.as_i64()),

        Kind::F32 => buf.put_f32(reader.as_f32()),
        Kind::F64 => buf.put_f64(reader.as_f64()),

        Kind::String | Kind::Markdown | Kind::Json | Kind::Url => {
            buf.put_slice(reader.as_str().as_bytes());
        }
        Kind::Timestamp { unit } => match unit {
            TimeUnit::Seconds => buf.put_i64(reader.as_i64()),
        },
        _ => {
            tracing::error!(
                "kind '{:?}' does not support encrypted/compressed",
                field.kind
            );
        }
    }
    buf.freeze()
}
/// Decodes `bytes` according to `field.kind` and appends the value to `dest`.
///
/// Numeric kinds and timestamps are read as big-endian and must be exactly as
/// wide as the kind; text must be valid UTF-8; a UUID is pushed as a blob. A
/// bool is `true` only when `bytes` is exactly the single byte `1`. Kinds
/// without a mapping are logged and nothing is pushed.
///
/// Fails when the byte length or UTF-8 decoding is wrong.
fn push_field_from_bytes(
    field: &Field,
    bytes: &[u8],
    dest: &mut flexbuffers::VectorBuilder,
) -> anyhow::Result<()> {
    match &field.kind {
        Kind::Uuid => dest.push(flexbuffers::Blob(bytes)),
        Kind::Bool => dest.push(matches!(bytes, [1])),

        Kind::U8 => dest.push(u8::from_be_bytes(bytes.try_into()?)),
        Kind::U16 => dest.push(u16::from_be_bytes(bytes.try_into()?)),
        Kind::U32 => dest.push(u32::from_be_bytes(bytes.try_into()?)),
        Kind::U64 => dest.push(u64::from_be_bytes(bytes.try_into()?)),

        Kind::I8 => dest.push(i8::from_be_bytes(bytes.try_into()?)),
        Kind::I16 => dest.push(i16::from_be_bytes(bytes.try_into()?)),
        Kind::I32 => dest.push(i32::from_be_bytes(bytes.try_into()?)),
        Kind::I64 => dest.push(i64::from_be_bytes(bytes.try_into()?)),

        Kind::F32 => dest.push(f32::from_be_bytes(bytes.try_into()?)),
        Kind::F64 => dest.push(f64::from_be_bytes(bytes.try_into()?)),

        Kind::String | Kind::Markdown | Kind::Json | Kind::Url => {
            let text = std::str::from_utf8(bytes)?;
            dest.push(text);
        }
        Kind::Timestamp { unit } => match unit {
            TimeUnit::Seconds => dest.push(i64::from_be_bytes(bytes.try_into()?)),
        },
        _ => {
            tracing::error!(
                "kind '{:?}' does not support encrypted/compressed",
                field.kind
            );
        }
    }
    Ok(())
}
/// A borrowed `saferlmdb` transaction that is either read-only or read-write.
///
/// Nothing in this file uses it.
/// NOTE(review): presumably consumed by the `stores` submodules — confirm.
enum Transaction<'a> {
/// Borrow of a [`ReadTransaction`].
Read(&'a ReadTransaction<'a>),
/// Borrow of a [`WriteTransaction`].
Write(&'a WriteTransaction<'a>),
}
/// A borrowed `saferlmdb` accessor that is either read-only or read-write.
///
/// Nothing in this file uses it.
/// NOTE(review): presumably consumed by the `stores` submodules — confirm.
enum Accessor<'a> {
/// Borrow of a [`ConstAccessor`].
Const(&'a ConstAccessor<'a>),
/// Borrow of a [`WriteAccessor`].
Write(&'a WriteAccessor<'a>),
}
/// Recursive, tree-shaped value: every entry pairs a
/// `(u8, u8, Option<[u8; 16]>)` tuple with a nested `RefDepth`.
///
/// An empty vector terminates the recursion.
///
/// NOTE(review): the meaning of the three tuple members is not visible in
/// this file — confirm against the code that builds and walks this type.
#[derive(Clone, Debug)]
#[allow(clippy::type_complexity)]
pub struct RefDepth(pub Vec<((u8, u8, Option<[u8; 16]>), RefDepth)>);
/// Operator selector for a query.
///
/// NOTE(review): the comparisons themselves are implemented outside this
/// file; the variant names suggest the usual `>=`, `>`, `<=`, `<`, `==` and
/// prefix match — confirm against the query code.
#[derive(Clone, Debug)]
pub enum QueryExpression {
    Gte,
    Gt,
    Lte,
    Lt,
    Eq,
    BeginsWith,
}

impl QueryExpression {
    /// Returns the single-byte code of this operator.
    ///
    /// The codes deliberately do not follow declaration order:
    ///
    /// | variant      | byte |
    /// |--------------|------|
    /// | `Gte`        | 0    |
    /// | `Lte`        | 1    |
    /// | `Eq`         | 2    |
    /// | `Gt`         | 3    |
    /// | `Lt`         | 4    |
    /// | `BeginsWith` | 5    |
    #[must_use]
    pub fn as_byte(&self) -> u8 {
        // Arms are listed in declaration order; the values are what matter.
        match self {
            Self::Gte => 0,
            Self::Gt => 3,
            Self::Lte => 1,
            Self::Lt => 4,
            Self::Eq => 2,
            Self::BeginsWith => 5,
        }
    }
}
/// Bundle of all stores, each constructed from the same `saferlmdb`
/// [`Environment`] in [`Storage::new`].
pub struct Storage {
/// Shared environment handle, used by [`Storage::stat`] and
/// [`Storage::write_txn`].
env: Arc<Environment>,
/// Content store, built from the content definitions and the search directory.
pub content: ContentStore,
/// Artifact store.
pub artifact: ArtifactStore,
/// Asset store.
pub asset: AssetStore,
/// Cache store.
pub cache: CacheStore,
/// Secrets store, built with the encryption key.
pub secrets: SecretsStore,
/// Model store, built from the model configs, the encryption key and the
/// search directory.
pub model: ModelStore,
}
impl Storage {
    /// Builds every store on top of the shared environment `env`.
    ///
    /// * `limits` – per-store limits; each store receives its own member.
    /// * `model_configs` / `content_defs` – definitions handed to the model
    ///   and content stores respectively.
    /// * `encryption_key` – passed to the model and secrets stores.
    /// * `search_dir` – passed to the model and content stores.
    /// * `log_size` – forwarded unchanged to every store except secrets.
    ///
    /// Stores are constructed in the order model, content, artifact, asset,
    /// cache, secrets; the first constructor that fails aborts the whole call.
    #[allow(clippy::too_many_lines)]
    pub fn new(
        limits: StorageLimits,
        model_configs: Vec<ModelConfig>,
        content_defs: Vec<ContentDefinition>,
        encryption_key: [u8; 32],
        env: &Arc<Environment>,
        search_dir: impl AsRef<Path>,
        log_size: bool,
    ) -> anyhow::Result<Self> {
        let shared_env = Arc::clone(env);

        let model = ModelStore::new(
            limits.model,
            model_configs,
            encryption_key,
            env,
            &search_dir,
            log_size,
        )?;
        let content =
            ContentStore::new(limits.content, content_defs, env, &search_dir, log_size)?;
        let artifact = ArtifactStore::new(limits.artifact, env, log_size)?;
        let asset = AssetStore::new(limits.assets, env, log_size)?;
        let cache = CacheStore::new(limits.cache, env, log_size)?;
        let secrets = SecretsStore::new(env, encryption_key)?;

        Ok(Self {
            env: shared_env,
            content,
            artifact,
            asset,
            cache,
            secrets,
            model,
        })
    }

    /// Returns the statistics reported by the underlying environment.
    pub fn stat(&self) -> anyhow::Result<Stat> {
        Ok(self.env.stat()?)
    }

    /// Opens a new write transaction on the shared environment.
    pub fn write_txn(&self) -> anyhow::Result<WriteTransaction<'_>> {
        Ok(WriteTransaction::new(Arc::clone(&self.env))?)
    }
}