#[allow(unused_imports)]
use alloc::collections::BTreeMap;
#[allow(unused_imports)]
use core::marker::PhantomData;
use jacquard_common::CowStr;
use jacquard_common::deps::bytes::Bytes;
#[allow(unused_imports)]
use jacquard_common::deps::codegen::unicode_segmentation::UnicodeSegmentation;
use jacquard_common::types::collection::{Collection, RecordError};
use jacquard_common::types::string::{AtUri, Cid, Datetime};
use jacquard_common::types::uri::{RecordUri, UriError};
use jacquard_common::types::value::Data;
use jacquard_common::xrpc::XrpcResp;
use jacquard_derive::{IntoStatic, lexicon, open_union};
use jacquard_lexicon::lexicon::LexiconDoc;
use jacquard_lexicon::schema::LexiconSchema;
#[allow(unused_imports)]
use jacquard_lexicon::validation::{ConstraintError, ValidationPath};
use serde::{Serialize, Deserialize};
use crate::science_alt::dataset::storage_blobs::StorageBlobs;
use crate::science_alt::dataset::storage_http::StorageHttp;
use crate::science_alt::dataset::storage_s3::StorageS3;
use crate::science_alt::dataset::entry;
/// Information about dataset size (`datasetSize` in `science.alt.dataset.entry`).
///
/// Every counter is optional and is left out of the serialized form when `None`.
/// Field names are serialized in camelCase.
// NOTE(review): no visible field uses `'a`; presumably `#[lexicon]` injects a
// borrowed field (an `extra_data` field is set on `Entry` in this file) — confirm.
#[lexicon]
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, IntoStatic, Default)]
#[serde(rename_all = "camelCase")]
pub struct DatasetSize<'a> {
/// Byte count; the lexicon declares a minimum of 0.
#[serde(skip_serializing_if = "Option::is_none")]
pub bytes: Option<i64>,
/// Sample count; the lexicon declares a minimum of 0.
#[serde(skip_serializing_if = "Option::is_none")]
pub samples: Option<i64>,
/// Shard count; the lexicon declares a minimum of 1.
#[serde(skip_serializing_if = "Option::is_none")]
pub shards: Option<i64>,
}
/// Index entry for a WebDataset-backed dataset with references to storage
/// location and sample schema (`main` record of `science.alt.dataset.entry`).
///
/// Field names are serialized in camelCase; optional fields are omitted from
/// the serialized form when `None`. The lexicon lists `name`, `schemaRef`,
/// `storage` and `createdAt` as required.
#[lexicon]
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, IntoStatic)]
#[serde(rename_all = "camelCase")]
pub struct Entry<'a> {
/// Free-form content metadata (typed `unknown` in the lexicon). Per the
/// `metadataSchemaRef` description, it is validated against that schema at
/// write time when the reference is present.
#[serde(skip_serializing_if = "Option::is_none")]
#[serde(borrow)]
pub content_metadata: Option<Data<'a>>,
/// Timestamp when this dataset entry was created.
pub created_at: Datetime,
/// Human-readable description of the dataset (lexicon max length 5000).
#[serde(skip_serializing_if = "Option::is_none")]
#[serde(borrow)]
pub description: Option<CowStr<'a>>,
/// License identifier or URL (lexicon max length 200). SPDX identifiers are
/// recommended, e.g. `MIT`, `Apache-2.0`, `CC-BY-4.0`, or full SPDX URLs.
#[serde(skip_serializing_if = "Option::is_none")]
#[serde(borrow)]
pub license: Option<CowStr<'a>>,
/// Raw bytes (lexicon max length 100000), (de)serialized through
/// `jacquard_common::opt_serde_bytes_helper`; defaults to `None` when absent.
#[serde(skip_serializing_if = "Option::is_none")]
#[serde(default, with = "jacquard_common::opt_serde_bytes_helper")]
pub metadata: Option<Bytes>,
/// Optional AT-URI reference to a schema record defining the structure of
/// this dataset's content metadata (lexicon max length 500).
#[serde(skip_serializing_if = "Option::is_none")]
#[serde(borrow)]
pub metadata_schema_ref: Option<AtUri<'a>>,
/// Human-readable dataset name (lexicon max length 200).
#[serde(borrow)]
pub name: CowStr<'a>,
/// AT-URI reference to the schema record for this dataset's samples
/// (lexicon max length 500).
#[serde(borrow)]
pub schema_ref: AtUri<'a>,
/// Optional size information (`#datasetSize`).
#[serde(skip_serializing_if = "Option::is_none")]
#[serde(borrow)]
pub size: Option<entry::DatasetSize<'a>>,
/// Storage location for dataset files (WebDataset tar archives).
#[serde(borrow)]
pub storage: EntryStorage<'a>,
/// Searchable tags for dataset discovery. The lexicon allows at most 30
/// tags, each with a max length of 150.
#[serde(skip_serializing_if = "Option::is_none")]
#[serde(borrow)]
pub tags: Option<Vec<CowStr<'a>>>,
}
/// Storage location variants accepted by `Entry::storage`.
///
/// Serialized as an internally tagged union: the `$type` key carries the
/// NSID named in each variant's `rename`. Each payload is boxed.
// NOTE(review): `#[open_union]` presumably adds handling for unrecognised
// `$type` values — confirm against the macro's documentation.
#[open_union]
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, IntoStatic)]
#[serde(tag = "$type")]
#[serde(bound(deserialize = "'de: 'a"))]
pub enum EntryStorage<'a> {
/// `science.alt.dataset.storageHttp` payload.
#[serde(rename = "science.alt.dataset.storageHttp")]
StorageHttp(Box<StorageHttp<'a>>),
/// `science.alt.dataset.storageS3` payload.
#[serde(rename = "science.alt.dataset.storageS3")]
StorageS3(Box<StorageS3<'a>>),
/// `science.alt.dataset.storageBlobs` payload.
#[serde(rename = "science.alt.dataset.storageBlobs")]
StorageBlobs(Box<StorageBlobs<'a>>),
}
/// Output type used by `EntryRecord`'s `XrpcResp` implementation: an `Entry`
/// record together with its AT-URI and an optional CID.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, IntoStatic)]
#[serde(rename_all = "camelCase")]
pub struct EntryGetRecordOutput<'a> {
/// CID accompanying the record, when one was provided; omitted from the
/// serialized form when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
#[serde(borrow)]
pub cid: Option<Cid<'a>>,
/// AT-URI accompanying the record.
#[serde(borrow)]
pub uri: AtUri<'a>,
/// The record body itself.
#[serde(borrow)]
pub value: Entry<'a>,
}
/// Content hash for shard integrity verification (`shardChecksum` in
/// `science.alt.dataset.entry`). The algorithm is flexible to allow SHA-256,
/// BLAKE3, or other hash functions.
#[lexicon]
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, IntoStatic, Default)]
#[serde(rename_all = "camelCase")]
pub struct ShardChecksum<'a> {
/// Hash algorithm identifier, e.g. `sha256` or `blake3` (lexicon max length 20).
#[serde(borrow)]
pub algorithm: CowStr<'a>,
/// Hex-encoded hash digest (lexicon max length 128).
#[serde(borrow)]
pub digest: CowStr<'a>,
}
impl<'a> Entry<'a> {
    /// Builds a typed record URI for this collection from a string-like value.
    ///
    /// # Errors
    ///
    /// Returns a [`UriError`] when `AtUri::new_cow` rejects the input or when
    /// `RecordUri::try_from_uri` rejects the resulting AT-URI.
    pub fn uri(
        uri: impl Into<CowStr<'a>>,
    ) -> Result<RecordUri<'a, EntryRecord>, UriError> {
        let raw: CowStr<'a> = uri.into();
        let at_uri = AtUri::new_cow(raw)?;
        RecordUri::try_from_uri(at_uri)
    }
}
/// Schema identity and constraint checks for the `datasetSize` definition.
impl<'a> LexiconSchema for DatasetSize<'a> {
    /// NSID of the lexicon document that holds this definition.
    fn nsid() -> &'static str {
        "science.alt.dataset.entry"
    }

    /// Name of this definition inside the lexicon document.
    fn def_name() -> &'static str {
        "datasetSize"
    }

    /// The full lexicon document this definition belongs to.
    fn lexicon_doc() -> LexiconDoc<'static> {
        lexicon_doc_science_alt_dataset_entry()
    }

    /// Checks each present counter against its minimum.
    ///
    /// Fields are examined in the order `bytes`, `samples`, `shards`; the
    /// first violation is returned as `ConstraintError::Minimum`. Absent
    /// fields are skipped.
    fn validate(&self) -> Result<(), ConstraintError> {
        // (field name, current value, smallest permitted value)
        let lower_bounds = [
            ("bytes", self.bytes, 0i64),
            ("samples", self.samples, 0i64),
            ("shards", self.shards, 1i64),
        ];
        for (field, maybe_count, floor) in lower_bounds {
            match maybe_count {
                Some(count) if count < floor => {
                    return Err(ConstraintError::Minimum {
                        path: ValidationPath::from_field(field),
                        min: floor,
                        actual: count,
                    });
                }
                _ => {}
            }
        }
        Ok(())
    }
}
/// Zero-sized marker type for the `science.alt.dataset.entry` collection; it
/// carries the `XrpcResp` and `Collection` implementations defined in this file.
#[derive(Debug, Serialize, Deserialize)]
pub struct EntryRecord;
/// Associates `EntryRecord` with its NSID, body encoding, output type and
/// error type.
impl XrpcResp for EntryRecord {
/// NSID of the record collection.
const NSID: &'static str = "science.alt.dataset.entry";
/// Encoding of the response body.
const ENCODING: &'static str = "application/json";
/// Output type, parameterised by the lifetime `'de`.
type Output<'de> = EntryGetRecordOutput<'de>;
/// Error type, parameterised by the lifetime `'de`.
type Err<'de> = RecordError<'de>;
}
impl From<EntryGetRecordOutput<'_>> for Entry<'_> {
fn from(output: EntryGetRecordOutput<'_>) -> Self {
use jacquard_common::IntoStatic;
output.value.into_static()
}
}
/// Ties `Entry` to its collection NSID and to the `EntryRecord` marker type.
impl Collection for Entry<'_> {
/// NSID of the collection.
const NSID: &'static str = "science.alt.dataset.entry";
/// Marker type associated with this collection.
type Record = EntryRecord;
}
/// Ties the `EntryRecord` marker to the same collection NSID as `Entry`; its
/// associated `Record` type is itself.
impl Collection for EntryRecord {
/// NSID of the collection.
const NSID: &'static str = "science.alt.dataset.entry";
/// Marker type associated with this collection.
type Record = EntryRecord;
}
/// Schema identity and constraint checks for the `main` record definition.
impl<'a> LexiconSchema for Entry<'a> {
    /// NSID of the lexicon document that holds this definition.
    fn nsid() -> &'static str {
        "science.alt.dataset.entry"
    }

    /// Name of this definition inside the lexicon document.
    fn def_name() -> &'static str {
        "main"
    }

    /// The full lexicon document this definition belongs to.
    fn lexicon_doc() -> LexiconDoc<'static> {
        lexicon_doc_science_alt_dataset_entry()
    }

    /// Checks every length constraint the lexicon declares for this record.
    ///
    /// Checks run in this order and the first violation is returned as
    /// `ConstraintError::MaxLength`:
    ///
    /// 1. `description` (5000), `license` (200), `metadata_schema_ref` (500),
    ///    `name` (200), `schema_ref` (500): string length in bytes.
    /// 2. `tags`: at most 30 elements.
    /// 3. `metadata`: at most 100000 bytes.
    /// 4. each element of `tags`: at most 150 bytes, reported against the
    ///    `tags` field as a whole.
    ///
    /// Absent optional fields are skipped.
    ///
    /// # Errors
    ///
    /// Returns `ConstraintError::MaxLength` carrying the field path, the
    /// limit, and the offending length.
    fn validate(&self) -> Result<(), ConstraintError> {
        // Shared comparison so each constraint below is a single line.
        fn ensure_max_len(
            field: &'static str,
            actual: usize,
            max: usize,
        ) -> Result<(), ConstraintError> {
            if actual > max {
                return Err(ConstraintError::MaxLength {
                    path: ValidationPath::from_field(field),
                    max,
                    actual,
                });
            }
            Ok(())
        }

        if let Some(description) = &self.description {
            let text: &str = description.as_ref();
            ensure_max_len("description", text.len(), 5000)?;
        }
        if let Some(license) = &self.license {
            let text: &str = license.as_ref();
            ensure_max_len("license", text.len(), 200)?;
        }
        if let Some(metadata_schema_ref) = &self.metadata_schema_ref {
            let text: &str = metadata_schema_ref.as_ref();
            ensure_max_len("metadata_schema_ref", text.len(), 500)?;
        }
        {
            let text: &str = self.name.as_ref();
            ensure_max_len("name", text.len(), 200)?;
        }
        {
            let text: &str = self.schema_ref.as_ref();
            ensure_max_len("schema_ref", text.len(), 500)?;
        }
        if let Some(tags) = &self.tags {
            ensure_max_len("tags", tags.len(), 30)?;
        }

        // The two checks below enforce limits that the lexicon document
        // declares but that were previously never validated. They run last so
        // that any input rejected by the checks above still yields the same
        // first error as before.
        if let Some(metadata) = &self.metadata {
            ensure_max_len("metadata", metadata.len(), 100_000)?;
        }
        if let Some(tags) = &self.tags {
            for tag in tags {
                let text: &str = tag.as_ref();
                ensure_max_len("tags", text.len(), 150)?;
            }
        }
        Ok(())
    }
}
/// Schema identity and constraint checks for the `shardChecksum` definition.
impl<'a> LexiconSchema for ShardChecksum<'a> {
    /// NSID of the lexicon document that holds this definition.
    fn nsid() -> &'static str {
        "science.alt.dataset.entry"
    }

    /// Name of this definition inside the lexicon document.
    fn def_name() -> &'static str {
        "shardChecksum"
    }

    /// The full lexicon document this definition belongs to.
    fn lexicon_doc() -> LexiconDoc<'static> {
        lexicon_doc_science_alt_dataset_entry()
    }

    /// Checks the byte length of `algorithm` (max 20) and then `digest`
    /// (max 128), returning the first violation as
    /// `ConstraintError::MaxLength`.
    fn validate(&self) -> Result<(), ConstraintError> {
        // (field name, field value, maximum byte length)
        let limits = [
            ("algorithm", &self.algorithm, 20usize),
            ("digest", &self.digest, 128usize),
        ];
        for (field, text, ceiling) in limits {
            let text: &str = text.as_ref();
            let observed = text.len();
            if observed > ceiling {
                return Err(ConstraintError::MaxLength {
                    path: ValidationPath::from_field(field),
                    max: ceiling,
                    actual: observed,
                });
            }
        }
        Ok(())
    }
}
/// Builds the in-memory lexicon document for `science.alt.dataset.entry`.
///
/// The document holds three definitions: `datasetSize` (object), `main`
/// (record keyed by `tid`) and `shardChecksum` (object). A fresh document is
/// constructed on every call.
fn lexicon_doc_science_alt_dataset_entry() -> LexiconDoc<'static> {
#[allow(unused_imports)]
use jacquard_common::{CowStr, deps::smol_str::SmolStr, types::blob::MimeType};
use jacquard_lexicon::lexicon::*;
use alloc::collections::BTreeMap;
LexiconDoc {
lexicon: Lexicon::Lexicon1,
id: CowStr::new_static("science.alt.dataset.entry"),
defs: {
let mut map = BTreeMap::new();
// `datasetSize`: three integer counters, each with a lower bound.
map.insert(
SmolStr::new_static("datasetSize"),
LexUserType::Object(LexObject {
description: Some(
CowStr::new_static("Information about dataset size"),
),
properties: {
#[allow(unused_mut)]
let mut map = BTreeMap::new();
map.insert(
SmolStr::new_static("bytes"),
LexObjectProperty::Integer(LexInteger {
minimum: Some(0i64),
..Default::default()
}),
);
map.insert(
SmolStr::new_static("samples"),
LexObjectProperty::Integer(LexInteger {
minimum: Some(0i64),
..Default::default()
}),
);
map.insert(
SmolStr::new_static("shards"),
LexObjectProperty::Integer(LexInteger {
minimum: Some(1i64),
..Default::default()
}),
);
map
},
..Default::default()
}),
);
// `main`: the dataset entry record itself.
map.insert(
SmolStr::new_static("main"),
LexUserType::Record(LexRecord {
description: Some(
CowStr::new_static(
"Index entry for a WebDataset-backed dataset with references to storage location and sample schema",
),
),
key: Some(CowStr::new_static("tid")),
record: LexRecordRecord::Object(LexObject {
// Property names here are the wire (camelCase) names.
required: Some(
vec![
SmolStr::new_static("name"),
SmolStr::new_static("schemaRef"),
SmolStr::new_static("storage"),
SmolStr::new_static("createdAt")
],
),
properties: {
#[allow(unused_mut)]
let mut map = BTreeMap::new();
map.insert(
SmolStr::new_static("contentMetadata"),
LexObjectProperty::Unknown(LexUnknown {
..Default::default()
}),
);
map.insert(
SmolStr::new_static("createdAt"),
LexObjectProperty::String(LexString {
description: Some(
CowStr::new_static(
"Timestamp when this dataset entry was created",
),
),
format: Some(LexStringFormat::Datetime),
..Default::default()
}),
);
map.insert(
SmolStr::new_static("description"),
LexObjectProperty::String(LexString {
description: Some(
CowStr::new_static(
"Human-readable description of the dataset",
),
),
max_length: Some(5000usize),
..Default::default()
}),
);
map.insert(
SmolStr::new_static("license"),
LexObjectProperty::String(LexString {
description: Some(
CowStr::new_static(
"License identifier or URL. SPDX identifiers recommended (e.g., MIT, Apache-2.0, CC-BY-4.0) or full SPDX URLs (e.g., http://spdx.org/licenses/MIT). Aligns with Schema.org license property.",
),
),
max_length: Some(200usize),
..Default::default()
}),
);
map.insert(
SmolStr::new_static("metadata"),
LexObjectProperty::Bytes(LexBytes {
max_length: Some(100000usize),
..Default::default()
}),
);
map.insert(
SmolStr::new_static("metadataSchemaRef"),
LexObjectProperty::String(LexString {
description: Some(
CowStr::new_static(
"Optional AT-URI reference to a schema record defining the structure of this dataset's content metadata. When present, contentMetadata is validated against this schema at write time.",
),
),
format: Some(LexStringFormat::AtUri),
max_length: Some(500usize),
..Default::default()
}),
);
map.insert(
SmolStr::new_static("name"),
LexObjectProperty::String(LexString {
description: Some(
CowStr::new_static("Human-readable dataset name"),
),
max_length: Some(200usize),
..Default::default()
}),
);
map.insert(
SmolStr::new_static("schemaRef"),
LexObjectProperty::String(LexString {
description: Some(
CowStr::new_static(
"AT-URI reference to the schema record for this dataset's samples",
),
),
format: Some(LexStringFormat::AtUri),
max_length: Some(500usize),
..Default::default()
}),
);
// `size` refers to the `datasetSize` definition in this same document.
map.insert(
SmolStr::new_static("size"),
LexObjectProperty::Ref(LexRef {
r#ref: CowStr::new_static("#datasetSize"),
..Default::default()
}),
);
// `storage` is a union over the three storage lexicons.
map.insert(
SmolStr::new_static("storage"),
LexObjectProperty::Union(LexRefUnion {
description: Some(
CowStr::new_static(
"Storage location for dataset files (WebDataset tar archives)",
),
),
refs: vec![
CowStr::new_static("science.alt.dataset.storageHttp"),
CowStr::new_static("science.alt.dataset.storageS3"),
CowStr::new_static("science.alt.dataset.storageBlobs")
],
..Default::default()
}),
);
// `tags`: the array is capped at 30 items and each item at length 150.
map.insert(
SmolStr::new_static("tags"),
LexObjectProperty::Array(LexArray {
description: Some(
CowStr::new_static(
"Searchable tags for dataset discovery. Aligns with Schema.org keywords property.",
),
),
items: LexArrayItem::String(LexString {
max_length: Some(150usize),
..Default::default()
}),
max_length: Some(30usize),
..Default::default()
}),
);
map
},
..Default::default()
}),
..Default::default()
}),
);
// `shardChecksum`: algorithm identifier plus digest, both required.
map.insert(
SmolStr::new_static("shardChecksum"),
LexUserType::Object(LexObject {
description: Some(
CowStr::new_static(
"Content hash for shard integrity verification. Algorithm is flexible to allow SHA-256, BLAKE3, or other hash functions.",
),
),
required: Some(
vec![
SmolStr::new_static("algorithm"),
SmolStr::new_static("digest")
],
),
properties: {
#[allow(unused_mut)]
let mut map = BTreeMap::new();
map.insert(
SmolStr::new_static("algorithm"),
LexObjectProperty::String(LexString {
description: Some(
CowStr::new_static(
"Hash algorithm identifier (e.g., 'sha256', 'blake3')",
),
),
max_length: Some(20usize),
..Default::default()
}),
);
map.insert(
SmolStr::new_static("digest"),
LexObjectProperty::String(LexString {
description: Some(
CowStr::new_static("Hex-encoded hash digest"),
),
max_length: Some(128usize),
..Default::default()
}),
);
map
},
..Default::default()
}),
);
map
},
..Default::default()
}
}
/// Type-level state for `EntryBuilder`.
///
/// Each required field (`schema_ref`, `created_at`, `name`, `storage`) has an
/// associated type on `State` that is either `Unset` or `Set<..>`. The builder's
/// required-field setters are bounded on `IsUnset` for their field, and
/// `build` is bounded on `IsSet` for all four.
pub mod entry_state {
pub use crate::builder_types::{Set, Unset, IsSet, IsUnset};
#[allow(unused)]
use ::core::marker::PhantomData;
// Private supertrait: `Sealed` is not nameable outside this module, so
// `State` cannot be implemented elsewhere.
mod sealed {
pub trait Sealed {}
}
/// One associated type per required `Entry` field, recording whether that
/// field has been provided.
pub trait State: sealed::Sealed {
type SchemaRef;
type CreatedAt;
type Name;
type Storage;
}
/// Initial state: no required field has been set.
pub struct Empty(());
impl sealed::Sealed for Empty {}
impl State for Empty {
type SchemaRef = Unset;
type CreatedAt = Unset;
type Name = Unset;
type Storage = Unset;
}
/// State `S` with `schema_ref` additionally marked as set; the other three
/// markers are forwarded from `S`. `PhantomData<fn() -> S>` names `S` without
/// storing a value of it.
pub struct SetSchemaRef<S: State = Empty>(PhantomData<fn() -> S>);
impl<S: State> sealed::Sealed for SetSchemaRef<S> {}
impl<S: State> State for SetSchemaRef<S> {
type SchemaRef = Set<members::schema_ref>;
type CreatedAt = S::CreatedAt;
type Name = S::Name;
type Storage = S::Storage;
}
/// State `S` with `created_at` additionally marked as set.
pub struct SetCreatedAt<S: State = Empty>(PhantomData<fn() -> S>);
impl<S: State> sealed::Sealed for SetCreatedAt<S> {}
impl<S: State> State for SetCreatedAt<S> {
type SchemaRef = S::SchemaRef;
type CreatedAt = Set<members::created_at>;
type Name = S::Name;
type Storage = S::Storage;
}
/// State `S` with `name` additionally marked as set.
pub struct SetName<S: State = Empty>(PhantomData<fn() -> S>);
impl<S: State> sealed::Sealed for SetName<S> {}
impl<S: State> State for SetName<S> {
type SchemaRef = S::SchemaRef;
type CreatedAt = S::CreatedAt;
type Name = Set<members::name>;
type Storage = S::Storage;
}
/// State `S` with `storage` additionally marked as set.
pub struct SetStorage<S: State = Empty>(PhantomData<fn() -> S>);
impl<S: State> sealed::Sealed for SetStorage<S> {}
impl<S: State> State for SetStorage<S> {
type SchemaRef = S::SchemaRef;
type CreatedAt = S::CreatedAt;
type Name = S::Name;
type Storage = Set<members::storage>;
}
/// Marker types naming each required field; used as the parameter of `Set<..>`.
/// Their private `()` field prevents construction outside this module.
#[allow(non_camel_case_types)]
pub mod members {
pub struct schema_ref(());
pub struct created_at(());
pub struct name(());
pub struct storage(());
}
}
/// Typestate builder for `Entry`.
///
/// `S` records at the type level which required fields have been supplied;
/// `build` is only available once `schema_ref`, `created_at`, `name` and
/// `storage` are all set.
pub struct EntryBuilder<'a, S: entry_state::State> {
// Type-level marker only; no value of `S` is stored.
_state: PhantomData<fn() -> S>,
// Pending field values, indexed positionally by the setters and `build`:
// 0 content_metadata, 1 created_at, 2 description, 3 license, 4 metadata,
// 5 metadata_schema_ref, 6 name, 7 schema_ref, 8 size, 9 storage, 10 tags.
_fields: (
Option<Data<'a>>,
Option<Datetime>,
Option<CowStr<'a>>,
Option<CowStr<'a>>,
Option<Bytes>,
Option<AtUri<'a>>,
Option<CowStr<'a>>,
Option<AtUri<'a>>,
Option<entry::DatasetSize<'a>>,
Option<EntryStorage<'a>>,
Option<Vec<CowStr<'a>>>,
),
_lifetime: PhantomData<&'a ()>,
}
impl<'a> Entry<'a> {
    /// Starts building an `Entry`; returns a builder with no fields set.
    pub fn new() -> EntryBuilder<'a, entry_state::Empty> {
        let builder: EntryBuilder<'a, entry_state::Empty> = EntryBuilder::new();
        builder
    }
}
impl<'a> EntryBuilder<'a, entry_state::Empty> {
    /// Creates a builder in the `Empty` state with every field slot `None`.
    pub fn new() -> Self {
        Self {
            _state: PhantomData,
            // A tuple of `Option`s defaults to all-`None`.
            _fields: Default::default(),
            _lifetime: PhantomData,
        }
    }
}
impl<'a, S: entry_state::State> EntryBuilder<'a, S> {
    /// Sets the optional `content_metadata` field from a value or an `Option`.
    pub fn content_metadata(self, value: impl Into<Option<Data<'a>>>) -> Self {
        self.maybe_content_metadata(value.into())
    }

    /// Sets the optional `content_metadata` field; `None` clears it.
    pub fn maybe_content_metadata(self, value: Option<Data<'a>>) -> Self {
        let mut builder = self;
        builder._fields.0 = value;
        builder
    }
}
impl<'a, S> EntryBuilder<'a, S>
where
    S: entry_state::State,
    S::CreatedAt: entry_state::IsUnset,
{
    /// Sets the required `created_at` field and moves the builder to a state
    /// in which `created_at` is marked as set. Only callable while it is unset.
    pub fn created_at(
        self,
        value: impl Into<Datetime>,
    ) -> EntryBuilder<'a, entry_state::SetCreatedAt<S>> {
        let mut fields = self._fields;
        fields.1 = Some(value.into());
        EntryBuilder {
            _state: PhantomData,
            _fields: fields,
            _lifetime: PhantomData,
        }
    }
}
impl<'a, S: entry_state::State> EntryBuilder<'a, S> {
    /// Sets the optional `description` field from a value or an `Option`.
    pub fn description(self, value: impl Into<Option<CowStr<'a>>>) -> Self {
        self.maybe_description(value.into())
    }

    /// Sets the optional `description` field; `None` clears it.
    pub fn maybe_description(self, value: Option<CowStr<'a>>) -> Self {
        let mut builder = self;
        builder._fields.2 = value;
        builder
    }
}
impl<'a, S: entry_state::State> EntryBuilder<'a, S> {
    /// Sets the optional `license` field from a value or an `Option`.
    pub fn license(self, value: impl Into<Option<CowStr<'a>>>) -> Self {
        self.maybe_license(value.into())
    }

    /// Sets the optional `license` field; `None` clears it.
    pub fn maybe_license(self, value: Option<CowStr<'a>>) -> Self {
        let mut builder = self;
        builder._fields.3 = value;
        builder
    }
}
impl<'a, S: entry_state::State> EntryBuilder<'a, S> {
    /// Sets the optional `metadata` bytes from a value or an `Option`.
    pub fn metadata(self, value: impl Into<Option<Bytes>>) -> Self {
        self.maybe_metadata(value.into())
    }

    /// Sets the optional `metadata` bytes; `None` clears them.
    pub fn maybe_metadata(self, value: Option<Bytes>) -> Self {
        let mut builder = self;
        builder._fields.4 = value;
        builder
    }
}
impl<'a, S: entry_state::State> EntryBuilder<'a, S> {
    /// Sets the optional `metadata_schema_ref` field from a value or an `Option`.
    pub fn metadata_schema_ref(self, value: impl Into<Option<AtUri<'a>>>) -> Self {
        self.maybe_metadata_schema_ref(value.into())
    }

    /// Sets the optional `metadata_schema_ref` field; `None` clears it.
    pub fn maybe_metadata_schema_ref(self, value: Option<AtUri<'a>>) -> Self {
        let mut builder = self;
        builder._fields.5 = value;
        builder
    }
}
impl<'a, S> EntryBuilder<'a, S>
where
    S: entry_state::State,
    S::Name: entry_state::IsUnset,
{
    /// Sets the required `name` field and moves the builder to a state in
    /// which `name` is marked as set. Only callable while it is unset.
    pub fn name(
        self,
        value: impl Into<CowStr<'a>>,
    ) -> EntryBuilder<'a, entry_state::SetName<S>> {
        let mut fields = self._fields;
        fields.6 = Some(value.into());
        EntryBuilder {
            _state: PhantomData,
            _fields: fields,
            _lifetime: PhantomData,
        }
    }
}
impl<'a, S> EntryBuilder<'a, S>
where
    S: entry_state::State,
    S::SchemaRef: entry_state::IsUnset,
{
    /// Sets the required `schema_ref` field and moves the builder to a state
    /// in which `schema_ref` is marked as set. Only callable while it is unset.
    pub fn schema_ref(
        self,
        value: impl Into<AtUri<'a>>,
    ) -> EntryBuilder<'a, entry_state::SetSchemaRef<S>> {
        let mut fields = self._fields;
        fields.7 = Some(value.into());
        EntryBuilder {
            _state: PhantomData,
            _fields: fields,
            _lifetime: PhantomData,
        }
    }
}
impl<'a, S: entry_state::State> EntryBuilder<'a, S> {
    /// Sets the optional `size` field from a value or an `Option`.
    pub fn size(self, value: impl Into<Option<entry::DatasetSize<'a>>>) -> Self {
        self.maybe_size(value.into())
    }

    /// Sets the optional `size` field; `None` clears it.
    pub fn maybe_size(self, value: Option<entry::DatasetSize<'a>>) -> Self {
        let mut builder = self;
        builder._fields.8 = value;
        builder
    }
}
impl<'a, S> EntryBuilder<'a, S>
where
    S: entry_state::State,
    S::Storage: entry_state::IsUnset,
{
    /// Sets the required `storage` field and moves the builder to a state in
    /// which `storage` is marked as set. Only callable while it is unset.
    pub fn storage(
        self,
        value: impl Into<EntryStorage<'a>>,
    ) -> EntryBuilder<'a, entry_state::SetStorage<S>> {
        let mut fields = self._fields;
        fields.9 = Some(value.into());
        EntryBuilder {
            _state: PhantomData,
            _fields: fields,
            _lifetime: PhantomData,
        }
    }
}
impl<'a, S: entry_state::State> EntryBuilder<'a, S> {
    /// Sets the optional `tags` list from a value or an `Option`.
    pub fn tags(self, value: impl Into<Option<Vec<CowStr<'a>>>>) -> Self {
        self.maybe_tags(value.into())
    }

    /// Sets the optional `tags` list; `None` clears it.
    pub fn maybe_tags(self, value: Option<Vec<CowStr<'a>>>) -> Self {
        let mut builder = self;
        builder._fields.10 = value;
        builder
    }
}
impl<'a, S> EntryBuilder<'a, S>
where
    S: entry_state::State,
    S::SchemaRef: entry_state::IsSet,
    S::CreatedAt: entry_state::IsSet,
    S::Name: entry_state::IsSet,
    S::Storage: entry_state::IsSet,
{
    /// Finishes the builder, producing an `Entry` whose `extra_data` is the
    /// default value.
    ///
    /// Only available once all four required fields are marked as set, so
    /// the `unwrap` calls below are not expected to fail.
    pub fn build(self) -> Entry<'a> {
        let (
            content_metadata,
            created_at,
            description,
            license,
            metadata,
            metadata_schema_ref,
            name,
            schema_ref,
            size,
            storage,
            tags,
        ) = self._fields;
        Entry {
            content_metadata,
            created_at: created_at.unwrap(),
            description,
            license,
            metadata,
            metadata_schema_ref,
            name: name.unwrap(),
            schema_ref: schema_ref.unwrap(),
            size,
            storage: storage.unwrap(),
            tags,
            extra_data: Default::default(),
        }
    }

    /// Finishes the builder like `build`, but stores the supplied map in the
    /// record's `extra_data`.
    pub fn build_with_data(
        self,
        extra_data: BTreeMap<jacquard_common::deps::smol_str::SmolStr, Data<'a>>,
    ) -> Entry<'a> {
        let mut entry = self.build();
        entry.extra_data = Some(extra_data);
        entry
    }
}