#[allow(unused_imports)]
use alloc::collections::BTreeMap;
#[allow(unused_imports)]
use core::marker::PhantomData;
use jacquard_common::CowStr;
#[allow(unused_imports)]
use jacquard_common::deps::codegen::unicode_segmentation::UnicodeSegmentation;
use jacquard_common::types::string::UriValue;
use jacquard_derive::{IntoStatic, lexicon};
use jacquard_lexicon::lexicon::LexiconDoc;
use jacquard_lexicon::schema::LexiconSchema;
#[allow(unused_imports)]
use jacquard_lexicon::validation::{ConstraintError, ValidationPath};
use serde::{Serialize, Deserialize};
use crate::science_alt::dataset::entry::ShardChecksum;
use crate::science_alt::dataset::storage_s3;
/// S3 or S3-compatible storage for WebDataset tar archives. Supports custom
/// endpoints for MinIO, Cloudflare R2, and other S3-compatible services.
///
/// Corresponds to the `main` definition of the `science.alt.dataset.storageS3`
/// lexicon. `bucket` and `shards` are required; `endpoint` and `region` are
/// optional and are omitted from serialized output when `None`.
///
/// NOTE(review): the builder in this file also sets an `extra_data` field that
/// is not declared here; presumably `#[lexicon]` injects it — confirm.
#[lexicon]
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, IntoStatic)]
#[serde(rename_all = "camelCase")]
pub struct StorageS3<'a> {
/// S3 bucket name. `validate` rejects values longer than 255 bytes.
#[serde(borrow)]
pub bucket: CowStr<'a>,
/// Custom S3-compatible endpoint URL (e.g., for MinIO, Cloudflare R2).
/// Omit for standard AWS S3. `validate` rejects values longer than 500 bytes.
#[serde(skip_serializing_if = "Option::is_none")]
#[serde(borrow)]
pub endpoint: Option<UriValue<'a>>,
/// AWS region (e.g., 'us-east-1'). Optional for S3-compatible services.
/// `validate` rejects values longer than 50 bytes.
#[serde(skip_serializing_if = "Option::is_none")]
#[serde(borrow)]
pub region: Option<CowStr<'a>>,
/// Array of shard entries with object key and integrity checksum.
/// `validate` requires at least one entry.
#[serde(borrow)]
pub shards: Vec<storage_s3::ShardEntry<'a>>,
}
/// A single S3 object shard with integrity checksum.
///
/// Corresponds to the `shardEntry` definition of the
/// `science.alt.dataset.storageS3` lexicon; both fields are required.
///
/// NOTE(review): the builder in this file also sets an `extra_data` field that
/// is not declared here; presumably `#[lexicon]` injects it — confirm.
#[lexicon]
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, IntoStatic)]
#[serde(rename_all = "camelCase")]
pub struct ShardEntry<'a> {
/// Checksum for this shard (lexicon ref `science.alt.dataset.entry#shardChecksum`).
#[serde(borrow)]
pub checksum: ShardChecksum<'a>,
/// S3 object key for this WebDataset tar shard. `validate` rejects values
/// longer than 1024 bytes.
#[serde(borrow)]
pub key: CowStr<'a>,
}
impl<'a> LexiconSchema for StorageS3<'a> {
    /// NSID of the lexicon document that defines this type.
    fn nsid() -> &'static str {
        "science.alt.dataset.storageS3"
    }

    /// Name of this type's definition within the lexicon document.
    fn def_name() -> &'static str {
        "main"
    }

    /// Returns the lexicon document describing this type.
    fn lexicon_doc() -> LexiconDoc<'static> {
        lexicon_doc_science_alt_dataset_storageS3()
    }

    /// Checks the length constraints declared for this object.
    ///
    /// Fields are checked in the order `bucket`, `endpoint`, `region`,
    /// `shards`; the first violation is returned. String lengths are byte
    /// lengths. Optional fields are only checked when present.
    ///
    /// # Errors
    ///
    /// * `ConstraintError::MaxLength` if `bucket` exceeds 255 bytes,
    ///   `endpoint` exceeds 500 bytes, or `region` exceeds 50 bytes.
    /// * `ConstraintError::MinLength` if `shards` is empty.
    fn validate(&self) -> Result<(), ConstraintError> {
        let bucket_len = <str>::len(self.bucket.as_ref());
        if bucket_len > 255usize {
            return Err(ConstraintError::MaxLength {
                path: ValidationPath::from_field("bucket"),
                max: 255usize,
                actual: bucket_len,
            });
        }

        if let Some(endpoint) = &self.endpoint {
            let endpoint_len = <str>::len(endpoint.as_ref());
            if endpoint_len > 500usize {
                return Err(ConstraintError::MaxLength {
                    path: ValidationPath::from_field("endpoint"),
                    max: 500usize,
                    actual: endpoint_len,
                });
            }
        }

        if let Some(region) = &self.region {
            let region_len = <str>::len(region.as_ref());
            if region_len > 50usize {
                return Err(ConstraintError::MaxLength {
                    path: ValidationPath::from_field("region"),
                    max: 50usize,
                    actual: region_len,
                });
            }
        }

        // At least one shard entry is required.
        if self.shards.is_empty() {
            return Err(ConstraintError::MinLength {
                path: ValidationPath::from_field("shards"),
                min: 1usize,
                actual: self.shards.len(),
            });
        }

        Ok(())
    }
}
impl<'a> LexiconSchema for ShardEntry<'a> {
    /// NSID of the lexicon document that defines this type.
    fn nsid() -> &'static str {
        "science.alt.dataset.storageS3"
    }

    /// Name of this type's definition within the lexicon document.
    fn def_name() -> &'static str {
        "shardEntry"
    }

    /// Returns the lexicon document describing this type.
    fn lexicon_doc() -> LexiconDoc<'static> {
        lexicon_doc_science_alt_dataset_storageS3()
    }

    /// Checks the length constraint on `key` (byte length).
    ///
    /// # Errors
    ///
    /// Returns `ConstraintError::MaxLength` if `key` exceeds 1024 bytes.
    fn validate(&self) -> Result<(), ConstraintError> {
        let key_len = <str>::len(self.key.as_ref());
        if key_len > 1024usize {
            return Err(ConstraintError::MaxLength {
                path: ValidationPath::from_field("key"),
                max: 1024usize,
                actual: key_len,
            });
        }
        Ok(())
    }
}
pub mod storage_s3_state {
    //! Type-level state used by `StorageS3Builder` to track which required
    //! fields (`bucket`, `shards`) have been set.

    pub use crate::builder_types::{Set, Unset, IsSet, IsUnset};
    #[allow(unused)]
    use ::core::marker::PhantomData;

    /// Marker types naming each tracked field.
    #[allow(non_camel_case_types)]
    pub mod members {
        pub struct shards(());
        pub struct bucket(());
    }

    // Private module: keeps `State` from being implemented outside this module.
    mod sealed {
        pub trait Sealed {}
    }

    /// Builder state: one associated type per tracked field.
    pub trait State: sealed::Sealed {
        type Shards;
        type Bucket;
    }

    /// Initial state: no tracked field has been set.
    pub struct Empty(());

    impl sealed::Sealed for Empty {}

    impl State for Empty {
        type Shards = Unset;
        type Bucket = Unset;
    }

    /// State `S` with `bucket` additionally marked as set.
    pub struct SetBucket<S: State = Empty>(PhantomData<fn() -> S>);

    impl<S> sealed::Sealed for SetBucket<S> where S: State {}

    impl<S> State for SetBucket<S>
    where
        S: State,
    {
        type Shards = S::Shards;
        type Bucket = Set<members::bucket>;
    }

    /// State `S` with `shards` additionally marked as set.
    pub struct SetShards<S: State = Empty>(PhantomData<fn() -> S>);

    impl<S> sealed::Sealed for SetShards<S> where S: State {}

    impl<S> State for SetShards<S>
    where
        S: State,
    {
        type Shards = Set<members::shards>;
        type Bucket = S::Bucket;
    }
}
/// Builder for [`StorageS3`].
///
/// The type parameter `S` records which required fields have been set;
/// `build` is only available once both `bucket` and `shards` are set.
pub struct StorageS3Builder<'a, S>
where
    S: storage_s3_state::State,
{
    /// Carries the state type `S` without storing a value of it.
    _state: PhantomData<fn() -> S>,
    /// Pending field values, in order: bucket, endpoint, region, shards.
    _fields: (
        Option<CowStr<'a>>,
        Option<UriValue<'a>>,
        Option<CowStr<'a>>,
        Option<Vec<storage_s3::ShardEntry<'a>>>,
    ),
    _lifetime: PhantomData<&'a ()>,
}
impl<'a> StorageS3<'a> {
    /// Starts a builder for this type with no fields set.
    pub fn new() -> StorageS3Builder<'a, storage_s3_state::Empty> {
        <StorageS3Builder<'a, storage_s3_state::Empty>>::new()
    }
}
impl<'a> StorageS3Builder<'a, storage_s3_state::Empty> {
    /// Creates a builder in the `Empty` state with every field unset.
    pub fn new() -> Self {
        let unset_fields = (None, None, None, None);
        Self {
            _state: PhantomData,
            _fields: unset_fields,
            _lifetime: PhantomData,
        }
    }
}
impl<'a, S> StorageS3Builder<'a, S>
where
    S: storage_s3_state::State,
    S::Bucket: storage_s3_state::IsUnset,
{
    /// Sets the required `bucket` field.
    ///
    /// Only available while `bucket` is still unset; the returned builder's
    /// state marks it as set.
    pub fn bucket(
        self,
        value: impl Into<CowStr<'a>>,
    ) -> StorageS3Builder<'a, storage_s3_state::SetBucket<S>> {
        let mut fields = self._fields;
        fields.0 = Some(value.into());
        StorageS3Builder {
            _state: PhantomData,
            _fields: fields,
            _lifetime: PhantomData,
        }
    }
}
impl<'a, S> StorageS3Builder<'a, S>
where
    S: storage_s3_state::State,
{
    /// Sets the optional `endpoint` field from anything convertible into
    /// `Option<UriValue>`; a resulting `None` clears the field.
    pub fn endpoint(self, value: impl Into<Option<UriValue<'a>>>) -> Self {
        self.maybe_endpoint(value.into())
    }

    /// Sets the optional `endpoint` field directly from an `Option`.
    pub fn maybe_endpoint(self, value: Option<UriValue<'a>>) -> Self {
        let mut builder = self;
        builder._fields.1 = value;
        builder
    }
}
impl<'a, S> StorageS3Builder<'a, S>
where
    S: storage_s3_state::State,
{
    /// Sets the optional `region` field from anything convertible into
    /// `Option<CowStr>`; a resulting `None` clears the field.
    pub fn region(self, value: impl Into<Option<CowStr<'a>>>) -> Self {
        self.maybe_region(value.into())
    }

    /// Sets the optional `region` field directly from an `Option`.
    pub fn maybe_region(self, value: Option<CowStr<'a>>) -> Self {
        let mut builder = self;
        builder._fields.2 = value;
        builder
    }
}
impl<'a, S> StorageS3Builder<'a, S>
where
    S: storage_s3_state::State,
    S::Shards: storage_s3_state::IsUnset,
{
    /// Sets the required `shards` field.
    ///
    /// Only available while `shards` is still unset; the returned builder's
    /// state marks it as set.
    pub fn shards(
        self,
        value: impl Into<Vec<storage_s3::ShardEntry<'a>>>,
    ) -> StorageS3Builder<'a, storage_s3_state::SetShards<S>> {
        let mut fields = self._fields;
        fields.3 = Some(value.into());
        StorageS3Builder {
            _state: PhantomData,
            _fields: fields,
            _lifetime: PhantomData,
        }
    }
}
impl<'a, S> StorageS3Builder<'a, S>
where
    S: storage_s3_state::State,
    S::Shards: storage_s3_state::IsSet,
    S::Bucket: storage_s3_state::IsSet,
{
    /// Finishes the builder, leaving `extra_data` at its default value.
    ///
    /// Only callable once both `bucket` and `shards` are marked as set, which
    /// through this builder's setters means both values are present.
    pub fn build(self) -> StorageS3<'a> {
        let (bucket, endpoint, region, shards) = self._fields;
        StorageS3 {
            bucket: bucket.unwrap(),
            endpoint,
            region,
            shards: shards.unwrap(),
            extra_data: Default::default(),
        }
    }

    /// Finishes the builder, attaching the given map as `extra_data`.
    ///
    /// Same state requirements as `build`.
    pub fn build_with_data(
        self,
        extra_data: BTreeMap<
            jacquard_common::deps::smol_str::SmolStr,
            jacquard_common::types::value::Data<'a>,
        >,
    ) -> StorageS3<'a> {
        let (bucket, endpoint, region, shards) = self._fields;
        StorageS3 {
            bucket: bucket.unwrap(),
            endpoint,
            region,
            shards: shards.unwrap(),
            extra_data: Some(extra_data),
        }
    }
}
/// Builds the lexicon document for `science.alt.dataset.storageS3`.
///
/// The document contains two object definitions: `main` (bucket, endpoint,
/// region, shards) and `shardEntry` (checksum, key).
fn lexicon_doc_science_alt_dataset_storageS3() -> LexiconDoc<'static> {
    #[allow(unused_imports)]
    use jacquard_common::{CowStr, deps::smol_str::SmolStr, types::blob::MimeType};
    use jacquard_lexicon::lexicon::*;
    use alloc::collections::BTreeMap;

    // Properties of the `main` object definition.
    let mut main_properties = BTreeMap::new();
    main_properties.insert(
        SmolStr::new_static("bucket"),
        LexObjectProperty::String(LexString {
            description: Some(CowStr::new_static("S3 bucket name")),
            max_length: Some(255usize),
            ..Default::default()
        }),
    );
    main_properties.insert(
        SmolStr::new_static("endpoint"),
        LexObjectProperty::String(LexString {
            description: Some(CowStr::new_static(
                "Custom S3-compatible endpoint URL (e.g., for MinIO, Cloudflare R2). Omit for standard AWS S3.",
            )),
            format: Some(LexStringFormat::Uri),
            max_length: Some(500usize),
            ..Default::default()
        }),
    );
    main_properties.insert(
        SmolStr::new_static("region"),
        LexObjectProperty::String(LexString {
            description: Some(CowStr::new_static(
                "AWS region (e.g., 'us-east-1'). Optional for S3-compatible services.",
            )),
            max_length: Some(50usize),
            ..Default::default()
        }),
    );
    main_properties.insert(
        SmolStr::new_static("shards"),
        LexObjectProperty::Array(LexArray {
            description: Some(CowStr::new_static(
                "Array of shard entries with object key and integrity checksum",
            )),
            items: LexArrayItem::Ref(LexRef {
                r#ref: CowStr::new_static("#shardEntry"),
                ..Default::default()
            }),
            min_length: Some(1usize),
            ..Default::default()
        }),
    );

    let main_def = LexUserType::Object(LexObject {
        description: Some(CowStr::new_static(
            "S3 or S3-compatible storage for WebDataset tar archives. Supports custom endpoints for MinIO, Cloudflare R2, and other S3-compatible services.",
        )),
        required: Some(vec![
            SmolStr::new_static("bucket"),
            SmolStr::new_static("shards"),
        ]),
        properties: main_properties,
        ..Default::default()
    });

    // Properties of the `shardEntry` object definition.
    let mut shard_entry_properties = BTreeMap::new();
    shard_entry_properties.insert(
        SmolStr::new_static("checksum"),
        LexObjectProperty::Ref(LexRef {
            r#ref: CowStr::new_static("science.alt.dataset.entry#shardChecksum"),
            ..Default::default()
        }),
    );
    shard_entry_properties.insert(
        SmolStr::new_static("key"),
        LexObjectProperty::String(LexString {
            description: Some(CowStr::new_static(
                "S3 object key for this WebDataset tar shard",
            )),
            max_length: Some(1024usize),
            ..Default::default()
        }),
    );

    let shard_entry_def = LexUserType::Object(LexObject {
        description: Some(CowStr::new_static(
            "A single S3 object shard with integrity checksum",
        )),
        required: Some(vec![
            SmolStr::new_static("key"),
            SmolStr::new_static("checksum"),
        ]),
        properties: shard_entry_properties,
        ..Default::default()
    });

    let mut defs = BTreeMap::new();
    defs.insert(SmolStr::new_static("main"), main_def);
    defs.insert(SmolStr::new_static("shardEntry"), shard_entry_def);

    LexiconDoc {
        lexicon: Lexicon::Lexicon1,
        id: CowStr::new_static("science.alt.dataset.storageS3"),
        defs,
        ..Default::default()
    }
}
pub mod shard_entry_state {
    //! Type-level state used by `ShardEntryBuilder` to track which required
    //! fields (`key`, `checksum`) have been set.

    pub use crate::builder_types::{Set, Unset, IsSet, IsUnset};
    #[allow(unused)]
    use ::core::marker::PhantomData;

    /// Marker types naming each tracked field.
    #[allow(non_camel_case_types)]
    pub mod members {
        pub struct key(());
        pub struct checksum(());
    }

    // Private module: keeps `State` from being implemented outside this module.
    mod sealed {
        pub trait Sealed {}
    }

    /// Builder state: one associated type per tracked field.
    pub trait State: sealed::Sealed {
        type Key;
        type Checksum;
    }

    /// Initial state: no tracked field has been set.
    pub struct Empty(());

    impl sealed::Sealed for Empty {}

    impl State for Empty {
        type Key = Unset;
        type Checksum = Unset;
    }

    /// State `S` with `checksum` additionally marked as set.
    pub struct SetChecksum<S: State = Empty>(PhantomData<fn() -> S>);

    impl<S> sealed::Sealed for SetChecksum<S> where S: State {}

    impl<S> State for SetChecksum<S>
    where
        S: State,
    {
        type Key = S::Key;
        type Checksum = Set<members::checksum>;
    }

    /// State `S` with `key` additionally marked as set.
    pub struct SetKey<S: State = Empty>(PhantomData<fn() -> S>);

    impl<S> sealed::Sealed for SetKey<S> where S: State {}

    impl<S> State for SetKey<S>
    where
        S: State,
    {
        type Key = Set<members::key>;
        type Checksum = S::Checksum;
    }
}
/// Builder for [`ShardEntry`].
///
/// The type parameter `S` records which required fields have been set;
/// `build` is only available once both `key` and `checksum` are set.
pub struct ShardEntryBuilder<'a, S>
where
    S: shard_entry_state::State,
{
    /// Carries the state type `S` without storing a value of it.
    _state: PhantomData<fn() -> S>,
    /// Pending field values, in order: checksum, key.
    _fields: (Option<ShardChecksum<'a>>, Option<CowStr<'a>>),
    _lifetime: PhantomData<&'a ()>,
}
impl<'a> ShardEntry<'a> {
    /// Starts a builder for this type with no fields set.
    pub fn new() -> ShardEntryBuilder<'a, shard_entry_state::Empty> {
        <ShardEntryBuilder<'a, shard_entry_state::Empty>>::new()
    }
}
impl<'a> ShardEntryBuilder<'a, shard_entry_state::Empty> {
    /// Creates a builder in the `Empty` state with every field unset.
    pub fn new() -> Self {
        let unset_fields = (None, None);
        Self {
            _state: PhantomData,
            _fields: unset_fields,
            _lifetime: PhantomData,
        }
    }
}
impl<'a, S> ShardEntryBuilder<'a, S>
where
    S: shard_entry_state::State,
    S::Checksum: shard_entry_state::IsUnset,
{
    /// Sets the required `checksum` field.
    ///
    /// Only available while `checksum` is still unset; the returned builder's
    /// state marks it as set.
    pub fn checksum(
        self,
        value: impl Into<ShardChecksum<'a>>,
    ) -> ShardEntryBuilder<'a, shard_entry_state::SetChecksum<S>> {
        let mut fields = self._fields;
        fields.0 = Some(value.into());
        ShardEntryBuilder {
            _state: PhantomData,
            _fields: fields,
            _lifetime: PhantomData,
        }
    }
}
impl<'a, S> ShardEntryBuilder<'a, S>
where
    S: shard_entry_state::State,
    S::Key: shard_entry_state::IsUnset,
{
    /// Sets the required `key` field.
    ///
    /// Only available while `key` is still unset; the returned builder's
    /// state marks it as set.
    pub fn key(
        self,
        value: impl Into<CowStr<'a>>,
    ) -> ShardEntryBuilder<'a, shard_entry_state::SetKey<S>> {
        let mut fields = self._fields;
        fields.1 = Some(value.into());
        ShardEntryBuilder {
            _state: PhantomData,
            _fields: fields,
            _lifetime: PhantomData,
        }
    }
}
impl<'a, S> ShardEntryBuilder<'a, S>
where
    S: shard_entry_state::State,
    S::Key: shard_entry_state::IsSet,
    S::Checksum: shard_entry_state::IsSet,
{
    /// Finishes the builder, leaving `extra_data` at its default value.
    ///
    /// Only callable once both `key` and `checksum` are marked as set, which
    /// through this builder's setters means both values are present.
    pub fn build(self) -> ShardEntry<'a> {
        let (checksum, key) = self._fields;
        ShardEntry {
            checksum: checksum.unwrap(),
            key: key.unwrap(),
            extra_data: Default::default(),
        }
    }

    /// Finishes the builder, attaching the given map as `extra_data`.
    ///
    /// Same state requirements as `build`.
    pub fn build_with_data(
        self,
        extra_data: BTreeMap<
            jacquard_common::deps::smol_str::SmolStr,
            jacquard_common::types::value::Data<'a>,
        >,
    ) -> ShardEntry<'a> {
        let (checksum, key) = self._fields;
        ShardEntry {
            checksum: checksum.unwrap(),
            key: key.unwrap(),
            extra_data: Some(extra_data),
        }
    }
}