#[allow(unused_imports)]
use alloc::collections::BTreeMap;
#[allow(unused_imports)]
use core::marker::PhantomData;
use jacquard_common::{BosStr, CowStr, DefaultStr, FromStaticStr};
#[allow(unused_imports)]
use jacquard_common::deps::codegen::unicode_segmentation::UnicodeSegmentation;
use jacquard_common::deps::smol_str::SmolStr;
use jacquard_common::types::string::UriValue;
use jacquard_common::types::value::Data;
use jacquard_derive::IntoStatic;
use jacquard_lexicon::lexicon::LexiconDoc;
use jacquard_lexicon::schema::LexiconSchema;
use crate::science_alt::dataset::entry::ShardChecksum;
use crate::science_alt::dataset::storage_s3;
#[allow(unused_imports)]
use jacquard_lexicon::validation::{ConstraintError, ValidationPath};
use serde::{Deserialize, Serialize};
/// S3 or S3-compatible storage for WebDataset tar archives.
///
/// Rust representation of the `main` definition of the
/// `science.alt.dataset.storageS3` lexicon. Field names are (de)serialized in
/// camelCase. `bucket` and `shards` are required; `endpoint` and `region` are
/// optional and are left out of the serialized form when `None`.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, IntoStatic)]
#[serde(
rename_all = "camelCase",
bound(deserialize = "S: Deserialize<'de> + BosStr")
)]
pub struct StorageS3<S: BosStr = DefaultStr> {
/// S3 bucket name. `validate` rejects values longer than 255 bytes.
pub bucket: S,
/// Custom S3-compatible endpoint URL (e.g., for MinIO, Cloudflare R2).
/// Omit for standard AWS S3. `validate` rejects values longer than 500 bytes.
#[serde(skip_serializing_if = "Option::is_none")]
pub endpoint: Option<UriValue<S>>,
/// AWS region (e.g., 'us-east-1'). Optional for S3-compatible services.
/// `validate` rejects values longer than 50 bytes.
#[serde(skip_serializing_if = "Option::is_none")]
pub region: Option<S>,
/// Shard entries, each with an object key and integrity checksum.
/// `validate` requires at least one entry.
pub shards: Vec<storage_s3::ShardEntry<S>>,
/// Additional key/value data that is flattened into this object's own
/// serialized map rather than nested under a key; skipped when `None`.
#[serde(flatten, default, skip_serializing_if = "Option::is_none")]
pub extra_data: Option<BTreeMap<SmolStr, Data<S>>>,
}
/// A single S3 object shard with integrity checksum.
///
/// Rust representation of the `shardEntry` definition of the
/// `science.alt.dataset.storageS3` lexicon. Field names are (de)serialized in
/// camelCase; both `checksum` and `key` are required.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, IntoStatic)]
#[serde(
rename_all = "camelCase",
bound(deserialize = "S: Deserialize<'de> + BosStr")
)]
pub struct ShardEntry<S: BosStr = DefaultStr> {
/// Checksum of the shard; the lexicon refers to
/// `science.alt.dataset.entry#shardChecksum` for its shape.
pub checksum: ShardChecksum<S>,
/// S3 object key for this WebDataset tar shard. `validate` rejects values
/// longer than 1024 bytes.
pub key: S,
/// Additional key/value data that is flattened into this object's own
/// serialized map rather than nested under a key; skipped when `None`.
#[serde(flatten, default, skip_serializing_if = "Option::is_none")]
pub extra_data: Option<BTreeMap<SmolStr, Data<S>>>,
}
impl<S: BosStr> LexiconSchema for StorageS3<S> {
    /// NSID of the lexicon document that defines this type.
    fn nsid() -> &'static str {
        "science.alt.dataset.storageS3"
    }

    /// Name of this type's definition inside the lexicon document.
    fn def_name() -> &'static str {
        "main"
    }

    /// Returns the lexicon document produced by this file's doc constructor.
    fn lexicon_doc() -> LexiconDoc<'static> {
        lexicon_doc_science_alt_dataset_storageS3()
    }

    /// Checks the length constraints of this object's own fields.
    ///
    /// Fields are checked in the order `bucket`, `endpoint`, `region`,
    /// `shards`, and the first violation is returned. String limits are
    /// compared against the UTF-8 byte length of the value.
    ///
    /// # Errors
    ///
    /// * `ConstraintError::MaxLength` when `bucket` exceeds 255 bytes,
    ///   `endpoint` exceeds 500 bytes, or `region` exceeds 50 bytes.
    /// * `ConstraintError::MinLength` when `shards` is empty.
    fn validate(&self) -> Result<(), ConstraintError> {
        const BUCKET_MAX: usize = 255;
        const ENDPOINT_MAX: usize = 500;
        const REGION_MAX: usize = 50;
        const SHARDS_MIN: usize = 1;

        let bucket = &self.bucket;
        let bucket_len = str::len(bucket.as_ref());
        if bucket_len > BUCKET_MAX {
            return Err(ConstraintError::MaxLength {
                path: ValidationPath::from_field("bucket"),
                max: BUCKET_MAX,
                actual: bucket_len,
            });
        }

        if let Some(endpoint) = &self.endpoint {
            let endpoint_len = str::len(endpoint.as_ref());
            if endpoint_len > ENDPOINT_MAX {
                return Err(ConstraintError::MaxLength {
                    path: ValidationPath::from_field("endpoint"),
                    max: ENDPOINT_MAX,
                    actual: endpoint_len,
                });
            }
        }

        if let Some(region) = &self.region {
            let region_len = str::len(region.as_ref());
            if region_len > REGION_MAX {
                return Err(ConstraintError::MaxLength {
                    path: ValidationPath::from_field("region"),
                    max: REGION_MAX,
                    actual: region_len,
                });
            }
        }

        // A minimum of one means "must not be empty".
        if self.shards.is_empty() {
            return Err(ConstraintError::MinLength {
                path: ValidationPath::from_field("shards"),
                min: SHARDS_MIN,
                actual: self.shards.len(),
            });
        }

        Ok(())
    }
}
impl<S: BosStr> LexiconSchema for ShardEntry<S> {
    /// NSID of the lexicon document that defines this type.
    fn nsid() -> &'static str {
        "science.alt.dataset.storageS3"
    }

    /// Name of this type's definition inside the lexicon document.
    fn def_name() -> &'static str {
        "shardEntry"
    }

    /// Returns the lexicon document produced by this file's doc constructor.
    fn lexicon_doc() -> LexiconDoc<'static> {
        lexicon_doc_science_alt_dataset_storageS3()
    }

    /// Checks the length constraint on `key`.
    ///
    /// # Errors
    ///
    /// Returns `ConstraintError::MaxLength` when the UTF-8 byte length of
    /// `key` exceeds 1024.
    fn validate(&self) -> Result<(), ConstraintError> {
        const KEY_MAX: usize = 1024;

        let key = &self.key;
        let key_len = str::len(key.as_ref());
        if key_len <= KEY_MAX {
            return Ok(());
        }
        Err(ConstraintError::MaxLength {
            path: ValidationPath::from_field("key"),
            max: KEY_MAX,
            actual: key_len,
        })
    }
}
/// Type-level state for `StorageS3Builder`.
///
/// Each `State` implementor records, through associated types, whether the
/// required fields `shards` and `bucket` have been supplied. The builder's
/// setters and `build` methods put `IsUnset` / `IsSet` bounds on those
/// associated types.
pub mod storage_s3_state {
// Marker types and marker traits re-exported from `crate::builder_types`.
pub use crate::builder_types::{IsSet, IsUnset, Set, Unset};
#[allow(unused)]
use ::core::marker::PhantomData;
// `Sealed` lives in a private module, so code outside `storage_s3_state`
// cannot name it and therefore cannot implement `State`.
mod sealed {
pub trait Sealed {}
}
/// A builder state; one associated type per required field.
pub trait State: sealed::Sealed {
/// `Unset`, or `Set<members::shards>` once `shards` has been provided.
type Shards;
/// `Unset`, or `Set<members::bucket>` once `bucket` has been provided.
type Bucket;
}
/// Initial state: no required field has been provided.
pub struct Empty(());
impl sealed::Sealed for Empty {}
impl State for Empty {
type Shards = Unset;
type Bucket = Unset;
}
/// State reached after `shards` is provided on top of the prior state `St`.
pub struct SetShards<St: State = Empty>(PhantomData<fn() -> St>);
impl<St: State> sealed::Sealed for SetShards<St> {}
impl<St: State> State for SetShards<St> {
type Shards = Set<members::shards>;
// Carried over unchanged from the prior state.
type Bucket = St::Bucket;
}
/// State reached after `bucket` is provided on top of the prior state `St`.
pub struct SetBucket<St: State = Empty>(PhantomData<fn() -> St>);
impl<St: State> sealed::Sealed for SetBucket<St> {}
impl<St: State> State for SetBucket<St> {
// Carried over unchanged from the prior state.
type Shards = St::Shards;
type Bucket = Set<members::bucket>;
}
/// Marker types named after the required fields; used as the parameter of `Set`.
#[allow(non_camel_case_types)]
pub mod members {
/// Marker for the `shards` field.
pub struct shards(());
/// Marker for the `bucket` field.
pub struct bucket(());
}
}
/// Builder for `StorageS3` whose type parameter `St` tracks which required
/// fields have been provided.
///
/// `build` and `build_with_data` are only available once `St::Shards` and
/// `St::Bucket` both satisfy `IsSet`.
pub struct StorageS3Builder<S: BosStr, St: storage_s3_state::State> {
// Zero-sized marker carrying the current builder state.
_state: PhantomData<fn() -> St>,
// Pending field values, by position:
// 0 = bucket, 1 = endpoint, 2 = region, 3 = shards.
_fields: (
Option<S>,
Option<UriValue<S>>,
Option<S>,
Option<Vec<storage_s3::ShardEntry<S>>>,
),
// Zero-sized marker for the string type parameter.
_type: PhantomData<fn() -> S>,
}
impl<S: BosStr> StorageS3<S> {
    /// Starts building a `StorageS3`.
    ///
    /// Note that this returns a builder in the `Empty` state, not a
    /// `StorageS3`; finish with `build` or `build_with_data` after providing
    /// `bucket` and `shards`.
    pub fn new() -> StorageS3Builder<S, storage_s3_state::Empty> {
        <StorageS3Builder<S, storage_s3_state::Empty>>::new()
    }
}
impl<S: BosStr> StorageS3Builder<S, storage_s3_state::Empty> {
    /// Creates a builder in the `Empty` state with every field unset.
    pub fn new() -> Self {
        // Slots: bucket, endpoint, region, shards.
        let unset_fields = (None, None, None, None);
        Self {
            _fields: unset_fields,
            _state: PhantomData,
            _type: PhantomData,
        }
    }
}
impl<S: BosStr, St> StorageS3Builder<S, St>
where
    St: storage_s3_state::State,
    St::Bucket: storage_s3_state::IsUnset,
{
    /// Sets the required `bucket` field and moves the builder into the
    /// `SetBucket` state.
    ///
    /// Only callable while `St::Bucket` is still `IsUnset`.
    pub fn bucket(
        self,
        value: impl Into<S>,
    ) -> StorageS3Builder<S, storage_s3_state::SetBucket<St>> {
        // Keep every other pending value; only slot 0 (bucket) is replaced.
        let (_, endpoint, region, shards) = self._fields;
        StorageS3Builder {
            _state: PhantomData,
            _fields: (Some(value.into()), endpoint, region, shards),
            _type: PhantomData,
        }
    }
}
impl<S: BosStr, St: storage_s3_state::State> StorageS3Builder<S, St> {
    /// Sets the optional `endpoint` field from anything convertible into
    /// `Option<UriValue<S>>`, replacing any previously stored value.
    pub fn endpoint(self, value: impl Into<Option<UriValue<S>>>) -> Self {
        self.maybe_endpoint(value.into())
    }

    /// Sets the optional `endpoint` field from an explicit `Option`;
    /// passing `None` clears it.
    pub fn maybe_endpoint(self, value: Option<UriValue<S>>) -> Self {
        let mut builder = self;
        builder._fields.1 = value;
        builder
    }
}
impl<S: BosStr, St: storage_s3_state::State> StorageS3Builder<S, St> {
    /// Sets the optional `region` field from anything convertible into
    /// `Option<S>`, replacing any previously stored value.
    pub fn region(self, value: impl Into<Option<S>>) -> Self {
        self.maybe_region(value.into())
    }

    /// Sets the optional `region` field from an explicit `Option`;
    /// passing `None` clears it.
    pub fn maybe_region(self, value: Option<S>) -> Self {
        let mut builder = self;
        builder._fields.2 = value;
        builder
    }
}
impl<S: BosStr, St> StorageS3Builder<S, St>
where
    St: storage_s3_state::State,
    St::Shards: storage_s3_state::IsUnset,
{
    /// Sets the required `shards` field and moves the builder into the
    /// `SetShards` state.
    ///
    /// Only callable while `St::Shards` is still `IsUnset`.
    pub fn shards(
        self,
        value: impl Into<Vec<storage_s3::ShardEntry<S>>>,
    ) -> StorageS3Builder<S, storage_s3_state::SetShards<St>> {
        // Keep every other pending value; only slot 3 (shards) is replaced.
        let (bucket, endpoint, region, _) = self._fields;
        StorageS3Builder {
            _state: PhantomData,
            _fields: (bucket, endpoint, region, Some(value.into())),
            _type: PhantomData,
        }
    }
}
impl<S: BosStr, St> StorageS3Builder<S, St>
where
    St: storage_s3_state::State,
    St::Shards: storage_s3_state::IsSet,
    St::Bucket: storage_s3_state::IsSet,
{
    /// Finishes the builder, producing a `StorageS3` with no extra data.
    ///
    /// Only callable once both `bucket` and `shards` are marked as set.
    ///
    /// # Panics
    ///
    /// Panics if the `bucket` or `shards` slot is unexpectedly empty.
    pub fn build(self) -> StorageS3<S> {
        let (bucket, endpoint, region, shards) = self._fields;
        StorageS3 {
            bucket: bucket.unwrap(),
            endpoint,
            region,
            shards: shards.unwrap(),
            extra_data: None,
        }
    }

    /// Finishes the builder, attaching `extra_data` to the resulting
    /// `StorageS3`.
    ///
    /// Only callable once both `bucket` and `shards` are marked as set.
    ///
    /// # Panics
    ///
    /// Panics if the `bucket` or `shards` slot is unexpectedly empty.
    pub fn build_with_data(self, extra_data: BTreeMap<SmolStr, Data<S>>) -> StorageS3<S> {
        let (bucket, endpoint, region, shards) = self._fields;
        StorageS3 {
            bucket: bucket.unwrap(),
            endpoint,
            region,
            shards: shards.unwrap(),
            extra_data: Some(extra_data),
        }
    }
}
/// Builds the in-memory lexicon document for `science.alt.dataset.storageS3`.
///
/// The document holds two object definitions: `main` (the storage descriptor)
/// and `shardEntry` (one shard inside it). Every field not set explicitly is
/// taken from the corresponding `Default` implementation.
fn lexicon_doc_science_alt_dataset_storageS3() -> LexiconDoc<'static> {
    use alloc::collections::BTreeMap;
    #[allow(unused_imports)]
    use jacquard_common::{CowStr, deps::smol_str::SmolStr, types::blob::MimeType};
    use jacquard_lexicon::lexicon::*;

    // ---- `main` definition -------------------------------------------------
    let mut main_props = BTreeMap::new();
    main_props.insert(
        SmolStr::new_static("bucket"),
        LexObjectProperty::String(LexString {
            description: Some(CowStr::new_static("S3 bucket name")),
            max_length: Some(255usize),
            ..Default::default()
        }),
    );
    main_props.insert(
        SmolStr::new_static("endpoint"),
        LexObjectProperty::String(LexString {
            description: Some(CowStr::new_static(
                "Custom S3-compatible endpoint URL (e.g., for MinIO, Cloudflare R2). Omit for standard AWS S3.",
            )),
            format: Some(LexStringFormat::Uri),
            max_length: Some(500usize),
            ..Default::default()
        }),
    );
    main_props.insert(
        SmolStr::new_static("region"),
        LexObjectProperty::String(LexString {
            description: Some(CowStr::new_static(
                "AWS region (e.g., 'us-east-1'). Optional for S3-compatible services.",
            )),
            max_length: Some(50usize),
            ..Default::default()
        }),
    );
    main_props.insert(
        SmolStr::new_static("shards"),
        LexObjectProperty::Array(LexArray {
            description: Some(CowStr::new_static(
                "Array of shard entries with object key and integrity checksum",
            )),
            // Items point at the `shardEntry` definition in this same document.
            items: LexArrayItem::Ref(LexRef {
                r#ref: CowStr::new_static("#shardEntry"),
                ..Default::default()
            }),
            min_length: Some(1usize),
            ..Default::default()
        }),
    );
    let main_def = LexUserType::Object(LexObject {
        description: Some(CowStr::new_static(
            "S3 or S3-compatible storage for WebDataset tar archives. Supports custom endpoints for MinIO, Cloudflare R2, and other S3-compatible services.",
        )),
        required: Some(vec![
            SmolStr::new_static("bucket"),
            SmolStr::new_static("shards"),
        ]),
        properties: main_props,
        ..Default::default()
    });

    // ---- `shardEntry` definition -------------------------------------------
    let mut shard_props = BTreeMap::new();
    shard_props.insert(
        SmolStr::new_static("checksum"),
        LexObjectProperty::Ref(LexRef {
            r#ref: CowStr::new_static("science.alt.dataset.entry#shardChecksum"),
            ..Default::default()
        }),
    );
    shard_props.insert(
        SmolStr::new_static("key"),
        LexObjectProperty::String(LexString {
            description: Some(CowStr::new_static(
                "S3 object key for this WebDataset tar shard",
            )),
            max_length: Some(1024usize),
            ..Default::default()
        }),
    );
    let shard_entry_def = LexUserType::Object(LexObject {
        description: Some(CowStr::new_static(
            "A single S3 object shard with integrity checksum",
        )),
        required: Some(vec![
            SmolStr::new_static("key"),
            SmolStr::new_static("checksum"),
        ]),
        properties: shard_props,
        ..Default::default()
    });

    // ---- assemble the document ---------------------------------------------
    let mut defs = BTreeMap::new();
    defs.insert(SmolStr::new_static("main"), main_def);
    defs.insert(SmolStr::new_static("shardEntry"), shard_entry_def);

    LexiconDoc {
        lexicon: Lexicon::Lexicon1,
        id: CowStr::new_static("science.alt.dataset.storageS3"),
        defs,
        ..Default::default()
    }
}
/// Type-level state for `ShardEntryBuilder`.
///
/// Each `State` implementor records, through associated types, whether the
/// required fields `key` and `checksum` have been supplied. The builder's
/// setters and `build` methods put `IsUnset` / `IsSet` bounds on those
/// associated types.
pub mod shard_entry_state {
// Marker types and marker traits re-exported from `crate::builder_types`.
pub use crate::builder_types::{IsSet, IsUnset, Set, Unset};
#[allow(unused)]
use ::core::marker::PhantomData;
// `Sealed` lives in a private module, so code outside `shard_entry_state`
// cannot name it and therefore cannot implement `State`.
mod sealed {
pub trait Sealed {}
}
/// A builder state; one associated type per required field.
pub trait State: sealed::Sealed {
/// `Unset`, or `Set<members::key>` once `key` has been provided.
type Key;
/// `Unset`, or `Set<members::checksum>` once `checksum` has been provided.
type Checksum;
}
/// Initial state: no required field has been provided.
pub struct Empty(());
impl sealed::Sealed for Empty {}
impl State for Empty {
type Key = Unset;
type Checksum = Unset;
}
/// State reached after `key` is provided on top of the prior state `St`.
pub struct SetKey<St: State = Empty>(PhantomData<fn() -> St>);
impl<St: State> sealed::Sealed for SetKey<St> {}
impl<St: State> State for SetKey<St> {
type Key = Set<members::key>;
// Carried over unchanged from the prior state.
type Checksum = St::Checksum;
}
/// State reached after `checksum` is provided on top of the prior state `St`.
pub struct SetChecksum<St: State = Empty>(PhantomData<fn() -> St>);
impl<St: State> sealed::Sealed for SetChecksum<St> {}
impl<St: State> State for SetChecksum<St> {
// Carried over unchanged from the prior state.
type Key = St::Key;
type Checksum = Set<members::checksum>;
}
/// Marker types named after the required fields; used as the parameter of `Set`.
#[allow(non_camel_case_types)]
pub mod members {
/// Marker for the `key` field.
pub struct key(());
/// Marker for the `checksum` field.
pub struct checksum(());
}
}
/// Builder for `ShardEntry` whose type parameter `St` tracks which required
/// fields have been provided.
///
/// `build` and `build_with_data` are only available once `St::Key` and
/// `St::Checksum` both satisfy `IsSet`.
pub struct ShardEntryBuilder<S: BosStr, St: shard_entry_state::State> {
// Zero-sized marker carrying the current builder state.
_state: PhantomData<fn() -> St>,
// Pending field values, by position: 0 = checksum, 1 = key.
_fields: (Option<ShardChecksum<S>>, Option<S>),
// Zero-sized marker for the string type parameter.
_type: PhantomData<fn() -> S>,
}
impl<S: BosStr> ShardEntry<S> {
    /// Starts building a `ShardEntry`.
    ///
    /// Note that this returns a builder in the `Empty` state, not a
    /// `ShardEntry`; finish with `build` or `build_with_data` after providing
    /// `checksum` and `key`.
    pub fn new() -> ShardEntryBuilder<S, shard_entry_state::Empty> {
        <ShardEntryBuilder<S, shard_entry_state::Empty>>::new()
    }
}
impl<S: BosStr> ShardEntryBuilder<S, shard_entry_state::Empty> {
    /// Creates a builder in the `Empty` state with every field unset.
    pub fn new() -> Self {
        // Slots: checksum, key.
        let unset_fields = (None, None);
        Self {
            _fields: unset_fields,
            _state: PhantomData,
            _type: PhantomData,
        }
    }
}
impl<S: BosStr, St> ShardEntryBuilder<S, St>
where
    St: shard_entry_state::State,
    St::Checksum: shard_entry_state::IsUnset,
{
    /// Sets the required `checksum` field and moves the builder into the
    /// `SetChecksum` state.
    ///
    /// Only callable while `St::Checksum` is still `IsUnset`.
    pub fn checksum(
        self,
        value: impl Into<ShardChecksum<S>>,
    ) -> ShardEntryBuilder<S, shard_entry_state::SetChecksum<St>> {
        // Keep the pending key; only slot 0 (checksum) is replaced.
        let (_, key) = self._fields;
        ShardEntryBuilder {
            _state: PhantomData,
            _fields: (Some(value.into()), key),
            _type: PhantomData,
        }
    }
}
impl<S: BosStr, St> ShardEntryBuilder<S, St>
where
    St: shard_entry_state::State,
    St::Key: shard_entry_state::IsUnset,
{
    /// Sets the required `key` field and moves the builder into the `SetKey`
    /// state.
    ///
    /// Only callable while `St::Key` is still `IsUnset`.
    pub fn key(
        self,
        value: impl Into<S>,
    ) -> ShardEntryBuilder<S, shard_entry_state::SetKey<St>> {
        // Keep the pending checksum; only slot 1 (key) is replaced.
        let (checksum, _) = self._fields;
        ShardEntryBuilder {
            _state: PhantomData,
            _fields: (checksum, Some(value.into())),
            _type: PhantomData,
        }
    }
}
impl<S: BosStr, St> ShardEntryBuilder<S, St>
where
    St: shard_entry_state::State,
    St::Key: shard_entry_state::IsSet,
    St::Checksum: shard_entry_state::IsSet,
{
    /// Finishes the builder, producing a `ShardEntry` with no extra data.
    ///
    /// Only callable once both `key` and `checksum` are marked as set.
    ///
    /// # Panics
    ///
    /// Panics if the `checksum` or `key` slot is unexpectedly empty.
    pub fn build(self) -> ShardEntry<S> {
        let (checksum, key) = self._fields;
        ShardEntry {
            checksum: checksum.unwrap(),
            key: key.unwrap(),
            extra_data: None,
        }
    }

    /// Finishes the builder, attaching `extra_data` to the resulting
    /// `ShardEntry`.
    ///
    /// Only callable once both `key` and `checksum` are marked as set.
    ///
    /// # Panics
    ///
    /// Panics if the `checksum` or `key` slot is unexpectedly empty.
    pub fn build_with_data(self, extra_data: BTreeMap<SmolStr, Data<S>>) -> ShardEntry<S> {
        let (checksum, key) = self._fields;
        ShardEntry {
            checksum: checksum.unwrap(),
            key: key.unwrap(),
            extra_data: Some(extra_data),
        }
    }
}