pub struct HfPublisher { /* private fields */ }
Publisher for uploading datasets to HuggingFace Hub.
§Example
use alimentar::hf_hub::HfPublisher;
use arrow::record_batch::RecordBatch;
let publisher = HfPublisher::new("paiml/my-dataset")
.with_token(std::env::var("HF_TOKEN").unwrap())
.with_private(false);
// publisher.upload_batch("train.parquet", &batch).await.unwrap();
Implementations§
Source§impl HfPublisher
impl HfPublisher
Sourcepub fn new(repo_id: impl Into<String>) -> Self
pub fn new(repo_id: impl Into<String>) -> Self
Creates a new publisher for a HuggingFace dataset repository.
Sourcepub fn with_token(self, token: impl Into<String>) -> Self
pub fn with_token(self, token: impl Into<String>) -> Self
Sets the HuggingFace API token.
Sourcepub fn with_private(self, private: bool) -> Self
pub fn with_private(self, private: bool) -> Self
Sets whether the dataset should be private.
Sourcepub fn with_commit_message(self, message: impl Into<String>) -> Self
pub fn with_commit_message(self, message: impl Into<String>) -> Self
Sets the commit message for uploads.
Sourcepub async fn create_repo(&self) -> Result<()>
pub async fn create_repo(&self) -> Result<()>
Creates the dataset repository on HuggingFace Hub if it doesn’t exist.
Sourcepub async fn upload_file(&self, path_in_repo: &str, data: &[u8]) -> Result<()>
pub async fn upload_file(&self, path_in_repo: &str, data: &[u8]) -> Result<()>
Uploads a file to the repository.
This method automatically selects the appropriate upload method:
- Binary files (parquet, images, etc.): Uses LFS preupload API
- Text files (README.md, JSON, etc.): Uses direct NDJSON commit API
The official hf-hub crate only supports downloads, making this upload
capability a key differentiator for alimentar.
Sourcepub async fn upload_batch(
&self,
path_in_repo: &str,
batch: &RecordBatch,
) -> Result<()>
pub async fn upload_batch( &self, path_in_repo: &str, batch: &RecordBatch, ) -> Result<()>
Uploads a RecordBatch as a parquet file.
Sourcepub async fn upload_parquet_file(
&self,
local_path: &Path,
path_in_repo: &str,
) -> Result<()>
pub async fn upload_parquet_file( &self, local_path: &Path, path_in_repo: &str, ) -> Result<()>
Uploads a local parquet file to the repository.
Sourcepub fn create_repo_sync(&self) -> Result<()>
pub fn create_repo_sync(&self) -> Result<()>
Synchronous wrapper for creating repo (for CLI use).
Sourcepub fn upload_file_sync(&self, path_in_repo: &str, data: &[u8]) -> Result<()>
pub fn upload_file_sync(&self, path_in_repo: &str, data: &[u8]) -> Result<()>
Synchronous wrapper for uploading file (for CLI use).
Sourcepub fn upload_parquet_file_sync(
&self,
local_path: &Path,
path_in_repo: &str,
) -> Result<()>
pub fn upload_parquet_file_sync( &self, local_path: &Path, path_in_repo: &str, ) -> Result<()>
Synchronous wrapper for uploading parquet file (for CLI use).
Sourcepub async fn upload_readme_validated(&self, content: &str) -> Result<()>
pub async fn upload_readme_validated(&self, content: &str) -> Result<()>
Uploads a README.md with validation.
Validates the dataset card metadata before upload to catch issues like
invalid task_categories before they cause HuggingFace warnings.
§Errors
Returns an error if validation fails or upload fails.
Sourcepub fn upload_readme_validated_sync(&self, content: &str) -> Result<()>
pub fn upload_readme_validated_sync(&self, content: &str) -> Result<()>
Synchronous wrapper for validated README upload.
Trait Implementations§
Source§impl Clone for HfPublisher
impl Clone for HfPublisher
Source§fn clone(&self) -> HfPublisher
fn clone(&self) -> HfPublisher
1.0.0 (const: unstable) · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more
Auto Trait Implementations§
impl Freeze for HfPublisher
impl RefUnwindSafe for HfPublisher
impl Send for HfPublisher
impl Sync for HfPublisher
impl Unpin for HfPublisher
impl UnsafeUnpin for HfPublisher
impl UnwindSafe for HfPublisher
Blanket Implementations§
impl<T> Allocation for T
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> CloneToUninit for T where
T: Clone,
impl<T> CloneToUninit for T where
T: Clone,
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more