pub struct CustomDataset { /* private fields */ }
Expand description
A custom dataset defined at runtime.
Use this when you need to load a dataset that isn’t in the built-in
DatasetId enum.
§Example
use anno_core::core::dataset::{CustomDataset, Task, ParserHint, License, Domain};
use std::path::PathBuf;
let dataset = CustomDataset::new("my_ner_data", Task::NER)
.with_name("My Company NER Dataset")
.with_languages(&["en", "de"])
.with_entity_types(&["PRODUCT", "TEAM", "PROJECT"])
.with_parser(ParserHint::CoNLL)
.with_license(License::Proprietary)
.with_domain(Domain::Technical)
.with_path(PathBuf::from("/data/my_ner.conll"));
Implementations§
Source§impl CustomDataset
impl CustomDataset
Sourcepub fn new(id: impl Into<String>, task: Task) -> CustomDataset
pub fn new(id: impl Into<String>, task: Task) -> CustomDataset
Create a new custom dataset with minimal required fields.
Sourcepub fn with_name(self, name: impl Into<String>) -> CustomDataset
pub fn with_name(self, name: impl Into<String>) -> CustomDataset
Set the human-readable name.
Sourcepub fn with_languages(self, langs: &[&str]) -> CustomDataset
pub fn with_languages(self, langs: &[&str]) -> CustomDataset
Set the languages covered.
Sourcepub fn with_entity_types(self, types: &[&str]) -> CustomDataset
pub fn with_entity_types(self, types: &[&str]) -> CustomDataset
Set the entity types.
Sourcepub fn with_parser(self, parser: ParserHint) -> CustomDataset
pub fn with_parser(self, parser: ParserHint) -> CustomDataset
Set the parser hint.
Sourcepub fn with_license(self, license: License) -> CustomDataset
pub fn with_license(self, license: License) -> CustomDataset
Set the license.
Sourcepub fn with_description(self, desc: impl Into<String>) -> CustomDataset
pub fn with_description(self, desc: impl Into<String>) -> CustomDataset
Set the description.
Sourcepub fn with_domain(self, domain: Domain) -> CustomDataset
pub fn with_domain(self, domain: Domain) -> CustomDataset
Set the domain.
Sourcepub fn with_url(self, url: impl Into<String>) -> CustomDataset
pub fn with_url(self, url: impl Into<String>) -> CustomDataset
Set the download URL.
Sourcepub fn with_path(self, path: PathBuf) -> CustomDataset
pub fn with_path(self, path: PathBuf) -> CustomDataset
Set the local file path.
Sourcepub fn with_stats(self, stats: DatasetStats) -> CustomDataset
pub fn with_stats(self, stats: DatasetStats) -> CustomDataset
Set dataset statistics.
Sourcepub fn with_temporal_coverage(self, coverage: TemporalCoverage) -> CustomDataset
pub fn with_temporal_coverage(self, coverage: TemporalCoverage) -> CustomDataset
Set temporal coverage.
Sourcepub fn with_secondary_tasks(self, tasks: Vec<Task>) -> CustomDataset
pub fn with_secondary_tasks(self, tasks: Vec<Task>) -> CustomDataset
Add secondary tasks.
Sourcepub fn constructed(self) -> CustomDataset
pub fn constructed(self) -> CustomDataset
Mark as constructed language.
Sourcepub fn historical(self) -> CustomDataset
pub fn historical(self) -> CustomDataset
Mark as historical language.
Sourcepub fn requires_authentication(self) -> CustomDataset
pub fn requires_authentication(self) -> CustomDataset
Mark as requiring authentication.
Sourcepub fn with_version(self, version: impl Into<String>) -> CustomDataset
pub fn with_version(self, version: impl Into<String>) -> CustomDataset
Set version string.
Sourcepub fn languages_owned(&self) -> &[String]
pub fn languages_owned(&self) -> &[String]
Get languages as owned strings (for custom datasets).
Sourcepub fn entity_types_owned(&self) -> &[String]
pub fn entity_types_owned(&self) -> &[String]
Get entity types as owned strings (for custom datasets).
Sourcepub fn with_notes(self, notes: impl Into<String>) -> CustomDataset
pub fn with_notes(self, notes: impl Into<String>) -> CustomDataset
Set notes.
Sourcepub fn with_citation(self, citation: impl Into<String>) -> CustomDataset
pub fn with_citation(self, citation: impl Into<String>) -> CustomDataset
Set citation.
Trait Implementations§
Source§impl Clone for CustomDataset
impl Clone for CustomDataset
Source§fn clone(&self) -> CustomDataset
fn clone(&self) -> CustomDataset
Returns a duplicate of the value. Read more
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more
Source§impl DatasetSpec for CustomDataset
impl DatasetSpec for CustomDataset
Source§fn entity_types(&self) -> &[&str]
fn entity_types(&self) -> &[&str]
Entity types annotated in this dataset. Read more
Source§fn parser_hint(&self) -> ParserHint
fn parser_hint(&self) -> ParserHint
Parser format hint for loading.
Source§fn description(&self) -> Option<&str>
fn description(&self) -> Option<&str>
Detailed description of the dataset.
Source§fn download_url(&self) -> Option<&str>
fn download_url(&self) -> Option<&str>
URL for downloading the dataset.
Source§fn local_path(&self) -> Option<&Path>
fn local_path(&self) -> Option<&Path>
Local path if already downloaded.
Source§fn stats(&self) -> DatasetStats
fn stats(&self) -> DatasetStats
Dataset statistics (counts, splits).
Source§fn temporal_coverage(&self) -> TemporalCoverage
fn temporal_coverage(&self) -> TemporalCoverage
Temporal coverage information.
Source§fn secondary_tasks(&self) -> &[Task]
fn secondary_tasks(&self) -> &[Task]
Additional tasks supported beyond the primary task.
Source§fn is_constructed_language(&self) -> bool
fn is_constructed_language(&self) -> bool
Whether this is a constructed/artificial language dataset.
Source§fn is_historical(&self) -> bool
fn is_historical(&self) -> bool
Whether this is a historical/ancient language dataset.
Source§fn requires_auth(&self) -> bool
fn requires_auth(&self) -> bool
Whether this dataset requires special access (gated, auth, etc.).
Source§fn languages_vec(&self) -> Vec<String>
fn languages_vec(&self) -> Vec<String>
Get languages as owned Vec (for custom datasets that don’t have static data). Read more
Source§fn entity_types_vec(&self) -> Vec<String>
fn entity_types_vec(&self) -> Vec<String>
Get entity types as owned Vec (for custom datasets that don’t have static data). Read more
Source§fn supports_task(&self, task: Task) -> bool
fn supports_task(&self, task: Task) -> bool
Check if this dataset supports a specific task.
Source§fn supports_language(&self, lang: &str) -> bool
fn supports_language(&self, lang: &str) -> bool
Check if this dataset covers a specific language.
Source§fn has_entity_type(&self, entity_type: &str) -> bool
fn has_entity_type(&self, entity_type: &str) -> bool
Check if this dataset has a specific entity type.
Auto Trait Implementations§
impl Freeze for CustomDataset
impl RefUnwindSafe for CustomDataset
impl Send for CustomDataset
impl Sync for CustomDataset
impl Unpin for CustomDataset
impl UnsafeUnpin for CustomDataset
impl UnwindSafe for CustomDataset
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T
where
    T: ?Sized,
impl<T> BorrowMut<T> for T
where
    T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
Source§impl<T> CloneToUninit for T
where
    T: Clone,
impl<T> CloneToUninit for T
where
    T: Clone,
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true.
Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self> otherwise. Read more