pub struct FileSourceConfig {
pub source_id: SourceId,
pub root: PathBuf,
pub trust: f32,
pub category_trust: HashMap<CategoryId, f32>,
pub follow_links: bool,
pub text_files_only: bool,
pub group_by_directory: bool,
pub title_replace_underscores: bool,
pub include_date_aware_default_recipe: bool,
pub index_dir: Option<PathBuf>,
pub default_triplet_recipes: Vec<TripletRecipe>,
pub taxonomy_builder: TaxonomyBuilder,
pub section_builder: SectionBuilder,
}
Configuration for a generic filesystem-backed data source.
Fields
source_id: SourceId
Stable source identifier used in records and persistence keys.
root: PathBuf
Root directory containing source files.
trust: f32
Default quality trust score applied to generated records.
category_trust: HashMap<CategoryId, f32>
Optional trust overrides keyed by taxonomy segment.
follow_links: bool
Whether to follow symlinks during index walking.
text_files_only: bool
Whether indexing should include only text files.
group_by_directory: bool
Whether deterministic directory grouping is enabled.
title_replace_underscores: bool
Whether title extraction should replace underscores with spaces.
include_date_aware_default_recipe: bool
Whether the default recipe set includes the date-aware negative lane.
index_dir: Option<PathBuf>
Optional directory used for persisted file-corpus index stores.
When None, file-corpus indexing uses the managed cache discovery root.
Set this in tests to keep index writes inside temporary directories.
default_triplet_recipes: Vec<TripletRecipe>
Optional default recipes returned by this source.
taxonomy_builder: TaxonomyBuilder
Taxonomy builder invoked per file.
section_builder: SectionBuilder
Section builder invoked per file.
Implementations
impl FileSourceConfig
pub fn new(source_id: impl Into<SourceId>, root: impl Into<PathBuf>) -> Self
Create a config for a filesystem source with an explicit id and root.
pub fn with_trust(self, trust: f32) -> Self
Override the default trust score.
pub fn with_category_trust(self, category: impl Into<String>, trust: f32) -> Self
Add a taxonomy-segment trust override.
pub fn with_follow_links(self, follow_links: bool) -> Self
Override whether symlinks are followed during the index walk.
pub fn with_text_files_only(self, text_files_only: bool) -> Self
Override whether the index walk includes only text files.
pub fn with_directory_grouping(self, group_by_directory: bool) -> Self
Enable or disable deterministic directory grouping.
pub fn with_title_replace_underscores(self, replace_underscores: bool) -> Self
Set whether title extraction replaces underscores with spaces.
pub fn with_date_aware_default_recipe(self, include: bool) -> Self
Enable or disable the date-aware default recipe lane (WrongPublicationDate).
“Date-aware” here refers to publication-date metadata on records (for example, taxonomy/meta date fields), not to filesystem timestamps of the source files.
pub fn with_index_dir(self, index_dir: impl Into<PathBuf>) -> Self
Override the directory used to persist file-corpus index stores.
pub fn with_default_triplet_recipes(self, recipes: Vec<TripletRecipe>) -> Self
Set source-provided default triplet recipes.
pub fn with_taxonomy_builder(self, taxonomy_builder: TaxonomyBuilder) -> Self
Set a custom taxonomy builder.
pub fn with_section_builder(self, section_builder: SectionBuilder) -> Self
Set a custom section builder.
Trait Implementations
impl Clone for FileSourceConfig
fn clone(&self) -> FileSourceConfig
fn clone_from(&mut self, source: &Self)
Since 1.0.0 (const: unstable). Performs copy-assignment from source.
Auto Trait Implementations
impl Freeze for FileSourceConfig
impl !RefUnwindSafe for FileSourceConfig
impl Send for FileSourceConfig
impl Sync for FileSourceConfig
impl Unpin for FileSourceConfig
impl UnsafeUnpin for FileSourceConfig
impl !UnwindSafe for FileSourceConfig
Blanket Implementations
impl<T> BorrowMut<T> for T
where
    T: ?Sized,
fn borrow_mut(&mut self) -> &mut T
impl<T> CloneToUninit for T
where
    T: Clone,
impl<T> Instrument for T
fn instrument(self, span: Span) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
impl<T> IntoEither for T
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true.
Converts self into a Right variant of Either<Self, Self> otherwise.
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self> otherwise.