pub struct MigrationJob {
pub table: TableIdent,
pub old_column: String,
pub new_column: String,
pub text_column: String,
pub embed_fn: EmbedFn,
pub strategy: MigrationStrategy,
pub batch_size: usize,
pub new_model: Option<EmbeddingModelInfo>,
pub on_progress: Option<ProgressFn>,
}
Migrates embedding columns in an AI-Lake table to a new model.
Usage:
ⓘ
let job = MigrationJob {
table: TableIdent::new("default", "docs"),
old_column: "embedding".to_string(),
new_column: "embedding_v2".to_string(),
text_column: "chunk_text".to_string(),
embed_fn: Arc::new(|texts| Ok(my_model.encode(texts))),
strategy: MigrationStrategy::DualWriteThenCutover,
batch_size: 10_000,
new_model: Some(EmbeddingModelInfo::new("my-model-v2")),
on_progress: None,
};
job.run(catalog, store).await?;
Fields§
§table: TableIdent
§old_column: String
Name of the embedding column to replace (e.g., “embedding”).
new_column: String
Name to give the new embedding column (e.g., “embedding_v2”).
Can be the same as old_column to do an in-place model upgrade.
text_column: String
Column in the Parquet files that holds the text to re-embed.
Defaults to chunk_text (the LlmContextSchema canonical name).
embed_fn: EmbedFn
Callable that converts a slice of texts to embeddings.
Must return exactly texts.len() vectors, all of the same dimension.
strategy: MigrationStrategy
§batch_size: usize
How many rows to embed per embed_fn call. Tune based on model batch size.
new_model: Option<EmbeddingModelInfo>
Metadata for the new embedding model — stored in Iceberg properties after migration.
on_progress: Option<ProgressFn>
Optional callback called after each file completes.
Implementations§
Source§impl MigrationJob
impl MigrationJob
pub async fn run(self, catalog: Arc<dyn CatalogProvider>, store: Arc<dyn Store>) -> AilakeResult<()>
Auto Trait Implementations§
impl !RefUnwindSafe for MigrationJob
impl !UnwindSafe for MigrationJob
impl Freeze for MigrationJob
impl Send for MigrationJob
impl Sync for MigrationJob
impl Unpin for MigrationJob
impl UnsafeUnpin for MigrationJob
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T
where
T: ?Sized,
impl<T> BorrowMut<T> for T
where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
impl<ST, DT> CastableFrom<ST, Initialized, Initialized> for DT
impl<ST, DT> CastableFrom<ST, Uninit, Uninit> for DT
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more