pub struct JinaCodeEmbedder { /* private fields */ }
Expand description
Jina Code Embeddings 1.5B specialized embedder.
This embedder is optimized for the jina-code-embeddings-1.5b model with:
- Task-specific instruction prefixes (NL2Code, Code2Code, etc.)
- Last-token pooling (required for this decoder-based model)
- Matryoshka dimension truncation (128, 256, 512, 1024, 1536)
- Automatic handling of the 32768 context window
§Asymmetric vs Symmetric Embedding Mode
For optimal retrieval quality, use different modes for indexing vs querying:
- Passage mode (default): Use for indexing code/documents - adds passage prefix
- Query mode: Use for search queries - adds query prefix
§Example
use aurora_semantic::{JinaCodeEmbedder, EmbeddingTask, MatryoshkaDimension, EmbeddingMode};
// For INDEXING: use Passage mode (default)
let indexer = JinaCodeEmbedder::from_directory("./models/jina-code-1.5b")?
.with_task(EmbeddingTask::NL2Code)
.with_mode(EmbeddingMode::Passage); // Default, can omit
// For SEARCHING: use Query mode
let searcher = JinaCodeEmbedder::from_directory("./models/jina-code-1.5b")?
.with_task(EmbeddingTask::NL2Code)
.with_mode(EmbeddingMode::Query);
Implementations§
Source§impl JinaCodeEmbedder
impl JinaCodeEmbedder
Sourcepub const DEFAULT_MAX_LENGTH: usize = 32_768usize
pub const DEFAULT_MAX_LENGTH: usize = 32_768usize
Default max sequence length for Jina Code 1.5B.
Sourcepub const DEFAULT_DIMENSION: usize = 1_536usize
pub const DEFAULT_DIMENSION: usize = 1_536usize
Default dimension for Jina Code 1.5B.
Sourcepub fn from_directory<P: AsRef<Path>>(model_dir: P) -> Result<Self>
pub fn from_directory<P: AsRef<Path>>(model_dir: P) -> Result<Self>
Load Jina Code Embeddings 1.5B from a model directory.
The directory should contain:
- model.onnx - The ONNX model file
- tokenizer.json - The HuggingFace tokenizer
Sourcepub fn from_onnx_embedder(inner: OnnxEmbedder) -> Self
pub fn from_onnx_embedder(inner: OnnxEmbedder) -> Self
Create from an existing OnnxEmbedder.
Sourcepub fn with_task(self, task: EmbeddingTask) -> Self
pub fn with_task(self, task: EmbeddingTask) -> Self
Set the embedding task (determines instruction prefix).
Sourcepub fn with_dimension(self, dimension: MatryoshkaDimension) -> Self
pub fn with_dimension(self, dimension: MatryoshkaDimension) -> Self
Set the output dimension (Matryoshka truncation).
Smaller dimensions reduce storage and speed up similarity search with minimal quality loss.
Sourcepub fn with_max_length(self, max_length: usize) -> Self
pub fn with_max_length(self, max_length: usize) -> Self
Set the maximum sequence length.
Sourcepub fn with_mode(self, mode: EmbeddingMode) -> Self
pub fn with_mode(self, mode: EmbeddingMode) -> Self
Set the embedding mode (Query or Passage).
- Passage (default): Use for indexing code - adds passage prefix
- Query: Use for search queries - adds query prefix
Sourcepub fn mode(&self) -> EmbeddingMode
pub fn mode(&self) -> EmbeddingMode
Get the current embedding mode.
Sourcepub fn task(&self) -> EmbeddingTask
pub fn task(&self) -> EmbeddingTask
Get the current task.
Sourcepub fn output_dimension(&self) -> MatryoshkaDimension
pub fn output_dimension(&self) -> MatryoshkaDimension
Get the output dimension.
Sourcepub fn execution_provider(&self) -> &ExecutionProviderInfo
pub fn execution_provider(&self) -> &ExecutionProviderInfo
Get information about the execution provider (CPU/GPU).
Sourcepub fn is_gpu_accelerated(&self) -> bool
pub fn is_gpu_accelerated(&self) -> bool
Check if GPU acceleration is being used.
Trait Implementations§
Source§impl Embedder for JinaCodeEmbedder
impl Embedder for JinaCodeEmbedder
Source§fn embed_for_query(&self, text: &str) -> Result<Vec<f32>>
fn embed_for_query(&self, text: &str) -> Result<Vec<f32>>
Override for asymmetric retrieval - always use query prefix for search queries.
Source§fn embed_batch(&self, texts: &[&str]) -> Result<Vec<Vec<f32>>>
fn embed_batch(&self, texts: &[&str]) -> Result<Vec<Vec<f32>>>
Source§fn max_sequence_length(&self) -> usize
fn max_sequence_length(&self) -> usize
Auto Trait Implementations§
impl !Freeze for JinaCodeEmbedder
impl !RefUnwindSafe for JinaCodeEmbedder
impl Send for JinaCodeEmbedder
impl Sync for JinaCodeEmbedder
impl Unpin for JinaCodeEmbedder
impl !UnwindSafe for JinaCodeEmbedder
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> Downcast for T where
T: Any,
impl<T> Downcast for T where
T: Any,
Source§fn into_any(self: Box<T>) -> Box<dyn Any>
fn into_any(self: Box<T>) -> Box<dyn Any>
Converts Box<dyn Trait> (where Trait: Downcast) to Box<dyn Any>. Box<dyn Any> can
then be further downcast into Box<ConcreteType> where ConcreteType implements Trait.
Source§fn into_any_rc(self: Rc<T>) -> Rc<dyn Any>
fn into_any_rc(self: Rc<T>) -> Rc<dyn Any>
Converts Rc<Trait> (where Trait: Downcast) to Rc<Any>. Rc<Any> can then be
further downcast into Rc<ConcreteType> where ConcreteType implements Trait.
Source§fn as_any(&self) -> &(dyn Any + 'static)
fn as_any(&self) -> &(dyn Any + 'static)
Converts &Trait (where Trait: Downcast) to &Any. This is needed since Rust cannot
generate &Any’s vtable from &Trait’s.
Source§fn as_any_mut(&mut self) -> &mut (dyn Any + 'static)
fn as_any_mut(&mut self) -> &mut (dyn Any + 'static)
Converts &mut Trait (where Trait: Downcast) to &mut Any. This is needed since Rust cannot
generate &mut Any’s vtable from &mut Trait’s.
Source§impl<T> DowncastSync for T
impl<T> DowncastSync for T
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more