Skip to main content

Pipeline

Trait Pipeline 

Source
/// Model pipeline interface: forwards inputs through a model, samples from the
/// resulting logits, and drives a full step that combines the two.
///
/// Every implementor must be `Send + Sync` and must also implement the mixin
/// supertraits listed below. The trait is dyn compatible, so it can be used
/// as `dyn Pipeline`.
///
/// The methods that return `Pin<Box<dyn Future<...> + Send + 'async_trait>>`
/// have the shape of a desugared `async fn` in a trait (the `'lifeN` /
/// `'async_trait` naming looks like `async-trait` macro output — TODO confirm
/// against the original source). In each of them, the `where` clause requires
/// `Self` and every borrowed argument to outlive the returned future.
pub trait Pipeline:
    Send
    + Sync
    + PreProcessingMixin
    + IsqPipelineMixin
    + CacheManagerMixin
    + MetadataMixin
    + AnyMoePipelineMixin {
    // Required methods
    /// Runs the model on type-erased `inputs` and returns a
    /// `ForwardInputsResult`, or an `Error` on failure.
    ///
    /// `return_raw_logits` is passed through as a flag.
    /// NOTE(review): the concrete type expected inside `Box<dyn Any>` is not
    /// visible here — presumably whatever the pipeline's input processor
    /// produces; confirm against implementors.
    fn forward_inputs(
        &mut self,
        inputs: Box<dyn Any>,
        return_raw_logits: bool,
    ) -> Result<ForwardInputsResult, Error>;
    /// Asynchronous sampling for causal generation.
    ///
    /// Takes `&self` (shared access), mutable access to the sequences and to
    /// the prefix cacher, and the `logits` by value. The returned future
    /// resolves to `Ok(())` or an `Error`.
    /// NOTE(review): presumably one logits tensor per sequence — confirm.
    fn sample_causal_gen<'life0, 'life1, 'life2, 'life3, 'async_trait>(
        &'life0 self,
        seqs: &'life1 mut [&'life2 mut Sequence],
        logits: Vec<Tensor>,
        prefix_cacher: &'life3 mut PrefixCacheManagerV2,
        disable_eos_stop: bool,
        rng: Arc<Mutex<Isaac64Rng>>,
    ) -> Pin<Box<dyn Future<Output = Result<(), Error>> + Send + 'async_trait>>
       where Self: 'async_trait,
             'life0: 'async_trait,
             'life1: 'async_trait,
             'life2: 'async_trait,
             'life3: 'async_trait;
    /// Returns the `ModelCategory` of this pipeline.
    fn category(&self) -> ModelCategory;

    // Provided methods
    /// Attaches a speculative-decoding configuration to this pipeline.
    ///
    /// Provided method; the default body is elided in this listing.
    /// NOTE(review): the `_config` name suggests the default implementation
    /// ignores its argument — confirm whether it returns `Ok` or an error.
    fn attach_speculative(
        &mut self,
        _config: SpeculativeConfig,
    ) -> Result<(), Error> { ... }
    /// Asynchronous speculative-sampling hook.
    ///
    /// Provided method; the default body is elided in this listing. The
    /// returned future resolves to a `bool` or an `Error`.
    /// NOTE(review): presumably the `bool` reports whether speculative
    /// sampling handled the step, and the underscore-prefixed parameters
    /// suggest the default ignores its inputs — confirm both.
    fn try_sample_speculative_causal_gen<'life0, 'life1, 'life2, 'life3, 'life4, 'async_trait>(
        &'life0 mut self,
        _input_seqs: &'life1 mut [&'life2 mut Sequence],
        _logits: &'life3 [Tensor],
        _prefix_cacher: &'life4 mut PrefixCacheManagerV2,
        _disable_eos_stop: bool,
        _rng: Arc<Mutex<Isaac64Rng>>,
        _metadata: Option<PagedAttentionMeta>,
    ) -> Pin<Box<dyn Future<Output = Result<bool, Error>> + Send + 'async_trait>>
       where Self: 'async_trait,
             'life0: 'async_trait,
             'life1: 'async_trait,
             'life2: 'async_trait,
             'life3: 'async_trait,
             'life4: 'async_trait { ... }
    /// Asynchronous pipeline step over `input_seqs`.
    ///
    /// Returns the total model execution time as a `Duration`.
    ///
    /// Provided method; the default body is elided in this listing.
    /// NOTE(review): presumably it calls `forward_inputs` and then
    /// `sample_causal_gen`, with `is_prompt` selecting prompt vs. completion
    /// handling and `backend_metadata` selecting the cache backend — confirm
    /// against the implementation.
    fn step<'life0, 'life1, 'life2, 'life3, 'async_trait>(
        &'life0 mut self,
        input_seqs: &'life1 mut [&'life2 mut Sequence],
        is_prompt: bool,
        return_raw_logits: bool,
        prefix_cacher: &'life3 mut PrefixCacheManagerV2,
        disable_eos_stop: bool,
        rng: Arc<Mutex<Isaac64Rng>>,
        backend_metadata: CacheBackendMetadata,
    ) -> Pin<Box<dyn Future<Output = Result<Duration, Error>> + Send + 'async_trait>>
       where Self: 'async_trait,
             'life0: 'async_trait,
             'life1: 'async_trait,
             'life2: 'async_trait,
             'life3: 'async_trait { ... }
    /// Returns the encoder cache hit/miss counters as `(hits, misses)` if this
    /// pipeline has an encoder cache.
    ///
    /// Provided method; the default body is elided in this listing.
    /// NOTE(review): presumably the default returns `None` — confirm.
    fn encoder_cache_counters(
        &self,
    ) -> Option<(Arc<AtomicUsize>, Arc<AtomicUsize>)> { ... }
}

Required Methods§

Source

fn forward_inputs( &mut self, inputs: Box<dyn Any>, return_raw_logits: bool, ) -> Result<ForwardInputsResult, Error>

Source

fn sample_causal_gen<'life0, 'life1, 'life2, 'life3, 'async_trait>( &'life0 self, seqs: &'life1 mut [&'life2 mut Sequence], logits: Vec<Tensor>, prefix_cacher: &'life3 mut PrefixCacheManagerV2, disable_eos_stop: bool, rng: Arc<Mutex<Isaac64Rng>>, ) -> Pin<Box<dyn Future<Output = Result<(), Error>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait, 'life2: 'async_trait, 'life3: 'async_trait,

Source

fn category(&self) -> ModelCategory

Provided Methods§

Source

fn attach_speculative( &mut self, _config: SpeculativeConfig, ) -> Result<(), Error>

Source

fn try_sample_speculative_causal_gen<'life0, 'life1, 'life2, 'life3, 'life4, 'async_trait>( &'life0 mut self, _input_seqs: &'life1 mut [&'life2 mut Sequence], _logits: &'life3 [Tensor], _prefix_cacher: &'life4 mut PrefixCacheManagerV2, _disable_eos_stop: bool, _rng: Arc<Mutex<Isaac64Rng>>, _metadata: Option<PagedAttentionMeta>, ) -> Pin<Box<dyn Future<Output = Result<bool, Error>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait, 'life2: 'async_trait, 'life3: 'async_trait, 'life4: 'async_trait,

Source

fn step<'life0, 'life1, 'life2, 'life3, 'async_trait>( &'life0 mut self, input_seqs: &'life1 mut [&'life2 mut Sequence], is_prompt: bool, return_raw_logits: bool, prefix_cacher: &'life3 mut PrefixCacheManagerV2, disable_eos_stop: bool, rng: Arc<Mutex<Isaac64Rng>>, backend_metadata: CacheBackendMetadata, ) -> Pin<Box<dyn Future<Output = Result<Duration, Error>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait, 'life2: 'async_trait, 'life3: 'async_trait,

Returns the total model execution time.

Source

fn encoder_cache_counters(&self) -> Option<(Arc<AtomicUsize>, Arc<AtomicUsize>)>

Returns the encoder cache hit/miss counters as `(hits, misses)` if this pipeline has an encoder cache.

Dyn Compatibility§

This trait is dyn compatible.

In older versions of Rust, dyn compatibility was called "object safety".

Implementors§