cervo_core/inferer/dynamic.rs

use super::{helpers, Inferer};
use crate::{batcher::ScratchPadView, model_api::ModelApi};
use anyhow::Result;
use tract_core::prelude::{tvec, TValue, TVec, Tensor, TractResult, TypedModel, TypedSimplePlan};
use tract_hir::prelude::InferenceModel;

/// The dynamic inferer hits a spot between the raw simplicity of a [`crate::prelude::BasicInferer`] and the spikiness
/// of a [`crate::prelude::MemoizingDynamicInferer`]. Instead of explicitly concretizing models and caching them, it
/// relies on tract's internal concretization, which performs worse overall than explicit caching but still beats the
/// [`crate::prelude::BasicInferer`].
///
/// # Pros
///
/// * Requires no tuning for OK results
/// * Fixed memory and fairly linear performance scaling
///
/// # Cons
///
/// * Small extra overhead for small extra performance
/// * Worst option for small batch sizes
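///
/// # Example
///
/// A minimal sketch of building a `DynamicInferer` from an ONNX file. The `tract_onnx`
/// loader, the `prelude` re-export, and the `"model.onnx"` path are illustrative
/// assumptions, not part of this module's API:
///
/// ```no_run
/// use cervo_core::prelude::DynamicInferer;
///
/// fn main() -> anyhow::Result<()> {
///     // Load an ONNX file as an unoptimized inference model (assumed `tract_onnx` dependency).
///     let model = tract_onnx::onnx().model_for_path("model.onnx")?;
///
///     // `from_model` optimizes the graph with a symbolic batch dimension.
///     let _inferer = DynamicInferer::from_model(model)?;
///     Ok(())
/// }
/// ```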
pub struct DynamicInferer {
    model: TypedSimplePlan<TypedModel>,
    model_api: ModelApi,
}

impl DynamicInferer {
    /// Create an inferer for the provided `inference` model.
    ///
    /// # Errors
    ///
    /// Will only forward errors from the [`tract_core::model::Graph`] optimization and graph building steps.
    pub fn from_model(model: InferenceModel) -> TractResult<Self> {
        let model_api = ModelApi::for_model(&model)?;

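        // Rewrite the inputs to use a symbolic batch dimension so tract can
        // specialize the plan for whatever batch size shows up at run time.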
        let (_, model) = helpers::build_symbolic_model(model, &model_api.inputs)?;
        let this = Self {
            model: model.into_optimized()?.into_runnable()?,
            model_api,
        };

        Ok(this)
    }

    /// Create an inferer for the provided `typed` model.
    ///
    /// # Errors
    ///
    /// Will only forward errors from the [`tract_core::model::Graph`] optimization and graph building steps.
    pub fn from_typed(mut model: TypedModel) -> TractResult<Self> {
        let model_api = ModelApi::for_typed_model(&model)?;

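        // Same symbolic-batch rewrite as `from_model`, applied in place on the typed graph.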
        let _ = helpers::build_symbolic_typed(&mut model)?;
        let this = Self {
            model: model.into_optimized()?.into_runnable()?,
            model_api,
        };

        Ok(this)
    }

    fn build_inputs(&self, batch: &mut ScratchPadView<'_>) -> Result<TVec<TValue>> {
        let size = batch.len();

        let mut inputs = TVec::default();

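        // For each declared input, prepend the batch size to the element shape
        // and wrap the corresponding scratch-pad slot as a tensor.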
        for (idx, (name, shape)) in self.model_api.inputs.iter().enumerate() {
            assert_eq!(name, batch.input_name(idx));

            let mut full_shape = tvec![size];
            full_shape.extend_from_slice(shape);

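            // Sanity check: the slot must hold exactly batch-size x element-count values.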
            let total_count: usize = full_shape.iter().product();
            assert_eq!(total_count, batch.input_slot(idx).len());

            let tensor = Tensor::from_shape(&full_shape, batch.input_slot(idx))?;

            inputs.push(tensor.into());
        }

        Ok(inputs)
    }
}

impl Inferer for DynamicInferer {
    fn select_batch_size(&self, max_count: usize) -> usize {
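        // The symbolic batch dimension accepts any size, so take the whole batch.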
        max_count
    }

    fn infer_raw(&self, pad: &mut ScratchPadView<'_>) -> Result<(), anyhow::Error> {
        let inputs = self.build_inputs(pad)?;

        // Run the optimized plan to get actions back!
        let result = self.model.run(inputs)?;

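        // Copy each output tensor back into the scratch pad as f32 data, slot by slot.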
        for idx in 0..self.model_api.outputs.len() {
            let value = result[idx].as_slice::<f32>()?;
            pad.output_slot_mut(idx).copy_from_slice(value);
        }

        Ok(())
    }

    fn raw_input_shapes(&self) -> &[(String, Vec<usize>)] {
        &self.model_api.inputs
    }

    fn raw_output_shapes(&self) -> &[(String, Vec<usize>)] {
        &self.model_api.outputs
    }

    fn begin_agent(&self, _id: u64) {}
    fn end_agent(&self, _id: u64) {}
}