cervo_core/inferer/fixed.rs

use super::{helpers, Inferer};
use crate::{batcher::ScratchPadView, model_api::ModelApi};
use anyhow::{Context, Result};
use tract_core::prelude::{tvec, TValue, TVec, Tensor, TractResult, TypedModel, TypedSimplePlan};
use tract_hir::prelude::InferenceModel;

/// A reliable batched inferer that is a good fit if you know how much data you'll have and want stable performance.
///
/// As an added bonus, it'll subdivide your data into minibatches if the data doesn't fit the configured batch sizes
/// perfectly. To make this work, it always adds a single-element plan as well, ensuring all data is consumed - for
/// example, when you feed it 9 elements with a configured batch size of 8.
///
/// You can configure any number of batch sizes, and the largest applicable one will be used for each minibatch. Note
/// that the per-execution overhead is still fairly large, but batching helps amortize some of that cost. For example,
/// with [1, 2, 4, 8] as your supported batch sizes, a batch of 15 elements would run each plan exactly once.
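///
/// The scheduling amounts to a greedy decomposition of the element count into the supported sizes, as in this
/// illustrative sketch (not the crate's internal code):
///
/// ```
/// let sizes = [8, 4, 2, 1]; // largest first; a 1-element plan is always present
/// let mut remaining = 15;
/// let mut runs = vec![];
/// while remaining > 0 {
///     let size = *sizes.iter().find(|&&s| s <= remaining).unwrap();
///     runs.push(size);
///     remaining -= size;
/// }
/// assert_eq!(runs, vec![8, 4, 2, 1]); // each plan executes once
/// ```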
///
/// # Pros
///
/// * Good and predictable performance if you know the amount of data up front
/// * Flexible if you sometimes get extra data to deal with
///
/// # Cons
///
/// * Mini-batches add overhead
/// * Diminishing returns for each added batch size
pub struct FixedBatchInferer {
    model_api: ModelApi,
    models: Vec<BatchedModel>,
}

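/// Normalize the configured sizes: ensure a single-element plan is always
/// present, then sort largest-first so lookups can greedily pick the biggest
/// plan that fits the remaining data.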
fn fixup_sizes(sizes: &[usize]) -> Vec<usize> {
    let mut sizes = sizes.to_vec();
    if !sizes.contains(&1) {
        sizes.push(1);
    }
    sizes.sort_unstable();
    sizes.reverse();

    sizes
}

impl FixedBatchInferer {
    /// Create an inferer for the provided [`InferenceModel`].
    ///
    /// # Errors
    ///
    /// Will only forward errors from the [`tract_core::model::Graph`] optimization and graph building steps.
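    ///
    /// # Example
    ///
    /// A minimal sketch, assuming the model is an ONNX file loaded via `tract_onnx` and that
    /// `FixedBatchInferer` is reachable at this path in your embedding (adjust as needed):
    ///
    /// ```ignore
    /// use tract_onnx::prelude::*;
    ///
    /// fn main() -> anyhow::Result<()> {
    ///     let model = tract_onnx::onnx().model_for_path("brain.onnx")?;
    ///     let inferer = cervo_core::inferer::FixedBatchInferer::from_model(model, &[2, 4, 8])?;
    ///     Ok(())
    /// }
    /// ```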
    pub fn from_model(model: InferenceModel, sizes: &[usize]) -> TractResult<Self> {
        let model_api = ModelApi::for_model(&model)?;

        let sizes = fixup_sizes(sizes);

        let models = sizes
            .into_iter()
            .map(|size| {
                helpers::build_model(model.clone(), &model_api.inputs, size as i32)
                    .map(|m| BatchedModel { size, plan: m })
            })
            .collect::<Result<Vec<_>>>()?;

        Ok(Self { models, model_api })
    }

    /// Create an inferer for the provided [`TypedModel`].
    ///
    /// # Errors
    ///
    /// Will only forward errors from the [`tract_core::model::Graph`] optimization and graph building steps.
    pub fn from_typed(model: TypedModel, sizes: &[usize]) -> TractResult<Self> {
        let model_api = ModelApi::for_typed_model(&model)?;

        let sizes = fixup_sizes(sizes);

        let models = sizes
            .into_iter()
            .map(|size| {
                helpers::build_typed(model.clone(), size as i32)
                    .map(|m| BatchedModel { size, plan: m })
            })
            .collect::<Result<Vec<_>>>()?;

        Ok(Self { models, model_api })
    }
}

impl Inferer for FixedBatchInferer {
    fn infer_raw(&self, batch: &mut ScratchPadView<'_>) -> Result<(), anyhow::Error> {
        // Look up the plan compiled for exactly this batch size.
        let plan = self
            .models
            .iter()
            .find(|plan| plan.size == batch.len())
            .with_context(|| format!("looking for a plan with size {:?}", batch.len()))?;

        plan.execute(batch, &self.model_api)
    }

    fn select_batch_size(&self, max_count: usize) -> usize {
        // Plans are sorted largest-first, so this finds the largest batch size
        // at or below max_count. `fixup_sizes` guarantees a 1-element plan, so
        // this can only panic if max_count is 0.
        self.models
            .iter()
            .map(|plan| plan.size)
            .find(|size| *size <= max_count)
            .unwrap()
    }

    fn raw_input_shapes(&self) -> &[(String, Vec<usize>)] {
        &self.model_api.inputs
    }

    fn raw_output_shapes(&self) -> &[(String, Vec<usize>)] {
        &self.model_api.outputs
    }

    // Fixed batching keeps no per-agent state, so these hooks are no-ops.
    fn begin_agent(&self, _id: u64) {}
    fn end_agent(&self, _id: u64) {}
}
117
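/// A single compiled execution plan, locked to one batch size.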
struct BatchedModel {
    size: usize,
    plan: TypedSimplePlan<TypedModel>,
}

impl BatchedModel {
    fn build_inputs(
        &self,
        batch: &mut ScratchPadView<'_>,
        model_api: &ModelApi,
    ) -> Result<TVec<TValue>> {
        assert_eq!(batch.len(), self.size);
        let size = self.size;

        let mut inputs = TVec::default();

        for (idx, (name, shape)) in model_api.inputs.iter().enumerate() {
            assert_eq!(name, batch.input_name(idx));

            // Prepend the batch dimension to the per-element shape.
            let mut full_shape = tvec![size];
            full_shape.extend_from_slice(shape);

            let total_count: usize = full_shape.iter().product();
            assert_eq!(
                total_count,
                batch.input_slot(idx).len(),
                "mismatched number of features: expected {:?}, got {:?} for shape {:?}",
                total_count,
                batch.input_slot(idx).len(),
                full_shape
            );

            let tensor = Tensor::from_shape(&full_shape, batch.input_slot(idx))?;

            inputs.push(tensor.into());
        }

        Ok(inputs)
    }

    fn execute(&self, pad: &mut ScratchPadView<'_>, model_api: &ModelApi) -> Result<()> {
        let inputs = self.build_inputs(pad, model_api)?;
        let result = self.plan.run(inputs)?;

        // Copy each output tensor back into the scratch pad's output slots.
        for idx in 0..model_api.outputs.len() {
            let value = result[idx].as_slice::<f32>()?;
            pad.output_slot_mut(idx).copy_from_slice(value);
        }

        Ok(())
    }
}
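
// Illustrative sanity check for `fixup_sizes` (a sketch, not upstream code):
// sizes must always include 1 and come out largest-first so
// `select_batch_size` can pick greedily.
#[cfg(test)]
mod tests {
    use super::fixup_sizes;

    #[test]
    fn fixup_sizes_adds_one_and_sorts_descending() {
        assert_eq!(fixup_sizes(&[2, 8, 4]), vec![8, 4, 2, 1]);
        assert_eq!(fixup_sizes(&[1, 4]), vec![4, 1]);
        assert_eq!(fixup_sizes(&[]), vec![1]);
    }
}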