chomsky_cost/
lib.rs

1#![warn(missing_docs)]
2
3use chomsky_uir::IKun;
4
/// Multi-dimensional cost estimate for an IR node or for a combination of nodes.
///
/// `latency`, `size` and `energy` are additive and contribute positively to
/// [`Cost::score`]; `throughput` divides the latency term, so a larger value lowers the
/// score. The units of the individual fields are not defined in this file.
///
/// The derived `PartialOrd` compares the fields lexicographically in declaration order.
/// To rank alternatives use [`Cost::score`] or [`Cost::weighted_score`] instead.
#[derive(Debug, Clone, Copy, PartialEq, PartialOrd)]
pub struct Cost {
    /// Latency estimate; summed by [`Cost::add`].
    pub latency: f64,
    /// Throughput estimate; [`Cost::add`] keeps the smaller of the two operands.
    pub throughput: f64,
    /// Size estimate; summed by [`Cost::add`].
    pub size: f64,
    /// Energy estimate; summed by [`Cost::add`].
    pub energy: f64,
}

impl Default for Cost {
    /// Returns a cost with zero latency, size and energy and a throughput of `1.0`.
    ///
    /// Note that this is not a neutral element for [`Cost::add`]: because `add` keeps the
    /// minimum throughput, adding the default value caps the throughput at `1.0`.
    fn default() -> Self {
        Cost {
            latency: 0.0,
            throughput: 1.0,
            size: 0.0,
            energy: 0.0,
        }
    }
}

impl Cost {
    /// Factor applied to `size` by [`Cost::score`].
    const SCORE_SIZE_WEIGHT: f64 = 0.5;
    /// Factor applied to `energy` by [`Cost::score`].
    const SCORE_ENERGY_WEIGHT: f64 = 0.2;

    /// Returns the marker for an infeasible alternative: infinite latency, size and
    /// energy, and zero throughput. Both scoring methods map it to `f64::INFINITY`.
    pub fn infinite() -> Self {
        Cost {
            latency: f64::INFINITY,
            throughput: 0.0,
            size: f64::INFINITY,
            energy: f64::INFINITY,
        }
    }

    /// Combines two costs: latency, size and energy are summed, throughput is the
    /// minimum of the two operands.
    pub fn add(&self, other: &Self) -> Self {
        Cost {
            latency: self.latency + other.latency,
            throughput: self.throughput.min(other.throughput),
            size: self.size + other.size,
            energy: self.energy + other.energy,
        }
    }

    /// Scalarizes the cost as `latency / throughput + 0.5 * size + 0.2 * energy`.
    ///
    /// Returns `f64::INFINITY` when the cost cannot be ranked: `latency`, `size` or
    /// `energy` is `+∞` or NaN, or `throughput` is NaN or not strictly positive.
    pub fn score(&self) -> f64 {
        // A NaN throughput fails `<= 0.0`, so it has to be tested explicitly.
        let starved = self.throughput.is_nan() || self.throughput <= 0.0;
        if starved || self.has_unrankable_component() {
            return f64::INFINITY;
        }
        (self.latency / self.throughput)
            + self.size * Self::SCORE_SIZE_WEIGHT
            + self.energy * Self::SCORE_ENERGY_WEIGHT
    }

    /// Scalarizes the cost as
    /// `latency * latency_weight + size * size_weight + energy * energy_weight`.
    ///
    /// `throughput` is not consulted. Returns `f64::INFINITY` when `latency`, `size` or
    /// `energy` is `+∞` or NaN, regardless of the weights (so a zero weight cannot turn
    /// an infeasible component into NaN).
    pub fn weighted_score(&self, latency_weight: f64, size_weight: f64, energy_weight: f64) -> f64 {
        if self.has_unrankable_component() {
            return f64::INFINITY;
        }
        self.latency * latency_weight + self.size * size_weight + self.energy * energy_weight
    }

    /// Returns `true` when any additive component is `+∞` or NaN.
    ///
    /// NaN is treated like `+∞` because a NaN score is incomparable with every other
    /// score and would otherwise be able to win a "minimum score" selection by accident.
    fn has_unrankable_component(&self) -> bool {
        [self.latency, self.size, self.energy]
            .iter()
            .any(|&component| component.is_nan() || component == f64::INFINITY)
    }
}
65
66pub trait CostModel {
67    fn cost(&self, enode: &IKun) -> Cost;
68
69    /// 获取该模型的默认权重
70    fn weights(&self) -> (f64, f64, f64) {
71        (1.0, 0.5, 0.2)
72    }
73}
74
75impl CostModel for &dyn CostModel {
76    fn cost(&self, enode: &IKun) -> Cost {
77        (**self).cost(enode)
78    }
79    fn weights(&self) -> (f64, f64, f64) {
80        (**self).weights()
81    }
82}
83
84impl CostModel for Box<dyn CostModel> {
85    fn cost(&self, enode: &IKun) -> Cost {
86        (**self).cost(enode)
87    }
88    fn weights(&self) -> (f64, f64, f64) {
89        (**self).weights()
90    }
91}
92
93#[derive(Debug, Clone, Default)]
94pub struct DefaultCostModel;
95
96impl CostModel for DefaultCostModel {
97    fn cost(&self, _enode: &IKun) -> Cost {
98        Cost::default()
99    }
100}
101
102pub static DEFAULT_COST_MODEL: DefaultCostModel = DefaultCostModel;
103
104#[derive(Debug, Clone)]
105pub struct JsCostModel {
106    pub prefer_loop: bool,
107}
108
109impl JsCostModel {
110    pub fn new(prefer_loop: bool) -> Self {
111        Self { prefer_loop }
112    }
113}
114
115impl CostModel for JsCostModel {
116    fn cost(&self, enode: &IKun) -> Cost {
117        match enode {
118            IKun::Map(_, _) | IKun::Filter(_, _) => Cost {
119                latency: 5.0,
120                throughput: 1.0,
121                size: 1.0,
122                energy: 1.0,
123            },
124
125            IKun::SoAMap(_, _) => Cost {
126                latency: 1.0,
127                throughput: 10.0,
128                size: 1.0,
129                energy: 0.5,
130            },
131
132            IKun::TiledMap(_, _, _) => Cost {
133                latency: 1.5,
134                throughput: 20.0,
135                size: 2.0,
136                energy: 1.0,
137            },
138
139            IKun::VectorizedMap(_, _, _) => Cost {
140                latency: 0.8,
141                throughput: 40.0,
142                size: 1.5,
143                energy: 0.8,
144            },
145
146            IKun::Return(_) => Cost {
147                latency: 1.0,
148                throughput: 1.0,
149                size: 1.0,
150                energy: 0.1,
151            },
152
153            IKun::Reduce(_, _, _) => Cost {
154                latency: 6.0,
155                throughput: 1.0,
156                size: 1.0,
157                energy: 1.2,
158            },
159
160            IKun::Extension(name, _) => match name.as_str() {
161                "loop_map" | "loop_filter" | "loop_reduce" | "loop_map_reduce" => {
162                    if self.prefer_loop {
163                        Cost {
164                            latency: 2.0,
165                            throughput: 10.0,
166                            size: 2.0,
167                            energy: 1.0,
168                        }
169                    } else {
170                        Cost {
171                            latency: 5.0,
172                            throughput: 2.0,
173                            size: 10.0,
174                            energy: 2.0,
175                        }
176                    }
177                }
178                "and_predicate" => Cost {
179                    latency: 0.5,
180                    throughput: 1.0,
181                    size: 1.0,
182                    energy: 0.5,
183                },
184                "filter_map" => Cost {
185                    latency: 1.0,
186                    throughput: 5.0,
187                    size: 2.0,
188                    energy: 1.0,
189                },
190                "add" | "sub" => Cost {
191                    latency: 0.6,
192                    throughput: 2.0,
193                    size: 1.0,
194                    energy: 0.2,
195                },
196                "shl" | "shr" => Cost {
197                    latency: 0.4,
198                    throughput: 2.0,
199                    size: 1.0,
200                    energy: 0.1,
201                },
202                "mul" => Cost {
203                    latency: 0.8,
204                    throughput: 1.5,
205                    size: 1.2,
206                    energy: 0.3,
207                },
208                _ => Cost {
209                    latency: 1.0,
210                    throughput: 1.0,
211                    size: 1.0,
212                    energy: 1.0,
213                },
214            },
215
216            IKun::Seq(ids) => Cost {
217                latency: ids.len() as f64 * 0.1,
218                throughput: 1.0,
219                size: ids.len() as f64 * 0.1,
220                energy: ids.len() as f64 * 0.1,
221            },
222
223            _ => Cost::default(),
224        }
225    }
226}
227
228#[derive(Debug, Clone)]
229pub struct CpuCostModel;
230
231impl CostModel for CpuCostModel {
232    fn cost(&self, enode: &IKun) -> Cost {
233        match enode {
234            IKun::Constant(_) | IKun::FloatConstant(_) | IKun::BooleanConstant(_) => Cost {
235                latency: 1.0,
236                throughput: 4.0,
237                size: 1.0,
238                energy: 0.1,
239            },
240            IKun::Symbol(_) => Cost {
241                latency: 1.0,
242                throughput: 4.0,
243                size: 0.0,
244                energy: 0.0,
245            },
246            IKun::Return(_) => Cost {
247                latency: 1.0,
248                throughput: 4.0,
249                size: 1.0,
250                energy: 0.1,
251            },
252            IKun::Map(_, _) => Cost {
253                latency: 10.0,
254                throughput: 1.0,
255                size: 5.0,
256                energy: 2.0,
257            },
258            IKun::VectorizedMap(_, _, _) => Cost {
259                latency: 2.0,
260                throughput: 8.0,
261                size: 10.0,
262                energy: 1.0,
263            },
264            IKun::TiledMap(_, _, _) => Cost {
265                latency: 5.0,
266                throughput: 2.0,
267                size: 8.0,
268                energy: 1.5,
269            },
270            IKun::CpuMap(_, _) => Cost {
271                latency: 1.0,
272                throughput: 4.0,
273                size: 1.0,
274                energy: 0.5,
275            },
276            IKun::GpuMap(_, _) => Cost::infinite(), // CPU model cannot run GPU map
277            _ => Cost {
278                latency: 2.0,
279                throughput: 1.0,
280                size: 2.0,
281                energy: 1.0,
282            },
283        }
284    }
285}
286
287#[derive(Debug, Clone)]
288pub struct GpuCostModel;
289
290impl CostModel for GpuCostModel {
291    fn cost(&self, enode: &IKun) -> Cost {
292        match enode {
293            IKun::Constant(_) | IKun::FloatConstant(_) | IKun::BooleanConstant(_) => Cost {
294                latency: 1.0,
295                throughput: 32.0,
296                size: 1.0,
297                energy: 0.05,
298            },
299            IKun::GpuMap(_, _) => Cost {
300                latency: 5.0,
301                throughput: 100.0,
302                size: 10.0,
303                energy: 5.0,
304            },
305            IKun::CpuMap(_, _) => Cost::infinite(), // GPU model cannot run CPU map
306            IKun::Map(_, _) => Cost {
307                latency: 20.0,
308                throughput: 0.5,
309                size: 10.0,
310                energy: 10.0,
311            }, // Unoptimized map is expensive on GPU
312            _ => Cost {
313                latency: 10.0,
314                throughput: 10.0,
315                size: 5.0,
316                energy: 2.0,
317            },
318        }
319    }
320
321    fn weights(&self) -> (f64, f64, f64) {
322        (0.1, 1.0, 2.0) // On GPU, throughput is key, latency is less important
323    }
324}
325
/// Identifies one of the three cost models held by `CostEvaluator`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Backend {
    /// Selects `CostEvaluator::js` (a `JsCostModel`).
    Js,
    /// Selects `CostEvaluator::cpu` (a `CpuCostModel`).
    Cpu,
    /// Selects `CostEvaluator::gpu` (a `GpuCostModel`).
    Gpu,
}
332
333pub struct CostEvaluator {
334    pub js: JsCostModel,
335    pub cpu: CpuCostModel,
336    pub gpu: GpuCostModel,
337}
338
339impl Default for CostEvaluator {
340    fn default() -> Self {
341        Self::new()
342    }
343}
344
345impl CostEvaluator {
346    pub fn new() -> Self {
347        Self {
348            js: JsCostModel::new(true),
349            cpu: CpuCostModel,
350            gpu: GpuCostModel,
351        }
352    }
353
354    pub fn evaluate_all(&self, enode: &IKun) -> Vec<(Backend, Cost)> {
355        vec![
356            (Backend::Js, self.js.cost(enode)),
357            (Backend::Cpu, self.cpu.cost(enode)),
358            (Backend::Gpu, self.gpu.cost(enode)),
359        ]
360    }
361
362    pub fn best_backend(&self, enode: &IKun) -> (Backend, Cost) {
363        let costs = self.evaluate_all(enode);
364        costs
365            .into_iter()
366            .min_by(|(_, a), (_, b)| {
367                a.score()
368                    .partial_cmp(&b.score())
369                    .unwrap_or(std::cmp::Ordering::Equal)
370            })
371            .unwrap()
372    }
373
374    pub fn get_model(&self, backend: Backend) -> Box<dyn CostModel> {
375        match backend {
376            Backend::Js => Box::new(self.js.clone()),
377            Backend::Cpu => Box::new(self.cpu.clone()),
378            Backend::Gpu => Box::new(self.gpu.clone()),
379        }
380    }
381}
chomsky_cost/lib.rs

chomsky_cost/
lib.rs