calyx_opt/passes/cell_share.rs
1use crate::analysis::{
2 AssignmentAnalysis, GraphColoring, LiveRangeAnalysis, ShareSet,
3 StaticParTiming,
4};
5use crate::traversal::{
6 Action, ConstructVisitor, Named, ParseVal, PassOpt, VisResult, Visitor,
7};
8use calyx_ir::rewriter;
9use calyx_ir::{self as ir};
10use calyx_utils::{CalyxResult, OutputFile};
11use itertools::Itertools;
12use serde_json::{json, Value};
13use std::collections::{HashMap, HashSet};
14
15// function to turn cell types to string when we are building the json for
16// share_freqs
17fn cell_type_to_string(cell_type: &ir::CellType) -> String {
18 match cell_type {
19 ir::CellType::Primitive {
20 name,
21 param_binding,
22 ..
23 } => {
24 let param_str = param_binding
25 .iter()
26 .map(|(id, val)| format!("{id}_{val}"))
27 .join("_");
28 format!("{name}_{param_str}")
29 }
30 ir::CellType::Component { name } => name.to_string(),
31 ir::CellType::ThisComponent => "ThisComponent".to_string(),
32 ir::CellType::Constant { val, width } => {
33 format!("Const_{val}_{width}")
34 }
35 }
36}
37
38/// Given a [LiveRangeAnalysis] that specifies the "share" and "state_share" cells
39/// alive at each group, minimizes the cells used for each component.
40///
41/// This works by constructing an interference graph for each alive "state_share" cell.
42/// If two cells are ever alive at the same time, then there is an edge
43/// between them in the interference graph. Additionally, if two cells
44/// are different prototypes, then there is an edge between them.
45///
46/// A greedy graph coloring algorithm on the interference graph
47/// is used to assign each cell a name.
48///
49/// By default, this pass will share a given cell as many times as possible. However,
50/// by passing a command line argument, we can limit the number of times a given
51/// cell is reused. The rationale behind this option is that, in certain cases,
52/// if you share a given component too much, the logic to determine when that
53/// component should be activated ends up being more expensive than just using
54/// a separate component. To pass this command line argument, you give three numbers:
55/// The number of times a given combinational component can be shared, the number
56/// of times a given register can be shared, and the number of times all other
57/// components can be shared. Generally we would want settings such that 1 < 2 < 3,
58/// since a given share of a 3) would save more hardware than a share of a 2), and
59/// a share of a 2) would save more hardware than a share of a 1).
60/// The exact command line syntax to use: if we had a file, "x.futil" and ran:
61/// `cargo run x.futil -x cell-share:bounds=2,4,8`, then we would only share a
62/// given combinational component at most twice, a given register at most 4 times,
63/// and all other components at most 8 times. If you wanted to do something with
64/// fud then run `fud e ... -s calyx.flags " -x cell-share:bounds=2,4,8"`. Finally
65/// if you do not want to bound the sharing for a particular cell type,
66/// you can pass -1 as a bound. So for example if you passed
67/// `-x cell-share:bounds=2,-1,3` this means that you will always share registers.
68/// Note: *The no spaces are important.*
69/// Also, if you pass the following flag: `-x cell-share:print-share-freqs=file-name`
70/// this pass will write a json to `file-name`. If want to print into stdout
71/// then just set the file-name to be "stdout" (you don't need the quotes
72/// when you actually pass in the argument, so run `-x cell-share:print-share-freqs=stdout`),
73/// and if you want to print to stderr then just set the file-name to be "stderr".
74/// The json will map an integer (say n) to the number of cells in the new design (i.e.,
75/// the design after sharing has been performed) that were shared
76/// exactly n times. So the key n = 2 will be mapped to the number of cells in the
77/// new design that are shared exactly twice.
78///
79/// Other flags:
80/// print_par_timing: prints the par-timing-map
81/// calyx_2020: shares using the Calyx 2020 settings: unlimited sharing of combinational
82/// components and registers, but no sharing of anything else
83///
84///
85/// This pass only renames uses of cells. [crate::passes::DeadCellRemoval] should be run after this
86/// to actually remove the definitions.
87pub struct CellShare {
88 live: LiveRangeAnalysis,
89 rewrites: HashMap<ir::Id, ir::RRC<ir::Cell>>,
90 /// Set of state shareable components (as type names)
91 state_shareable: ShareSet,
92
93 /// Set of shareable components (as type names)
94 shareable: ShareSet,
95
96 /// Cell active in continuous assignments, or ref cells (we want to ignore both)
97 cont_ref_cells: HashSet<ir::Id>,
98
99 /// Maps cell types to the corresponding pdf. Each pdf is a hashmap which maps
100 /// the number of times a given cell name reused (i.e., shared) to the
101 /// number of cells that have been shared that many times times.
102 share_freqs: HashMap<ir::Id, HashMap<ir::CellType, HashMap<i64, i64>>>,
103
104 /// maps the ids of groups to a set of tuples (i,j), the clock cycles (relative
105 /// to the start of the par) that group is live
106 par_timing_map: StaticParTiming,
107
108 // ========= Pass Options ============
109 /// The number of times a given class of cell can be shared. bounds should be
110 /// length 3 to hold the 3 classes: comb cells, registers, and everything else
111 bounds: [Option<i64>; 3],
112
113 /// executes cell share pass using Calyx 2020 benchmarks: no component
114 /// sharing, and only sharing registers and combinational components
115 calyx_2020: bool,
116
117 /// whether or not to print the share_freqs
118 print_share_freqs: Option<OutputFile>,
119 print_par_timing: Option<OutputFile>,
120}
121
122impl Named for CellShare {
123 fn name() -> &'static str {
124 "cell-share"
125 }
126 fn description() -> &'static str {
127 "use the fewest possible cells"
128 }
129
130 fn opts() -> Vec<PassOpt> {
131 vec![
132 PassOpt::new(
133 "print-share-freqs",
134 "print sharing frequencies",
135 ParseVal::OutStream(OutputFile::Null),
136 PassOpt::parse_outstream
137 ),
138 PassOpt::new(
139 "bounds",
140 "maximum amount of sharing for combinational components, registers, and other components. Negative valye means no bound",
141 ParseVal::List(vec![]),
142 PassOpt::parse_num_list
143 ),
144 PassOpt::new(
145 "print-par-timing",
146 "print timing information for `par` blocks",
147 ParseVal::OutStream(OutputFile::Null),
148 PassOpt::parse_outstream
149 ),
150 PassOpt::new(
151 "calyx-2020",
152 "share using the Calyx 2020 settings: no component sharing, only share registers/combinational components",
153 ParseVal::Bool(false),
154 PassOpt::parse_bool
155 )
156 ]
157 }
158}
159
160impl ConstructVisitor for CellShare {
161 fn from(ctx: &ir::Context) -> CalyxResult<Self> {
162 let state_shareable = ShareSet::from_context::<true>(ctx);
163 let shareable = ShareSet::from_context::<false>(ctx);
164 let opts = Self::get_opts(ctx);
165
166 Ok(CellShare {
167 live: LiveRangeAnalysis::default(),
168 rewrites: HashMap::new(),
169 cont_ref_cells: HashSet::new(),
170 state_shareable,
171 shareable,
172 par_timing_map: StaticParTiming::default(),
173 share_freqs: HashMap::new(),
174 calyx_2020: opts["calyx-2020"].bool(),
175 bounds: opts["bounds"].num_list_exact::<3>(),
176 print_par_timing: opts["print-par-timing"].not_null_outstream(),
177 print_share_freqs: opts["print-share-freqs"].not_null_outstream(),
178 })
179 }
180
181 fn clear_data(&mut self) {
182 self.rewrites = HashMap::new();
183 self.live = LiveRangeAnalysis::default();
184 self.cont_ref_cells = HashSet::new();
185 }
186}
187
188impl CellShare {
189 fn initialize(
190 &mut self,
191 comp: &ir::Component,
192 _sigs: &ir::LibrarySignatures,
193 ) {
194 //add cont cells
195 self.cont_ref_cells = comp
196 .continuous_assignments
197 .iter()
198 .analysis()
199 .cell_uses()
200 .map(|cr| cr.borrow().name())
201 .collect();
202 //add ref cells
203 self.cont_ref_cells.extend(
204 comp.cells
205 .iter()
206 .filter(|cell| cell.borrow().is_reference())
207 .map(|cell| cell.borrow().name()),
208 );
209
210 // TODO(rachit): Pass cont_ref_cells to LiveRangeAnalysis so that it ignores unneccessary
211 // cells.
212 self.live = LiveRangeAnalysis::new(
213 &mut comp.control.borrow_mut(),
214 self.state_shareable.clone(),
215 self.shareable.clone(),
216 );
217
218 self.par_timing_map = StaticParTiming::new(
219 &mut comp.control.borrow_mut(),
220 comp.name,
221 &self.live,
222 );
223 if let Some(stream) = &self.print_par_timing {
224 write!(stream.get_write(), "{:?}", self.par_timing_map).unwrap();
225 }
226 }
227
228 fn cell_filter(&self, cell: &ir::Cell) -> bool {
229 // Cells used in continuous assignments cannot be shared, nor can ref cells.
230 if self.cont_ref_cells.contains(&cell.name()) {
231 return false;
232 }
233 if let Some(ref name) = cell.type_name() {
234 self.state_shareable.contains(name) || self.shareable.contains(name)
235 } else {
236 false
237 }
238 }
239
240 // prints the json if self.print_share_freqs is not None
241 fn print_share_json(&self) {
242 if let Some(file) = &self.print_share_freqs {
243 let printable_share_freqs: HashMap<String, HashMap<String, _>> =
244 self.share_freqs
245 .iter()
246 .map(|(id, freq_map)| {
247 (
248 id.to_string(),
249 freq_map
250 .iter()
251 .map(|(cell_type, pdf)| {
252 (cell_type_to_string(cell_type), pdf)
253 })
254 .collect(),
255 )
256 })
257 .collect();
258 let json_share_freqs: Value = json!(printable_share_freqs);
259 write!(file.get_write(), "{}", json_share_freqs).unwrap()
260 }
261 }
262}
263
264/// The algorithm that runs is:
265/// - instantiate conflict graph using all component cells that satisfy `cell_filter`
266/// - use [ScheduleConflicts] to find groups/invokes that run in parallel with each other
267/// - for each tuple combination of cells that return true on cell_filter(), c1 and c2
268/// - first determine if their live ranges overlap. If so, then insert a conflict between
269/// c1 and c2
270/// - if c1 and c2 don't have overlapping live ranges, check if c1 and c2 are ever
271/// live at within the same par block, and they are live at different children
272/// of the par block. If the parent par is not static, then add a conflict.
273/// If the parent par is static, then we can use the static_par_timing analysis
274/// to check whether the cells' liveness actually overlaps.
275/// - perform graph coloring using `self.ordering` to define the order of the greedy coloring
276/// - use coloring to rewrite group assignments, continuous assignments, and conditional ports.
277impl Visitor for CellShare {
278 fn start(
279 &mut self,
280 comp: &mut ir::Component,
281 sigs: &ir::LibrarySignatures,
282 _comps: &[ir::Component],
283 ) -> VisResult {
284 self.initialize(comp, sigs);
285
286 let cells = comp.cells.iter().filter(|c| self.cell_filter(&c.borrow()));
287
288 // Mapping from cell type to names of all cells of that type.
289 let mut cells_by_type: HashMap<ir::CellType, Vec<ir::Id>> =
290 HashMap::new();
291 for cell in cells {
292 cells_by_type
293 .entry(cell.borrow().prototype.clone())
294 .or_default()
295 .push(cell.borrow().name());
296 }
297
298 // Maps cell type to conflict graph (will be used to perform coloring)
299 let mut graphs_by_type: HashMap<ir::CellType, GraphColoring<ir::Id>> =
300 cells_by_type
301 .clone()
302 .into_iter()
303 .map(|(key, cell_names)| {
304 (key, GraphColoring::from(cell_names.into_iter()))
305 })
306 .collect();
307
308 // We assume unique ids have already been computed by LiveRangeAnalysis
309
310 // live_once_map maps celltypes to maps that map cells to control statements
311 // in which the cell was live for at least one group/invoke. Furthermore,
312 // only control statements that are direct children of par blocks
313 // are included in this map.
314 let mut live_once_map = HashMap::new();
315 // Maps every control statement that is a direct child of a par block to
316 // its parent par block. (maps id number to id number)
317 let mut par_thread_map = HashMap::new();
318 // Maps celltype to map that maps cells to groups/invokes in which the cell is live.
319 let mut live_cell_map = HashMap::new();
320 // build live_once_map and par_thread_map
321 self.live.get_live_control_data(
322 &mut live_once_map,
323 &mut par_thread_map,
324 &mut live_cell_map,
325 &HashSet::new(),
326 &comp.control.borrow(),
327 );
328
329 // Adding the conflicts
330 for (cell_type, cells) in &cells_by_type {
331 // Run remove_dead_cells before this cell-share pass.
332 let g = graphs_by_type.get_mut(cell_type).unwrap();
333
334 // mapping from cell names to the enables/invokes in which it is live
335 let cell_to_nodes =
336 live_cell_map.entry(cell_type.clone()).or_default();
337 // mapping of cell names to the control statements in which it is live
338 // at least once. Only control statements that are direct children of
339 // par blocks are included
340 let cell_to_control =
341 live_once_map.entry(cell_type.clone()).or_default();
342 for (a, b) in cells.iter().tuple_combinations() {
343 // checking if live ranges overlap
344 // nodes (groups/invokes) in which a is live
345 if let Some(live_a) = cell_to_nodes.get(a) {
346 if let Some(live_b) = cell_to_nodes.get(b) {
347 if !live_a.is_disjoint(live_b) {
348 g.insert_conflict(a, b);
349 continue;
350 }
351 }
352 }
353 // checking if b is live at any groups/invokes running in parallel
354 // to groups/invokes live at a
355 // get the children of pars in which a was alive "at least once"
356 if let Some(live_once_a) = cell_to_control.get(a) {
357 // get the children of pars in which b was alive "at least once"
358 if let Some(live_once_b) = cell_to_control.get(b) {
359 'outer: for live_a in live_once_a {
360 for live_b in live_once_b {
361 // a and b are live within the same par block but not within
362 // the same child thread, then insert conflict.
363 let parent_a =
364 par_thread_map.get(live_a).unwrap();
365 let parent_b =
366 par_thread_map.get(live_b).unwrap();
367 if live_a != live_b && parent_a == parent_b {
368 // We have to check par_timing_map
369 // to see whether liveness overlaps.
370 // For dynamic pars, liveness_overlaps() returns
371 // true no matter what.
372 if self.par_timing_map.liveness_overlaps(
373 parent_a, live_a, live_b, a, b,
374 ) {
375 g.insert_conflict(a, b);
376 break 'outer;
377 }
378 }
379 }
380 }
381 }
382 }
383 }
384 }
385
386 // perform graph coloring to rename the cells
387 let mut coloring: rewriter::RewriteMap<ir::Cell> = HashMap::new();
388 let mut comp_share_freqs: HashMap<ir::CellType, HashMap<i64, i64>> =
389 HashMap::new();
390 let [comb_bound, reg_bound, other_bound] = &self.bounds;
391 for (cell_type, mut graph) in graphs_by_type {
392 // getting bound, based on self.bounds and cell_type
393 let bound = {
394 if let Some(ref name) = cell_type.get_name() {
395 let is_comb = self.shareable.contains(name);
396 let is_reg = name == "std_reg";
397 // if self.calyx_2020, then set bounds based on that
398 // otherwise, look at the actual self.bounds values to
399 // get the bounds
400 if self.calyx_2020 {
401 if is_comb || is_reg {
402 &None
403 } else {
404 &Some(1)
405 }
406 } else if is_comb {
407 comb_bound
408 } else if is_reg {
409 reg_bound
410 } else {
411 other_bound
412 }
413 } else {
414 // sharing bound doesn't really matter for ThisComponent/Constants
415 &None
416 }
417 };
418 if graph.has_nodes() {
419 coloring.extend(
420 graph
421 .color_greedy(*bound, false)
422 .iter()
423 .map(|(a, b)| (*a, comp.find_cell(*b).unwrap())),
424 );
425 // only generate share-freqs if we're going to use them.
426 if self.print_share_freqs.is_some() {
427 // must accumulate sharing numbers for share_freqs
428 comp_share_freqs.insert(cell_type, graph.get_share_freqs());
429 }
430 }
431 }
432
433 // add the sharing freqs for the component we just analyzed
434 if self.print_share_freqs.is_some() {
435 // must accumulate sharing numbers for share_freqs
436 self.share_freqs.insert(comp.name, comp_share_freqs);
437 // print share freqs json if self.print_share_freqs is not none
438 self.print_share_json();
439 }
440
441 // Rewrite assignments using the coloring generated.
442 let rewriter = ir::Rewriter {
443 cell_map: coloring,
444 ..Default::default()
445 };
446 comp.for_each_assignment(|assign| {
447 assign.for_each_port(|port| rewriter.get(port));
448 });
449 comp.for_each_static_assignment(|assign| {
450 assign.for_each_port(|port| rewriter.get(port));
451 });
452
453 // Rewrite control uses of ports
454 rewriter.rewrite_control(&mut comp.control.borrow_mut());
455
456 Ok(Action::Stop)
457 }
458}