starlight 0.4.0

experimental HDL and optimizer for DAGs of lookup tables
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
use std::{
    cmp::max,
    collections::BinaryHeap,
    num::{NonZeroU32, NonZeroU64},
};

use awint::awint_dag::triple_arena::{Advancer, Ptr};

use crate::{
    route::{ChannelWidths, Channeler, PEmbedding, Programmability, Referent},
    utils::SmallSet,
    Error,
};

/// Capacity/behavior statistics accumulated over a `CNode`'s subtree, used
/// when generating and ordering the channeling hierarchy
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct InternalBehavior {
    // note for future changes that the hierarchy generating is using this for ordering

    /// Looking all the way at the bottom of the hierarchy, this counts the
    /// total number of subnodes in this node's tree (a unit node counts
    /// itself, so this is at least 1)
    pub subnodes_in_tree: usize,

    /// Total number of lookup table bits contributed by edges with sinks in
    /// this tree
    pub lut_bits: usize,
}

impl InternalBehavior {
    /// Returns the behavior of a unit `CNode`: a tree consisting of just
    /// itself (`subnodes_in_tree == 1`) and no LUT bits
    pub const fn empty() -> Self {
        Self {
            subnodes_in_tree: 1,
            lut_bits: 0,
        }
    }
}

/// A channel node
#[derive(Debug, Clone)]
pub struct CNode<PCNode: Ptr, PCEdge: Ptr> {
    /// Self-referential pointer to the `ThisCNode` referent of this node
    pub p_this_cnode: PCNode,
    /// Level in the concentration hierarchy; unit nodes start at level 0
    pub lvl: u16,
    /// Points back at the `SubNode` referent kept on this node's supernode,
    /// or `None` if this node has not been concentrated into a supernode
    pub p_supernode: Option<PCNode>,
    /// Statistics accumulated over this node's subtree
    pub internal_behavior: InternalBehavior,
    /// Embeddings associated with this node (managed by the routing code)
    pub embeddings: SmallSet<PEmbedding>,
    /// Scratch visit number used by algorithms to mark already-visited nodes
    pub alg_visit: NonZeroU64,
    /// Scratch accumulator for channel entry widths during hierarchy
    /// generation
    pub alg_entry_width: usize,
    // this is used in Dijkstra's algorithm and points backwards
    pub alg_edge: (Option<PCEdge>, usize),
}

impl<PCNode: Ptr, PCEdge: Ptr> CNode<PCNode, PCEdge> {
    /// Returns a reference to the subtree statistics of this node
    pub fn internal_behavior(&self) -> &InternalBehavior {
        &self.internal_behavior
    }
}

impl<PCNode: Ptr, PCEdge: Ptr> Channeler<PCNode, PCEdge> {
    /// Given the `subnodes` (which should point to unique `ThisCNode`s) for a
    /// new top level `CNode`, this will manage the backrefs: the new node gets
    /// one `SubNode` referent per subnode, each subnode's `p_supernode` is
    /// pointed at its referent, and `top_level_cnodes` is updated so the
    /// subnodes are removed and only the new node is registered as top level.
    /// Returns the pointer to the new `CNode`.
    pub fn make_top_level_cnode<I>(
        &mut self,
        subnodes: I,
        lvl: u16,
        internal_behavior: InternalBehavior,
    ) -> PCNode
    where
        I: IntoIterator<Item = PCNode>,
    {
        // insert the new node with a self-referential `ThisCNode` referent
        let p_cnode = self.cnodes.insert_with(|p_this_cnode| {
            (Referent::ThisCNode, CNode {
                p_this_cnode,
                lvl,
                p_supernode: None,
                internal_behavior,
                embeddings: SmallSet::new(),
                alg_visit: NonZeroU64::new(1).unwrap(),
                alg_entry_width: 0,
                alg_edge: (None, 0),
            })
        });
        for p_subnode in subnodes {
            // the subnode is no longer top level now that it has a supernode
            if let Some(p) = self.top_level_cnodes.find_key(&p_subnode) {
                self.top_level_cnodes.remove(p).unwrap();
            }
            // add a `SubNode` backref on the new node and point the subnode's
            // `p_supernode` at that referent
            let p_supernode = self
                .cnodes
                .insert_key(p_cnode, Referent::SubNode(p_subnode))
                .unwrap();
            let cnode = self.cnodes.get_val_mut(p_subnode).unwrap();
            cnode.p_supernode = Some(p_supernode);
        }
        let _ = self.top_level_cnodes.insert(p_cnode, ());
        p_cnode
    }

    /// Returns the `SubNode` referent on the supernode that points back at
    /// `p`, or `None` if `p` is invalid or has no supernode
    #[must_use]
    pub fn get_supernode_referent(&self, p: PCNode) -> Option<PCNode> {
        self.cnodes.get_val(p)?.p_supernode
    }

    /// Returns the `ThisCNode` pointer of the supernode of `p`, or `None` if
    /// `p` is invalid or has no supernode
    #[must_use]
    pub fn get_supernode(&self, p: PCNode) -> Option<PCNode> {
        let p_supernode_ref = self.cnodes.get_val(p)?.p_supernode?;
        Some(self.cnodes.get_val(p_supernode_ref)?.p_this_cnode)
    }

    /// Given two `CNode`s, this will find their lowest level common supernode
    /// (or just return the higher level of the two if one is a supernode of the
    /// other, or return one if they are equal). Can only return `None` if there
    /// are disjoint `CNode` hierarchies. If this function is used in a loop with
    /// a common accumulator, this will find the common supernode of all the
    /// nodes.
    pub fn find_common_supernode(&self, p_back0: PCNode, p_back1: PCNode) -> Option<PCNode> {
        let cnode0 = self.cnodes.get_val(p_back0).unwrap();
        let mut lvl0 = cnode0.lvl;
        let mut p_cnode0 = cnode0.p_this_cnode;
        let cnode1 = self.cnodes.get_val(p_back1).unwrap();
        let mut lvl1 = cnode1.lvl;
        let mut p_cnode1 = cnode1.p_this_cnode;
        // first get on same level by walking the lower one upwards
        loop {
            // have this run first for all cases, it handles both the equal
            // case and the one-is-a-supernode-of-the-other case
            if p_cnode0 == p_cnode1 {
                // case where one is the supernode of the other
                return Some(p_cnode0)
            }
            if lvl0 < lvl1 {
                if let Some(p_super_cnode) = self.get_supernode(p_cnode0) {
                    p_cnode0 = p_super_cnode;
                } else {
                    return None
                }
                lvl0 += 1;
            } else if lvl0 > lvl1 {
                if let Some(p_super_cnode) = self.get_supernode(p_cnode1) {
                    p_cnode1 = p_super_cnode;
                } else {
                    return None
                }
                lvl1 += 1;
            } else {
                break
            }
        }
        // find common supernode by walking both up in lockstep; a `None` from
        // either walk means the hierarchies are disjoint
        loop {
            if let Some(p_super_cnode) = self.get_supernode(p_cnode0) {
                p_cnode0 = p_super_cnode;
            } else {
                return None
            }
            if let Some(p_super_cnode) = self.get_supernode(p_cnode1) {
                p_cnode1 = p_super_cnode;
            } else {
                return None
            }
            if p_cnode0 == p_cnode1 {
                return Some(p_cnode0)
            }
        }
    }
}

/*
here are the current ideas on the channeling hierarchy

We know we want a hierarchy for the target and a hierarchy for the program.
The routing starts by having an embedding of the single top level program cnode
into the single top level target cnode (modulo handling how we see fit for if
the target and/or program has disconnections). There are different kinds of steps:

(1.) Program dilution
In one kind of step, a program's embeddings
are "diluted" (as opposed to concentrating when looking from the bottom to the top
of the hierarchy) with a embedding of one program cnode into a target cnode being
broken into an embedding of that program cnode's subnodes into the same target cnode.

(2.) Target dilution
A program embedding is diluted with respect to the target channeler side, such that an
embedding of a program cnode into a target cnode is broken into an embedding of a program
cnode into a subnode of one of the target cnodes.
There is one step of embedding movement implicit in this, where we choose which
subnode to embed.

(3.) Embedding movement
As dilution proceeds and we get a higher resolution picture of the final embedding, we
will have to transversely move the embeddings to neighboring target cnodes

(4.) Concentration
Equivalent to the "rip-up and reroute" process where we find inaccuracies in the
bulk predictions and need to concentrate before retrying dilution.

The routing process progresses from the initial single top level embedding by first
diluting the program, and then diluting both while maintaining a more dilution
for the program than the target. There are usually multiple program cnodes embedded
into a single target cnode, until the lowest level.

There are distinct levels with no `CEdge`s between them

This allows the Lagrangian routing algorithm to start with completed paths between program-target
mappings, so that we do not constantly have to use maps to look up where we need to be moving loose
endpoints. The Lagrangians can do advanced things by themselves like promoting concentration or
dilution of paths to different cedges when necessary.
*/

/// Starting from unit `CNode`s and `CEdge`s describing all known low level
/// program methods, this generates a logarithmic tree of higher level
/// `CNode`s and `CEdge`s that results in a single top level `CNode` from which
/// routing can start
///
/// We are currently assuming that `generate_hierarchy` is being run once on
/// a graph of unit channel nodes and edges
pub fn generate_hierarchy<PCNode: Ptr, PCEdge: Ptr>(
    channeler: &mut Channeler<PCNode, PCEdge>,
) -> Result<(), Error> {
    // collects `CNode`s that end up with no edges to anything
    let mut final_top_level_cnodes = Vec::<PCNode>::new();
    // nodes that could not concentrate this round because of overlap with an
    // already-concentrated neighbor; they may need single-subnode supernodes
    let mut possibly_single_subnode = Vec::<PCNode>::new();
    // supernodes created this round, to be wired with bulk edges before the
    // next round starts
    let mut next_level_cnodes = Vec::<PCNode>::new();
    // max-heap keyed on channel exit width, so wider nodes concentrate first
    let mut priority = BinaryHeap::<(usize, PCNode)>::new();

    for cnode in channeler.cnodes.vals() {
        if cnode.lvl != 0 {
            return Err(Error::OtherStr(
                "hierarchy appears to have been generated before",
            ))
        }
        priority.push((0, cnode.p_this_cnode));
    }

    let mut current_lvl = 0u16;
    'outer: loop {
        let p_consider = if let Some((_, p_consider)) = priority.pop() {
            p_consider
        } else {
            // the current level is exhausted
            if next_level_cnodes.is_empty() {
                break
            }
            current_lvl = current_lvl.checked_add(1).unwrap();
            // before going to the next level, need to handle this
            generate_hierarchy_level(
                current_lvl,
                channeler,
                &mut priority,
                &mut possibly_single_subnode,
                &mut next_level_cnodes,
            )?;
            continue;
        };
        let cnode = channeler.cnodes.get_val(p_consider).unwrap();
        if cnode.p_supernode.is_some() {
            // has already been concentrated
            continue
        }

        // For each cnode on a given level, we will attempt to concentrate it and all
        // its neighbors. If any neighbor has a supernode already, it skips the cnode

        let related = channeler.related_nodes(p_consider);
        if related.len() == 1 {
            // the node is disconnected
            final_top_level_cnodes.push(p_consider);
            continue
        }
        // accumulate the `InternalBehavior` of the prospective supernode while
        // checking eligibility
        let mut subnodes_in_tree = 0usize;
        let mut lut_bits = 0usize;
        // check if any related nodes have supernodes
        for p_related in related.iter().copied() {
            let related_cnode = channeler.cnodes.get_val(p_related).unwrap();
            subnodes_in_tree = subnodes_in_tree
                .checked_add(related_cnode.internal_behavior.subnodes_in_tree)
                .unwrap();
            lut_bits = lut_bits
                .checked_add(related_cnode.internal_behavior.lut_bits)
                .unwrap();
            if related_cnode.p_supernode.is_some() {
                // We can't concentrate `p_consider` because it would concentrate related nodes
                // that are already concentrated, instead put it in `possibly_single_subnode`
                // because it may end up in a solution where it can't concentrate with any other
                // nodes because of overlap.
                possibly_single_subnode.push(p_consider);
                continue 'outer
            }
        }
        // concentrate `p_consider` and all its related nodes into one supernode
        let p_next_lvl = channeler.make_top_level_cnode(
            related.iter().copied(),
            current_lvl.checked_add(1).unwrap(),
            InternalBehavior {
                subnodes_in_tree,
                lut_bits,
            },
        );
        next_level_cnodes.push(p_next_lvl);
    }

    // TODO optimize to just use `final_top_level_cnodes` in this final step,
    // avoiding all other uses of `top_level_cnodes` in the middle

    // if there are multiple cnodes left in an anticlique, concentrate them into
    // a single top level node
    if channeler.top_level_cnodes.len() > 1 {
        let mut set = vec![];
        let mut max_lvl = 0;
        let mut subnodes_in_tree = 0usize;
        let mut lut_bits = 0usize;
        for p_cnode in channeler.top_level_cnodes.keys().copied() {
            set.push(p_cnode);
            let cnode = channeler.cnodes.get_val(p_cnode).unwrap();
            subnodes_in_tree = subnodes_in_tree
                .checked_add(cnode.internal_behavior.subnodes_in_tree)
                .unwrap();
            lut_bits = lut_bits
                .checked_add(cnode.internal_behavior().lut_bits)
                .unwrap();
            max_lvl = max(max_lvl, cnode.lvl);
        }
        // NOTE(review): the remaining top level nodes may be on different
        // levels, so the final supernode is placed one above the maximum
        channeler.make_top_level_cnode(set, max_lvl.checked_add(1).unwrap(), InternalBehavior {
            subnodes_in_tree,
            lut_bits,
        });
    }
    Ok(())
}

/// Finishes one level of the hierarchy before concentration continues on the
/// next: gives any leftover unconcentrated node from `possibly_single_subnode`
/// a single-subnode supernode, then creates one bulk `CEdge` per node in
/// `next_level_cnodes` (draining both vectors) and pushes those nodes onto
/// `priority` keyed by their accumulated channel exit width.
pub fn generate_hierarchy_level<PCNode: Ptr, PCEdge: Ptr>(
    current_lvl: u16,
    channeler: &mut Channeler<PCNode, PCEdge>,
    priority: &mut BinaryHeap<(usize, PCNode)>,
    possibly_single_subnode: &mut Vec<PCNode>,
    next_level_cnodes: &mut Vec<PCNode>,
) -> Result<(), Error> {
    // for nodes that couldn't be concentrated, create single subnode supernodes for
    // them, so that edges are only between nodes at the same level
    for p in possibly_single_subnode.drain(..) {
        let cnode = channeler.cnodes.get_val(p).unwrap();
        if cnode.p_supernode.is_some() {
            // was concentrated after being pushed; nothing to do
            continue
        }
        // need to also forward the internal behavior
        let p_next_lvl =
            channeler.make_top_level_cnode([p], current_lvl, cnode.internal_behavior().clone());
        next_level_cnodes.push(p_next_lvl);
    }

    // create bulk `CEdge`s between all nodes on the level
    for p_consider in next_level_cnodes.drain(..) {
        // first get the set of subnodes, marking each with a fresh visit
        // number so membership tests below are O(1)
        let direct_subnode_visit = channeler.next_alg_visit();
        let mut subnode_set = vec![];
        let mut subnode_adv = channeler.advancer_subnodes_of_node(p_consider);
        while let Some(p_subnode) = subnode_adv.advance(channeler) {
            channeler.cnodes.get_val_mut(p_subnode).unwrap().alg_visit = direct_subnode_visit;
            subnode_set.push(p_subnode);
        }
        // The current plan is that we just create one big edge that has its sink
        // incident in `p_consider`, with source incidents to all supernodes of subnodes
        // outside of the subnode set that have source incidents to an edge that has a
        // sink in the subnodes of `p_consider`. I'm not sure if we should discretize
        // this more since the channel source widths are tracked separately to begin
        // with. However I suspect that this is the correct approach because we can
        // simplify bulk edge behavior to only track channel widths and is the only
        // straightforward way to avoid `OrdArena`s.

        // iterate through the subnodes again, but now get a set of second neighbors
        // that aren't in the subnodes set
        let related_visit = channeler.next_alg_visit();
        let mut source_set = vec![];
        let mut channel_widths = ChannelWidths::empty();
        let mut lut_bits = 0usize;
        for p_subnode in subnode_set.iter().copied() {
            // go over all neighbors through the edges
            let mut adv = channeler.cnodes.advancer_surject(p_subnode);
            while let Some(p_referent) = adv.advance(&channeler.cnodes) {
                if let Referent::CEdgeIncidence(p_cedge, i) =
                    *channeler.cnodes.get_key(p_referent).unwrap()
                {
                    // avoid duplication, if this is a sink incidence we automatically have
                    // a one time iter of the edge we need to handle
                    if i.is_none() {
                        let cedge = channeler.cedges.get_mut(p_cedge).unwrap();

                        // exit width contributed by this edge, while also
                        // tallying any LUT bits it carries
                        let w = match cedge.programmability() {
                            Programmability::TNode => 1,
                            Programmability::StaticLut(lut) => {
                                lut_bits = lut_bits.checked_add(lut.bw()).unwrap();
                                1
                            }
                            Programmability::ArbitraryLut(arbitrary_lut) => {
                                lut_bits = lut_bits
                                    .checked_add(arbitrary_lut.lut_config().len())
                                    .unwrap();
                                1
                            }
                            Programmability::SelectorLut(_) => 1,
                            Programmability::Bulk(bulk) => bulk.channel_exit_width,
                        };
                        channel_widths.channel_exit_width =
                            channel_widths.channel_exit_width.checked_add(w).unwrap();

                        for (i, p_incident) in cedge.sources().iter().copied().enumerate() {
                            let cnode = channeler.cnodes.get_val_mut(p_incident).unwrap();
                            // make sure the `CNode` is outside the direct subnode set
                            if cnode.alg_visit != direct_subnode_visit {
                                // avoid an `OrdArena` by accumulating the entry width on the
                                // related supernode
                                let p_supernode = cnode.p_supernode.unwrap();
                                let supernode = channeler.cnodes.get_val_mut(p_supernode).unwrap();
                                if supernode.alg_visit != related_visit {
                                    // first time seeing this supernode: reset
                                    // its accumulator and record it as a source
                                    supernode.alg_visit = related_visit;
                                    supernode.alg_entry_width = 0;
                                    source_set.push(supernode.p_this_cnode);
                                }
                                let w = match cedge.programmability() {
                                    Programmability::TNode
                                    | Programmability::StaticLut(_)
                                    | Programmability::ArbitraryLut(_)
                                    | Programmability::SelectorLut(_) => 1,
                                    Programmability::Bulk(bulk) => bulk.channel_entry_widths[i],
                                };
                                supernode.alg_entry_width =
                                    supernode.alg_entry_width.checked_add(w).unwrap();
                            }
                            // else the connections are internal, TODO are there
                            // any internal connection statistics we should want
                            // to track?
                        }
                    }
                }
            }
        }
        // add on the bits from edges with sinks in `p_consider`
        let internal_behavior = &mut channeler
            .cnodes
            .get_val_mut(p_consider)
            .unwrap()
            .internal_behavior;
        internal_behavior.lut_bits = internal_behavior.lut_bits.checked_add(lut_bits).unwrap();
        // We want the edge source numbers to be mostly tractable. The tree will be
        // lopsided somewhat because of this, but will ultimately be WAVL-like balanced
        // because everything that doesn't have overlap issues will be concentrated
        // every round.
        let channel_exit_width = channel_widths.channel_exit_width;
        priority.push((channel_exit_width, p_consider));
        // create the edge
        if !source_set.is_empty() {
            // collect the per-source entry widths accumulated above
            for source in source_set.iter().cloned() {
                let cnode = channeler.cnodes.get_val(source).unwrap();
                channel_widths
                    .channel_entry_widths
                    .push(cnode.alg_entry_width);
            }
            // TODO the delay weight system is messed up for bulk edges, perhaps this is
            // where we can add more than one edge per concentrated node if the weights vary
            // wildly, e.g. for an island FPGA with some long range connections
            channeler.make_cedge(
                &source_set,
                p_consider,
                Programmability::Bulk(channel_widths),
                // gross approximation for now
                NonZeroU32::new(
                    u32::try_from(channel_exit_width.clamp(1, u32::MAX as usize)).unwrap(),
                )
                .unwrap(),
            );
        }
    }
    Ok(())
}