precomputed_map/builder/
codegen.rs

1use std::{ fs, fmt };
2use std::io::{ self, Write };
3use std::borrow::Cow;
4use std::path::PathBuf;
5use std::collections::HashMap;
6use super::{ MapOutput, MapKind };
7
/// Code Generator
///
/// Generate code based on the constructed Map and the provided sequence.
///
/// Every `create_*` call records one entry (sometimes preceded by unnamed
/// helper entries) and returns a [`ReferenceId`] identifying it. The entries
/// are turned into source text, in creation order, by `codegen`.
pub struct CodeBuilder<'a> {
    /// Builder name; used to derive identifiers for unnamed inline lists.
    name: String,
    /// Hasher type path, inserted verbatim as a type argument of the
    /// generated small and medium maps.
    hash: String,
    /// Optional visibility scope; written as a `pub(<vis>) ` prefix when set.
    vis: Option<String>,
    /// Recorded entries; a `ReferenceId` is an index into this list.
    list: Vec<OutputEntry>,
    /// Sink for raw byte data that is stored out of line.
    u8seq_writer: &'a mut U8SeqWriter,
    /// Sink for `u32` data that is stored out of line.
    u32seq_writer: &'a mut U32SeqWriter,
}
19
/// U8 seq writer
///
/// Appends raw bytes to a data file and tracks how many bytes were written.
/// Its `codegen` emits a constant that includes that file.
pub struct U8SeqWriter(BytesWriter);
22
/// U32 seq writer
///
/// Appends `u32` values to a data file as little-endian bytes and tracks how
/// many bytes were written. Its `codegen` emits a constant that includes
/// that file.
pub struct U32SeqWriter(BytesWriter);
25
/// State shared by the sequence writers: a data file that is created on
/// first use.
struct BytesWriter {
    /// Identifier of the generated constant that includes the data file.
    entry: String,
    /// Path of the data file.
    file: PathBuf,
    /// Counting writer over the data file; `None` until the first write.
    writer: Option<CountWriter<fs::File>>,
}
31
/// Short bytes pool
///
/// Deduplicating pool of short byte strings. Each distinct value is stored
/// once in a shared buffer and addressed by a [`ShortId`].
pub struct ShortPool<'s> {
    /// Name of the id type generated for this pool.
    entry: String,
    /// Concatenation of all distinct values, in insertion order.
    buf: Vec<u8>,
    /// Maps each stored value to the id that was assigned to it.
    map: HashMap<Cow<'s, [u8]>, ShortId>
}
38
/// Short bytes Id
///
/// Packed handle into a [`ShortPool`]: the low 24 bits hold the offset of the
/// value in the pool buffer and the high 8 bits hold its length.
#[derive(Clone, Copy)]
pub struct ShortId(u32);
42
/// Reference Id
///
/// Identifies an entry recorded in a [`CodeBuilder`]; the wrapped value is the
/// entry's index in the builder's entry list.
pub struct ReferenceId(usize);
45
/// One recorded item awaiting code generation.
struct OutputEntry {
    /// Identifier to emit for the item; `None` for anonymous helper entries.
    name: Option<String>,
    /// What the item is and the data needed to emit it.
    kind: OutputKind
}
50
/// The kinds of item a [`CodeBuilder`] can emit.
enum OutputKind {
    /// A type defined elsewhere; referenced by `name` as-is, nothing is emitted.
    Custom {
        name: String,
    },
    /// A byte range (`offset`, `len`) inside the shared u8 data file.
    U8Seq {
        offset: usize,
        len: usize
    },
    /// Concatenated byte strings stored at (`offset`, `len`) in the shared u8
    /// data file, with `index` referring to the sequence of cumulative end
    /// positions of the individual strings.
    BytesPositionSeq {
        offset: usize,
        len: usize,
        index: ReferenceId
    },
    /// A sequence of pooled byte strings: `index` refers to the sequence of
    /// packed ids and `pooled_id` names the pool's id type.
    BytesShortSeq {
        pooled_id: String,
        index: ReferenceId,
    },
    /// A range inside the shared u32 data file; `offset` and `len` are byte
    /// counts as reported by the writer.
    U32Seq {
        offset: usize,
        len: usize
    },
    /// An inline array constant. `value` is the already formatted `&[...]`
    /// literal, `len` its element count, and `searchable` adds the
    /// `searchable` marker to the emitted definition.
    List {
        item_type: String,
        value: String,
        len: usize,
        searchable: bool
    },
    /// A tuple type made of two previously recorded entries.
    Pair {
        keys: ReferenceId,
        values: ReferenceId
    },
    /// A tiny map over the referenced data.
    Tiny(ReferenceId),
    /// A small map over the referenced data, hashed with `seed`.
    Small {
        seed: u64,
        data: ReferenceId
    },
    /// A medium map over the referenced data, hashed with `seed` and using
    /// the referenced pilot and remap tables.
    Medium {
        seed: u64,
        pilots: ReferenceId,
        remap: ReferenceId,
        data: ReferenceId
    }
}
94
/// Writer wrapper that keeps a running total of the bytes written through it.
struct CountWriter<W> {
    /// The wrapped writer.
    writer: W,
    /// Total number of bytes the wrapped writer has accepted so far.
    count: usize
}
99
100impl MapOutput {
101    /// The seed can be saved and used in next compute to keep output stable.
102    pub fn seed(&self) -> Option<u64> {
103        match &self.kind {
104            MapKind::Tiny => None,
105            MapKind::Small(seed) => Some(*seed),
106            MapKind::Medium { seed, .. } => Some(*seed)
107        }
108    }
109
110    /// Generates a reordered iterator based on the constructed map.
111    ///
112    /// The lengths of provided lists must be equal.    
113    pub fn reorder<'list: 'map, 'map, T>(&'map self, list: &'list [T])
114        -> impl ExactSizeIterator<Item = &'list T> + DoubleEndedIterator + 'map
115    {
116        assert_eq!(self.index.len(), list.len());
117
118        self.index.iter().map(|&idx| &list[idx])
119    }
120
121    /// Create static map
122    ///
123    /// # NOTE
124    ///
125    /// The provided data must be reordered, otherwise the behavior will be unexpected.
126    pub fn create_map(&self, name: String, data: ReferenceId, builder: &mut CodeBuilder)
127        -> io::Result<ReferenceId>
128    {
129        match &self.kind {
130            MapKind::Tiny => {
131                let id = builder.list.len();
132                builder.list.push(OutputEntry {
133                    name: Some(name),
134                    kind: OutputKind::Tiny(data)
135                });
136                Ok(ReferenceId(id))
137            },
138            MapKind::Small(seed) => {
139                let id = builder.list.len();
140                builder.list.push(OutputEntry {
141                    name: Some(name),
142                    kind: OutputKind::Small { seed: *seed, data }
143                });
144                Ok(ReferenceId(id))                
145            },
146            MapKind::Medium { seed, pilots, remap } => {
147                let pilots = if pilots.len() > 1024 {
148                    let offset = builder.u8seq_writer.count();
149                    builder.u8seq_writer.write_u8seq(pilots)?;
150                    let len = builder.u8seq_writer.count() - offset;
151
152                    let id = builder.list.len();
153                    builder.list.push(OutputEntry {
154                        name: None,
155                        kind: OutputKind::U8Seq { offset, len }
156                    });
157                    ReferenceId(id)
158                } else {
159                    builder.create_list_raw(None, "u8".into(), false, pilots.iter().copied())?
160                };
161
162                let remap = builder.create_u32_seq_raw(None, remap.iter().copied())?;
163
164                let id = builder.list.len();
165                builder.list.push(OutputEntry {
166                    name: Some(name),
167                    kind: OutputKind::Medium {
168                        seed: *seed,
169                        pilots, remap, data
170                    }
171                });
172                Ok(ReferenceId(id))
173            },
174        }
175    }
176}
177
178impl<'a> CodeBuilder<'a> {
179    /// Specifies the name, hash, and directory to use for the output map code.
180    ///
181    /// Note that `hash` must be a fully qualified type path that implements
182    /// the [`HashOne`](crate::phf::HashOne) trait
183    /// and is consistent with the algorithm used by MapBuilder.
184    pub fn new(
185        name: String,
186        hash: String,
187        u8seq_writer: &'a mut U8SeqWriter,
188        u32seq_writer: &'a mut U32SeqWriter,
189    ) -> CodeBuilder<'a> {
190        CodeBuilder {
191            name, hash,
192            vis: None,
193            list: Vec::new(),
194            u8seq_writer,
195            u32seq_writer,
196        }
197    }
198
    /// This will configure the generated code as `pub(vis)`.
    ///
    /// With `Some(vis)`, the type aliases and map constants written by
    /// `codegen` are prefixed with `pub(<vis>) `. With `None`, which is the
    /// initial state, no visibility prefix is written.
    pub fn set_visibility(&mut self, vis: Option<String>) {
        self.vis = vis;
    }
203
204    pub fn create_custom(&mut self, name: String) -> ReferenceId {
205        let id = self.list.len();
206        self.list.push(OutputEntry {
207            name: None,
208            kind: OutputKind::Custom { name }
209        });
210        ReferenceId(id)
211    }
212
213    fn create_list_raw<SEQ, T>(
214        &mut self,
215        name: Option<String>,
216        item_type: String,
217        searchable: bool,
218        seq: SEQ
219    )
220        -> io::Result<ReferenceId>
221    where
222        SEQ: ExactSizeIterator<Item = T>,
223        T: fmt::Display
224    {
225        use std::io::Write;
226        
227        let len = seq.len();        
228        let mut s = Vec::new();
229        write!(s, "&[")?;
230        for t in seq {
231            write!(s, "{},", t)?;
232        }
233        write!(s, "]")?;
234        let value = String::from_utf8(s).unwrap();
235        
236        let id = self.list.len();
237        self.list.push(OutputEntry {
238            name,
239            kind: OutputKind::List { item_type, len, value, searchable }
240        });
241        Ok(ReferenceId(id))
242    }
243
244    pub fn create_keys<SEQ, T>(&mut self, name: String, item_type: String, mapout: &MapOutput, seq: SEQ)
245        -> io::Result<ReferenceId>
246    where
247        SEQ: Iterator<Item = T> + ExactSizeIterator,
248        T: fmt::Display
249    {
250        self.create_list_raw(Some(name), item_type, matches!(mapout.kind, MapKind::Tiny), seq)
251    }
252
253    pub fn create_list<SEQ, T>(&mut self, name: String, item_type: String, seq: SEQ)
254        -> io::Result<ReferenceId>
255    where
256        SEQ: Iterator<Item = T> + ExactSizeIterator,
257        T: fmt::Display
258    {
259        self.create_list_raw(Some(name), item_type, false, seq)
260    }
261    
262    pub fn create_pair(&mut self, keys: ReferenceId, values: ReferenceId) -> ReferenceId {
263        let id = self.list.len();
264        self.list.push(OutputEntry {
265            name: None,
266            kind: OutputKind::Pair { keys, values }
267        });
268        ReferenceId(id)
269    }
270
271    pub fn create_bytes_keys<SEQ, B>(&mut self, name: String, mapout: &MapOutput, seq: SEQ)
272        -> io::Result<ReferenceId>
273    where
274        SEQ: Iterator<Item = B> + ExactSizeIterator,
275        B: AsRef<[u8]>
276    {
277        if seq.len() > 16 {
278            self.create_bytes_position_seq(name, seq)
279        } else {
280            self.create_list_raw(
281                Some(name),
282                "&'static [u8]".into(),
283                matches!(mapout.kind, MapKind::Tiny),
284                seq.map(|b| format!("&{:?}", b.as_ref()))
285            )
286        }
287    }
288
289    pub fn create_bytes_position_seq<SEQ, B>(&mut self, name: String, seq: SEQ)
290        -> io::Result<ReferenceId>
291    where
292        SEQ: Iterator<Item = B> + ExactSizeIterator,
293        B: AsRef<[u8]>
294    {
295        let offset = self.u8seq_writer.count();
296        let mut count = 0;
297        let mut list = Vec::new();
298        for buf in seq {
299            let buf = buf.as_ref();
300            self.u8seq_writer.write_u8seq(buf)?;
301
302            let len: u32 = buf.len().try_into().unwrap();
303            count += len;
304            list.push(count);
305        }
306        let len = self.u8seq_writer.count() - offset;
307        let index = self.create_u32_seq_raw(None, list.iter().copied())?;
308
309        let id = self.list.len();
310        self.list.push(OutputEntry {
311            name: Some(name),
312            kind: OutputKind::BytesPositionSeq { offset, len, index }
313        });
314        Ok(ReferenceId(id))
315    }
316
317    fn create_u32_seq_raw<SEQ>(&mut self, name: Option<String>, seq: SEQ)
318        -> io::Result<ReferenceId>
319    where
320        SEQ: Iterator<Item = u32> + ExactSizeIterator
321    {
322        if seq.len() > 1024 {
323            let offset = self.u32seq_writer.count();
324            for n in seq {
325                self.u32seq_writer.write_u32(n)?;
326            }
327            let len = self.u32seq_writer.count() - offset;
328
329            let id = self.list.len();
330            self.list.push(OutputEntry {
331                name,
332                kind: OutputKind::U32Seq { offset, len }
333            });
334            Ok(ReferenceId(id))
335        } else {
336            self.create_list_raw(name, "u32".into(), false, seq)
337        }        
338    }    
339
340    pub fn create_u32_seq<SEQ>(&mut self, name: String, seq: SEQ)
341        -> io::Result<ReferenceId>
342    where
343        SEQ: Iterator<Item = u32> + ExactSizeIterator
344    {
345        self.create_u32_seq_raw(Some(name), seq)
346    }
347
348    pub fn create_short_id_seq<SEQ>(&mut self, name: String, pool: &ShortPool<'_>, seq: SEQ)
349        -> io::Result<ReferenceId>
350    where
351        SEQ: Iterator<Item = ShortId> + ExactSizeIterator
352    {
353        let index = self.create_u32_seq_raw(None, seq.map(|id| id.0))?;
354        let id = self.list.len();
355        self.list.push(OutputEntry {
356            name: Some(name),
357            kind: OutputKind::BytesShortSeq {
358                pooled_id: pool.entry.clone(), index
359            }
360        });
361        Ok(ReferenceId(id))
362    }
363
364    pub fn codegen(self, writer: &mut dyn io::Write) -> io::Result<()> {
365        struct ReferenceEntry {
366            name: String,
367        }
368
369        let crate_name = env!("CARGO_CRATE_NAME");
370        let vis = self.vis.as_deref()
371            .map(|vis| format!("pub({}) ", vis))
372            .unwrap_or_default();
373        let u8seq_name = self.u8seq_writer.0.entry.clone();
374        let u32seq_name = self.u32seq_writer.0.entry.clone();        
375
376        let mut list: Vec<ReferenceEntry> = Vec::with_capacity(self.list.len());
377
378        for (idx, entry) in self.list.iter().enumerate() {
379            let entry = match &entry.kind {
380                OutputKind::Custom { name } => ReferenceEntry {
381                    name: name.clone(),
382                },
383                OutputKind::U8Seq { offset, len } => {
384                    let ty = format!(
385                        "{crate_name}::store::SliceData<{}, {}, {}>",
386                        offset,
387                        len,
388                        u8seq_name,
389                    );
390
391                    if let Some(entry_name) = entry.name.as_ref() {
392                        writeln!(writer, "{vis}type {} = {};", entry_name, ty)?;
393                        ReferenceEntry { name: entry_name.clone() }
394                    } else {
395                        ReferenceEntry { name: ty }
396                    }
397                },
398                OutputKind::U32Seq { offset, len } => {
399                    let data_ty = format!(
400                        "{crate_name}::store::SliceData<{}, {}, {}>",
401                        offset,
402                        len,
403                        u32seq_name,
404                    );
405                    let ty = format!(
406                        "{crate_name}::aligned::AlignedArray<{}, u32, {}>",
407                        len,
408                        data_ty,
409                    );
410
411                    if let Some(entry_name) = entry.name.as_ref() {
412                        writeln!(writer, "{vis}type {} = {};", entry_name, ty)?;
413                        ReferenceEntry { name: entry_name.clone() }
414                    } else {
415                        ReferenceEntry { name: ty }
416                    }
417                },
418                OutputKind::BytesPositionSeq { offset, len, index } => {
419                    let data_ty = format!(
420                        "{crate_name}::store::SliceData<{}, {}, {}>",
421                        offset, len, u8seq_name
422                    );
423                    let ty = format!(
424                        "{crate_name}::seq::PositionSeq<{}, {}>",
425                        &list[index.0].name,
426                        data_ty,
427                    );
428
429                    let entry_name = entry.name.as_ref().unwrap();
430                    writeln!(writer, "{vis}type {} = {};", entry_name, ty)?;
431                    ReferenceEntry { name: entry_name.clone() }                    
432                },
433                OutputKind::BytesShortSeq { pooled_id, index } => {
434                    let ty = format!(
435                        "{crate_name}::seq::PooledSeq<{}, {}>",
436                        &list[index.0].name,
437                        pooled_id
438                    );
439
440                    let entry_name = entry.name.as_ref().unwrap();
441                    writeln!(writer, "{vis}type {} = {};", entry_name, ty)?;
442                    ReferenceEntry { name: entry_name.clone() }
443                }
444                OutputKind::List { item_type, value, len, searchable } => {
445                    let namebuf;
446                    let entry_name = if let Some(name) = entry.name.as_ref() {
447                        name
448                    } else {
449                        namebuf = format!("PrecomputedList{}{}", self.name, idx);
450                        &namebuf
451                    };
452                    writeln!(
453                        writer,
454                        "{crate_name}::define!(const {}{}: &[{}; {}] = {});",
455                        searchable.then_some("searchable ").unwrap_or_default(),
456                        entry_name,
457                        item_type,
458                        len,
459                        value
460                    )?;
461                    ReferenceEntry { name: entry_name.clone() }
462                },
463                OutputKind::Pair { keys, values } => {
464                    let ty = format!(
465                        "({}, {})",
466                        &list[keys.0].name,
467                        &list[values.0].name,
468                    );
469                    ReferenceEntry { name: ty }                    
470                }
471                OutputKind::Tiny(data) => {
472                    let ty = format!(
473                        "{crate_name}::TinyMap<{}>",
474                        &list[data.0].name
475                    );
476                    let val = format!("{crate_name}::TinyMap::new()");
477
478                    let entry_name = entry.name.as_ref().unwrap();
479                    writeln!(writer, "{vis}const {}: {} = {};", entry_name, ty, val)?;
480                    ReferenceEntry { name: entry_name.clone() }
481                },
482                OutputKind::Small { seed, data } => {
483                    let ty = format!(
484                        "{crate_name}::SmallMap<{}, {}>",
485                        &list[data.0].name,
486                        self.hash,
487                    );
488                    let val = format!(
489                        "{crate_name}::SmallMap::new({})",
490                        seed,
491                    );
492
493                    let entry_name = entry.name.as_ref().unwrap();
494                    writeln!(writer, "{vis}const {}: {} = {};", entry_name, ty, val)?;
495                    ReferenceEntry { name: entry_name.clone() }
496                },
497                OutputKind::Medium { seed, pilots, remap, data } => {
498                    let ty = format!(
499                        "{crate_name}::MediumMap<{}, {}, {}, {}>",
500                        &list[pilots.0].name,
501                        &list[remap.0].name,
502                        &list[data.0].name,
503                        self.hash,
504                    );
505                    let val = format!(
506                        "{crate_name}::MediumMap::new({})",
507                        seed,
508                    );
509
510                    let entry_name = entry.name.as_ref().unwrap();
511                    writeln!(writer, "{vis}const {}: {} = {};", entry_name, ty, val)?;
512                    ReferenceEntry { name: entry_name.clone() }
513                },
514            };
515
516            list.push(entry);
517        }
518
519        Ok(())
520    }
521}
522
523impl<W: io::Write> io::Write for CountWriter<W> {
524    fn write(&mut self, b: &[u8]) -> io::Result<usize> {
525        let n = self.writer.write(b)?;
526        self.count += n;
527        Ok(n)
528    }
529
530    fn write_vectored(&mut self, bufs: &[io::IoSlice<'_>]) -> io::Result<usize> {
531        let n = self.writer.write_vectored(bufs)?;
532        self.count += n;
533        Ok(n)
534    }
535
536    fn flush(&mut self) -> io::Result<()> {
537        self.writer.flush()
538    }
539}
540
541impl BytesWriter {
542    fn writer(&mut self) -> io::Result<&mut CountWriter<fs::File>> {
543        if self.writer.is_some() {
544            Ok(self.writer.as_mut().unwrap())
545        } else {
546            let fd = fs::File::create_new(&self.file)?;
547            Ok(self.writer.get_or_insert(CountWriter {
548                writer: fd,
549                count: 0
550            }))
551        }
552    }
553}
554
555impl U8SeqWriter {
556    pub fn new(entry: String, file: PathBuf) -> U8SeqWriter {
557        U8SeqWriter(BytesWriter {
558            entry, file,
559            writer: None
560        })
561    }
562    
563    fn write_u8seq(&mut self, seq: &[u8]) -> io::Result<()> {
564        self.0.writer()?.write_all(seq)
565    }
566
567    fn count(&self) -> usize {
568        self.0.writer.as_ref().map(|writer| writer.count).unwrap_or_default()
569    }
570
571    pub fn codegen(self, code_writer: &mut dyn io::Write) -> io::Result<()> {
572        let crate_name = env!("CARGO_CRATE_NAME");
573        
574        if let Some(writer) = self.0.writer.as_ref() {
575            writeln!(
576                code_writer,
577                r#"{crate_name}::define!(const {name}: &[u8; {count}] = include "{file}");"#,
578                name = self.0.entry,
579                count = writer.count,
580                file = self.0.file.file_name().unwrap().display()
581            )?;
582        }
583
584        Ok(())
585    }
586}
587
588impl U32SeqWriter {
589    pub fn new(entry: String, file: PathBuf) -> U32SeqWriter {
590        U32SeqWriter(BytesWriter {
591            entry, file,
592            writer: None
593        })
594    }
595    
596    fn write_u32(&mut self, n: u32) -> io::Result<()> {
597        self.0.writer()?.write_all(&n.to_le_bytes())
598    }
599        
600    fn count(&self) -> usize {
601        self.0.writer.as_ref().map(|writer| writer.count).unwrap_or_default()
602    }
603
604    pub fn codegen(self, code_writer: &mut dyn io::Write) -> io::Result<()> {
605        let crate_name = env!("CARGO_CRATE_NAME");
606        
607        if let Some(writer) = self.0.writer.as_ref() {
608            writeln!(
609                code_writer,
610                r#"{crate_name}::define!(const {name}: &[u8 align u32; {count}] = include "{file}");"#,
611                name = self.0.entry,
612                count = writer.count,
613                file = self.0.file.file_name().unwrap().display(),
614            )?;
615        }
616
617        Ok(())
618    }
619}
620
621impl<'s> ShortPool<'s> {
622    pub fn new(entry: String) -> ShortPool<'s> {
623        ShortPool {
624            entry,
625            buf: Vec::new(),
626            map: HashMap::new()
627        }
628    }
629
630    pub fn insert(&mut self, value: &'s [u8]) -> ShortId {
631        self.insert_cow(value.into())
632    }
633    
634    pub fn insert_cow(&mut self, value: Cow<'s, [u8]>) -> ShortId {
635        *self.map.entry(value.clone()).or_insert_with(|| {
636            let offset = self.buf.len();
637            self.buf.extend_from_slice(&value);
638            let len: u8 = (self.buf.len() - offset).try_into().unwrap();
639            let offset: u32 = offset.try_into().unwrap();
640
641            if offset > (1 << 24) {
642                panic!("bytes pool too large");
643            }
644
645            ShortId(offset | (u32::from(len) << 24))
646        })
647    }
648
649    pub fn get(&self, id: ShortId) -> &[u8] {
650        let (offset, len) = crate::seq::pooled_unpack(id.0);
651        &self.buf[offset..][..len]
652    }
653
654    pub fn codegen(self, builder: &mut CodeBuilder<'_>, writer: &mut dyn io::Write) -> io::Result<()> {
655        if self.map.is_empty() {
656            return Ok(());
657        }
658
659        let crate_name = env!("CARGO_CRATE_NAME");
660        let vis = builder.vis.as_deref()
661            .map(|vis| format!("pub({}) ", vis))
662            .unwrap_or_default();        
663
664        let data_offset = builder.u8seq_writer.count();
665        builder.u8seq_writer.write_u8seq(&self.buf)?;
666        let data_len = builder.u8seq_writer.count() - data_offset;
667
668        writeln!(writer,
669            r#"
670#[derive(Clone, Copy)]
671{vis}struct {name}(u32);
672
673impl From<u32> for {name} {{
674    fn from(n: u32) -> Self {{
675        {name}(n)
676    }}
677}}
678
679impl {crate_name}::seq::PooledId for {name} {{
680    fn get(self) -> Option<&'static [u8]> {{
681        use {crate_name}::store::AsData;
682    
683        let (offset, len) = {crate_name}::seq::pooled_unpack(self.0);
684        <{crate_name}::store::SliceData<{data_offset}, {data_len}, {u8seq}>>::as_data()
685            .get(offset..offset + len)
686    }}
687}}
688            "#,
689            u8seq = builder.u8seq_writer.0.entry,
690            name = self.entry,
691        )
692    }
693}