reflexo_typst2vec/pass/
glyph2vec.rs

1//! Lowering Typst Document into SvgItem.
2
3use std::collections::HashSet;
4use std::ops::DerefMut;
5use std::sync::Arc;
6
7use parking_lot::Mutex;
8use rayon::iter::IntoParallelIterator;
9use rayon::iter::ParallelIterator;
10use ttf_parser::GlyphId;
11use typst::layout::Size;
12use typst::text::Font;
13use typst::visualize::Image;
14
15use crate::font::GlyphProvider;
16use crate::ir::{self, FlatGlyphItem, FontItem, FontPack, FontRef, GlyphItem, GlyphRef};
17use crate::IntoTypst;
18
/// Glyph pass with `ENABLE_REF_CNT` turned off: newly seen fonts and glyphs
/// are not recorded in the `new_fonts` / `new_glyphs` delta lists.
pub type Glyph2VecPass = TGlyph2VecPass</* ENABLE_REF_CNT */ false>;
/// Glyph pass with `ENABLE_REF_CNT` turned on: newly seen fonts and glyphs
/// are additionally recorded so they can be drained with `finalize_delta`.
pub type IncrGlyph2VecPass = TGlyph2VecPass</* ENABLE_REF_CNT */ true>;
21
/// Shared glyph-lowering state: resolves raw glyphs through a glyph provider
/// and carries the ligature-lowering switch.
pub struct ConvertInnerImpl {
    /// A glyph backend provider.
    pub gp: GlyphProvider,

    /// Whether to lower ligature information.
    ///
    /// The constructor only leaves this `true` when the
    /// `experimental-ligature` feature is compiled in.
    pub lowering_ligature: bool,
}
29
/// Lower a glyph into vector item.
///
/// The `ENABLE_REF_CNT` const parameter selects whether newly created fonts
/// and glyphs are also appended to `new_fonts` / `new_glyphs`.
pub struct TGlyph2VecPass<const ENABLE_REF_CNT: bool = false> {
    /// Glyph lowering backend shared by all conversions of this pass.
    pub inner: ConvertInnerImpl,

    /// Incremental state
    /// The lifetime of items, used to determine the lifetime of the new items.
    // NOTE(review): not read anywhere in this file; presumably maintained by
    // the caller -- confirm.
    pub lifetime: u64,
    /// The new font items produced in this lifecycle.
    pub new_fonts: Mutex<Vec<FontItem>>,
    /// The new glyph items produced in this lifecycle.
    pub new_glyphs: Mutex<Vec<(GlyphRef, GlyphItem)>>,

    /// Intermediate representation of an incomplete font pack.
    /// All font items are stored in this map, and then sorted by the index.
    font_mapping: reflexo::adt::CHashMap<Font, FontRef>,
    /// Detects short (32-bit) font hash conflicts: maps each assigned hash to
    /// the font that owns it.
    font_conflict_checker: reflexo::adt::CHashMap<u32, Font>,
    /// Lock to get a unique local index for each font.
    font_index: Mutex<usize>,

    /// Intermediate representation of an incomplete glyph pack.
    glyph_defs: reflexo::adt::CHashMap<GlyphItem, (GlyphRef, FontRef)>,

    /// for interning
    // NOTE(review): neither set is touched in this file; presumably filled in
    // by callers that track which fonts/glyphs are referenced -- confirm.
    pub used_fonts: HashSet<FontRef>,
    pub used_glyphs: HashSet<GlyphRef>,
}
57
58impl<const ENABLE_REF_CNT: bool> TGlyph2VecPass<ENABLE_REF_CNT> {
59    pub fn new(gp: GlyphProvider, lowering_ligature: bool) -> Self {
60        Self {
61            inner: ConvertInnerImpl::new(gp, lowering_ligature),
62
63            lifetime: 0,
64            font_mapping: Default::default(),
65            font_conflict_checker: Default::default(),
66            font_index: Default::default(),
67            glyph_defs: Default::default(),
68            new_fonts: Default::default(),
69            new_glyphs: Default::default(),
70            used_fonts: Default::default(),
71            used_glyphs: Default::default(),
72        }
73    }
74
75    pub fn finalize(&self) -> (FontPack, Vec<(GlyphRef, FlatGlyphItem)>) {
76        let mut fonts = self.font_mapping.clone().into_iter().collect::<Vec<_>>();
77        fonts.sort_by(|(_, a), (_, b)| a.idx.cmp(&b.idx));
78        let fonts = fonts.into_iter().map(|(a, _)| a.into_typst()).collect();
79
80        let glyphs = self.glyph_defs.clone().into_iter().collect::<Vec<_>>();
81        let glyphs = glyphs
82            .into_par_iter()
83            .flat_map(|(a, b)| {
84                self.inner.must_flat_glyph(&a).map(|g| {
85                    (
86                        GlyphRef {
87                            font_hash: b.1.hash,
88                            glyph_idx: b.0.glyph_idx,
89                        },
90                        g,
91                    )
92                })
93            })
94            .collect();
95
96        (fonts, glyphs)
97    }
98
99    pub fn build_font(&self, font: &Font) -> FontRef {
100        if let Some(id) = self.font_mapping.get(font) {
101            return *id;
102        }
103
104        // Lock before insertion checking to ensure atomicity
105        let mut font_index_lock = self.font_index.lock();
106
107        let entry = self.font_mapping.entry(font.clone());
108        let entry = entry.or_insert_with(|| {
109            let font_index = font_index_lock.deref_mut();
110            let mut abs_ref = FontRef {
111                hash: reflexo::hash::hash32(font),
112                idx: (*font_index) as u32,
113            };
114            *font_index += 1;
115
116            // Detect font short hash conflict
117            'conflict_detection: loop {
118                if let Some(conflict) = self.font_conflict_checker.get(&abs_ref.hash) {
119                    if *conflict != *font {
120                        log::error!(
121                            "font conflict detected: {} {:?} {:?}",
122                            abs_ref.hash,
123                            font,
124                            conflict
125                        );
126                    }
127                    abs_ref.hash += 1;
128                    continue 'conflict_detection;
129                }
130
131                self.font_conflict_checker
132                    .insert(abs_ref.hash, font.clone());
133                break 'conflict_detection;
134            }
135
136            if ENABLE_REF_CNT {
137                self.new_fonts.lock().push(font.clone().into_typst());
138            }
139
140            abs_ref
141        });
142
143        *entry.value()
144    }
145
146    pub fn build_glyph(&self, font_ref: FontRef, glyph: GlyphItem) -> GlyphRef {
147        let (_, id) = match &glyph {
148            GlyphItem::Raw(g, id) => (g, id),
149            _ => todo!(),
150        };
151
152        let glyph_idx = id.0 as u32;
153
154        let abs_ref = GlyphRef {
155            font_hash: font_ref.hash,
156            glyph_idx,
157        };
158
159        if self
160            .glyph_defs
161            .insert(glyph.clone(), (abs_ref, font_ref))
162            .is_some()
163        {
164            return abs_ref;
165        }
166
167        if ENABLE_REF_CNT {
168            self.new_glyphs.lock().push((abs_ref, glyph));
169        }
170
171        abs_ref
172    }
173
174    #[allow(dead_code)]
175    pub(crate) fn verify_glyph(&self, id: GlyphRef, data: &GlyphItem) {
176        if let Some(glyph) = self.glyph_defs.get(data) {
177            assert_eq!(glyph.0, id);
178        } else {
179            panic!("glyph not found");
180        }
181    }
182}
183
184impl IncrGlyph2VecPass {
185    pub fn finalize_delta(&self) -> (FontPack, Vec<(GlyphRef, FlatGlyphItem)>) {
186        let fonts = std::mem::take(self.new_fonts.lock().deref_mut());
187        let glyphs = std::mem::take(self.new_glyphs.lock().deref_mut());
188        let glyphs = glyphs
189            .into_par_iter()
190            .flat_map(|(id, glyph)| {
191                let glyph = self.inner.must_flat_glyph(&glyph);
192                glyph.map(|glyph| (id, glyph))
193            })
194            .collect::<Vec<_>>();
195        (fonts, glyphs)
196    }
197}
198
199impl ConvertInnerImpl {
200    pub fn new(gp: GlyphProvider, lowering_ligature: bool) -> Self {
201        Self {
202            gp,
203            lowering_ligature: cfg!(feature = "experimental-ligature") && lowering_ligature,
204        }
205    }
206
207    pub fn glyph(&self, glyph_item: &GlyphItem) -> Option<GlyphItem> {
208        match glyph_item {
209            GlyphItem::Raw(font, id) => self.raw_glyph(font, *id),
210            GlyphItem::Image(..) | GlyphItem::Outline(..) => Some(glyph_item.clone()),
211            GlyphItem::None => Some(GlyphItem::None),
212        }
213    }
214
215    pub fn must_flat_glyph(&self, glyph_item: &GlyphItem) -> Option<FlatGlyphItem> {
216        let glyph_item = self.glyph(glyph_item)?;
217        match glyph_item {
218            GlyphItem::Outline(i) => Some(FlatGlyphItem::Outline(i)),
219            GlyphItem::Image(i) => Some(FlatGlyphItem::Image(i)),
220            GlyphItem::None | GlyphItem::Raw(..) => None,
221        }
222    }
223
    /// Stub used when the `glyph2vec` feature is disabled: raw glyphs are
    /// never resolved, so this always returns `None`.
    #[cfg(not(feature = "glyph2vec"))]
    fn raw_glyph(&self, _font: &Font, _id: GlyphId) -> Option<GlyphItem> {
        None
    }
228}
229
230#[cfg(feature = "glyph2vec")]
231impl ConvertInnerImpl {
232    fn ligature_len(&self, font: &Font, id: GlyphId) -> u8 {
233        if !self.lowering_ligature {
234            return 0;
235        }
236
237        self.gp
238            .ligature_glyph(font, id)
239            .map(|l| l.len())
240            .unwrap_or_default() as u8
241    }
242
243    fn raw_glyph(&self, font: &Font, id: GlyphId) -> Option<GlyphItem> {
244        self.svg_glyph(font, id)
245            .map(GlyphItem::Image)
246            .or_else(|| self.bitmap_glyph(font, id).map(GlyphItem::Image))
247            .or_else(|| self.outline_glyph(font, id).map(GlyphItem::Outline))
248    }
249
250    /// Lower an SVG glyph into svg item.
251    /// More information: https://learn.microsoft.com/zh-cn/typography/opentype/spec/svg
252    fn svg_glyph(&self, font: &Font, id: GlyphId) -> Option<Arc<ir::ImageGlyphItem>> {
253        use crate::ir::Scalar;
254        use crate::utils::AbsExt;
255
256        let image = Self::extract_svg_glyph(&self.gp, font, id)?;
257
258        // position our image
259        let ascender = font
260            .metrics()
261            .ascender
262            .at(typst::layout::Abs::pt(font.metrics().units_per_em))
263            .to_f32();
264
265        Some(Arc::new(ir::ImageGlyphItem {
266            ts: ir::Transform {
267                sx: Scalar(1.),
268                ky: Scalar(0.),
269                kx: Scalar(0.),
270                sy: Scalar(-1.),
271                tx: Scalar(0.),
272                ty: Scalar(ascender),
273            },
274            image,
275            ligature_len: self.ligature_len(font, id),
276        }))
277    }
278
279    /// Lower a bitmap glyph into the svg text.
280    fn bitmap_glyph(&self, font: &Font, id: GlyphId) -> Option<Arc<ir::ImageGlyphItem>> {
281        use crate::utils::AbsExt;
282        /// Use types from `tiny-skia` crate.
283        use tiny_skia as sk;
284
285        let ppem = u16::MAX;
286        let upem = font.metrics().units_per_em as f32;
287
288        let (glyph_image, raster_x, raster_y) = self.gp.bitmap_glyph(font, id, ppem)?;
289
290        // FIXME: Vertical alignment isn't quite right for Apple Color Emoji,
291        // and maybe also for Noto Color Emoji. And: Is the size calculation
292        // correct?
293
294        let w = glyph_image.width();
295        let h = glyph_image.height();
296        let sz = Size::new(typst::layout::Abs::pt(w), typst::layout::Abs::pt(h));
297
298        let image = ir::ImageItem {
299            image: Arc::new(glyph_image.into_typst()),
300            size: sz.into_typst(),
301        };
302
303        // position our image
304        // first, the ascender is used
305        // next, also apply an offset of (1 - ascender) like typst
306        let adjusted = font.metrics().ascender * 2. - typst::layout::Em::one();
307        // let adjusted = font.metrics().ascender;
308
309        let adjusted = adjusted
310            .at(typst::layout::Abs::pt(font.metrics().units_per_em))
311            .to_f32();
312
313        let ts = sk::Transform::from_scale(upem / w as f32, -upem / h as f32);
314
315        // size
316        let dx = raster_x as f32;
317        let dy = raster_y as f32;
318        let ts = ts.post_translate(dx, adjusted + dy);
319
320        Some(Arc::new(ir::ImageGlyphItem {
321            ts: ts.into(),
322            image,
323            ligature_len: self.ligature_len(font, id),
324        }))
325    }
326
327    /// Lower an outline glyph into svg text. This is the "normal" case.
328    fn outline_glyph(&self, font: &Font, id: GlyphId) -> Option<Arc<ir::OutlineGlyphItem>> {
329        let d = self.gp.outline_glyph(font, id)?.into();
330
331        Some(Arc::new(ir::OutlineGlyphItem {
332            ts: None,
333            d,
334            ligature_len: self.ligature_len(font, id),
335        }))
336    }
337
338    fn extract_svg_glyph(g: &GlyphProvider, font: &Font, id: GlyphId) -> Option<ir::ImageItem> {
339        struct FindViewBoxResult<'a> {
340            start_span: Option<xmlparser::StrSpan<'a>>,
341            first_viewbox: Option<(xmlparser::StrSpan<'a>, xmlparser::StrSpan<'a>)>,
342        }
343
344        /// Find the string location of the **first** viewBox attribute.
345        /// When there are multiple viewBox attributes, the first one is used
346        /// (as many xml-based dom engines do).
347        fn find_viewbox_attr(svg_str: &'_ str) -> FindViewBoxResult<'_> {
348            let document = xmlparser::Tokenizer::from(svg_str);
349
350            let mut start_span = None;
351            let mut first_viewbox = None;
352            for n in document {
353                let tok = n.unwrap();
354                match tok {
355                    xmlparser::Token::ElementStart { span, local, .. } => {
356                        if local.as_str() == "svg" {
357                            start_span = Some(span);
358                        }
359                    }
360                    xmlparser::Token::Attribute {
361                        span, local, value, ..
362                    } => {
363                        if local.as_str() == "viewBox" {
364                            first_viewbox = Some((span, value));
365                            break;
366                        }
367                    }
368                    xmlparser::Token::ElementEnd { .. } => break,
369                    _ => {}
370                }
371            }
372
373            FindViewBoxResult {
374                start_span,
375                first_viewbox,
376            }
377        }
378        use crate::utils::AbsExt;
379        use std::io::Read;
380
381        use std::sync::OnceLock;
382
383        static WARN_VIEW_BOX: OnceLock<()> = OnceLock::new();
384
385        let data = g.svg_glyph(font, id)?;
386        let mut data = data.as_ref();
387
388        let font_metrics = font.metrics();
389
390        // Decompress SVGZ.
391        let mut decoded = vec![];
392
393        // The first three bytes of the gzip-encoded document header must be
394        //   0x1F, 0x8B, 0x08.
395        if data.starts_with(&[0x1f, 0x8b]) {
396            let mut decoder = flate2::read::GzDecoder::new(data);
397            decoder.read_to_end(&mut decoded).ok()?;
398            data = &decoded;
399        }
400
401        // todo: It is also legal to provide a SVG document containing multiple glyphs.
402        // > When a font engine renders glyph 14, the result shall be the same as
403        // > rendering the following SVG document:
404        // > `  <svg> <defs> <use #glyph{id}> </svg>`
405        // See: <https://learn.microsoft.com/en-us/typography/opentype/spec/svg#glyph-identifiers>
406
407        let upem = typst::layout::Abs::pt(font.units_per_em());
408        let (width, height) = (upem.to_f32(), upem.to_f32());
409        let origin_ascender = font_metrics.ascender.at(upem).to_f32();
410
411        let doc_string = String::from_utf8(data.to_owned()).unwrap();
412
413        // todo: verify SVG capability requirements and restrictions
414
415        // Partially parse the view box attribute
416        let mut svg_str = std::str::from_utf8(data).ok()?.to_owned();
417        let FindViewBoxResult {
418            start_span,
419            first_viewbox,
420        } = find_viewbox_attr(svg_str.as_str());
421
422        // determine view box
423        let view_box = first_viewbox
424            .as_ref()
425            .map(|s| {
426                WARN_VIEW_BOX.get_or_init(|| {
427                    println!(
428                        "render_svg_glyph with viewBox, This should be helpful if you can help us verify the result: {:?} {:?}",
429                        font.info().family,
430                        doc_string
431                    );
432                });
433                s.1.as_str().to_owned()
434            })
435            .unwrap_or_else(|| format!("0 {} {width} {height}", -origin_ascender));
436
437        // determine view box
438        match first_viewbox {
439            Some((span, ..)) => {
440                // replace the first viewBox attribute
441                svg_str.replace_range(span.range(), format!(r#"viewBox="{view_box}""#).as_str());
442            }
443            None => {
444                // insert viewBox attribute to the begin of svg tag
445                svg_str.insert_str(
446                    start_span.unwrap().range().end,
447                    format!(r#" viewBox="{view_box}""#).as_str(),
448                );
449            }
450        }
451
452        let glyph_image = typst::visualize::Image::new(
453            svg_str.as_bytes().to_vec().into(),
454            typst::visualize::ImageFormat::Vector(typst::visualize::VectorFormat::Svg),
455            // typst::geom::Axes::new(width as u32, height as u32),
456            None,
457        )
458        .ok()?;
459
460        let sz = Size::new(
461            typst::layout::Abs::pt(glyph_image.width()),
462            typst::layout::Abs::pt(glyph_image.height()),
463        );
464
465        Some(ir::ImageItem {
466            image: Arc::new(glyph_image.into_typst()),
467            size: sz.into_typst(),
468        })
469    }
470}
471
472/// Lower a raster or SVG image into svg item.
473#[comemo::memoize]
474fn lower_image(image: &Image, size: Size) -> ir::ImageItem {
475    ir::ImageItem {
476        image: Arc::new(image.clone().into_typst()),
477        size: size.into_typst(),
478    }
479}