reflexo_typst2vec/pass/
glyph2vec.rs

//! Lowering Typst fonts and glyphs into vector items.
2
3use std::collections::HashSet;
4use std::ops::DerefMut;
5use std::sync::Arc;
6
7use parking_lot::Mutex;
8use rayon::iter::IntoParallelIterator;
9use rayon::iter::ParallelIterator;
10use ttf_parser::GlyphId;
11use typst::foundations::Bytes;
12use typst::foundations::Smart;
13use typst::layout::Size;
14use typst::text::Font;
15use typst::visualize::Image;
16use typst::visualize::SvgImage;
17
18use crate::font::GlyphProvider;
19use crate::ir::{self, FlatGlyphItem, FontItem, FontPack, FontRef, GlyphItem, GlyphRef};
20use crate::IntoTypst;
21
/// Glyph lowering pass with `ENABLE_REF_CNT` disabled: fonts and glyphs
/// registered for the first time are not queued in `new_fonts` / `new_glyphs`.
pub type Glyph2VecPass = TGlyph2VecPass</* ENABLE_REF_CNT */ false>;
/// Glyph lowering pass with `ENABLE_REF_CNT` enabled: fonts and glyphs
/// registered for the first time are also queued in `new_fonts` / `new_glyphs`,
/// from where `finalize_delta` drains them.
pub type IncrGlyph2VecPass = TGlyph2VecPass</* ENABLE_REF_CNT */ true>;
24
/// The glyph converter shared by the passes: a glyph provider used to fetch
/// glyph data plus the ligature lowering switch.
pub struct ConvertInnerImpl {
    /// A glyph backend provider.
    pub gp: GlyphProvider,

    /// Whether to lower ligature information.
    /// `ConvertInnerImpl::new` forces this to `false` unless the
    /// `experimental-ligature` feature is enabled.
    pub lowering_ligature: bool,
}
32
/// Lower a glyph into vector item.
///
/// When `ENABLE_REF_CNT` is `true`, fonts and glyphs that are registered for
/// the first time are additionally queued in `new_fonts` / `new_glyphs`.
pub struct TGlyph2VecPass<const ENABLE_REF_CNT: bool = false> {
    /// The glyph converter used to turn registered glyphs into flat items.
    pub inner: ConvertInnerImpl,

    // Incremental state.
    /// The lifetime of items, used to determine the lifetime of the new items.
    pub lifetime: u64,
    /// The new font items produced in this lifecycle.
    /// Only filled when `ENABLE_REF_CNT` is `true`.
    pub new_fonts: Mutex<Vec<FontItem>>,
    /// The new glyph items produced in this lifecycle.
    /// Only filled when `ENABLE_REF_CNT` is `true`.
    pub new_glyphs: Mutex<Vec<(GlyphRef, GlyphItem)>>,

    /// Intermediate representation of an incomplete font pack.
    /// All font items are stored in this map, and then sorted by the index.
    font_mapping: reflexo::adt::CHashMap<Font, FontRef>,
    /// Detects short hash conflicts between fonts: maps each short hash that
    /// has been handed out to the font that owns it.
    font_conflict_checker: reflexo::adt::CHashMap<u32, Font>,
    /// Lock to get a unique local index for each font.
    /// Holds the next index to hand out; `build_font` increments it.
    font_index: Mutex<usize>,

    /// Intermediate representation of an incomplete glyph pack.
    /// Maps each registered glyph item to its glyph reference and the
    /// reference of the font it was registered with.
    glyph_defs: reflexo::adt::CHashMap<GlyphItem, (GlyphRef, FontRef)>,

    /// For interning: the font references in use.
    /// NOTE(review): none of the methods in this file read or write these two
    /// sets beyond initializing them as empty; presumably callers maintain them.
    pub used_fonts: HashSet<FontRef>,
    /// For interning: the glyph references in use.
    pub used_glyphs: HashSet<GlyphRef>,
}
60
61impl<const ENABLE_REF_CNT: bool> TGlyph2VecPass<ENABLE_REF_CNT> {
62    pub fn new(gp: GlyphProvider, lowering_ligature: bool) -> Self {
63        Self {
64            inner: ConvertInnerImpl::new(gp, lowering_ligature),
65
66            lifetime: 0,
67            font_mapping: Default::default(),
68            font_conflict_checker: Default::default(),
69            font_index: Default::default(),
70            glyph_defs: Default::default(),
71            new_fonts: Default::default(),
72            new_glyphs: Default::default(),
73            used_fonts: Default::default(),
74            used_glyphs: Default::default(),
75        }
76    }
77
78    pub fn finalize(&self) -> (FontPack, Vec<(GlyphRef, FlatGlyphItem)>) {
79        let mut fonts = self.font_mapping.clone().into_iter().collect::<Vec<_>>();
80        fonts.sort_by(|(_, a), (_, b)| a.idx.cmp(&b.idx));
81        let fonts = fonts.into_iter().map(|(a, _)| a.into_typst()).collect();
82
83        let glyphs = self.glyph_defs.clone().into_iter().collect::<Vec<_>>();
84        let glyphs = glyphs
85            .into_par_iter()
86            .flat_map(|(a, b)| {
87                self.inner.must_flat_glyph(&a).map(|g| {
88                    (
89                        GlyphRef {
90                            font_hash: b.1.hash,
91                            glyph_idx: b.0.glyph_idx,
92                        },
93                        g,
94                    )
95                })
96            })
97            .collect();
98
99        (fonts, glyphs)
100    }
101
102    pub fn build_font(&self, font: &Font) -> FontRef {
103        if let Some(id) = self.font_mapping.get(font) {
104            return *id;
105        }
106
107        // Lock before insertion checking to ensure atomicity
108        let mut font_index_lock = self.font_index.lock();
109
110        let entry = self.font_mapping.entry(font.clone());
111        let entry = entry.or_insert_with(|| {
112            let font_index = font_index_lock.deref_mut();
113            let mut abs_ref = FontRef {
114                hash: reflexo::hash::hash32(font),
115                idx: (*font_index) as u32,
116            };
117            *font_index += 1;
118
119            // Detect font short hash conflict
120            'conflict_detection: loop {
121                if let Some(conflict) = self.font_conflict_checker.get(&abs_ref.hash) {
122                    if *conflict != *font {
123                        log::error!(
124                            "font conflict detected: {} {:?} {:?}",
125                            abs_ref.hash,
126                            font,
127                            conflict
128                        );
129                    }
130                    abs_ref.hash += 1;
131                    continue 'conflict_detection;
132                }
133
134                self.font_conflict_checker
135                    .insert(abs_ref.hash, font.clone());
136                break 'conflict_detection;
137            }
138
139            if ENABLE_REF_CNT {
140                self.new_fonts.lock().push(font.clone().into_typst());
141            }
142
143            abs_ref
144        });
145
146        *entry.value()
147    }
148
149    pub fn build_glyph(&self, font_ref: FontRef, glyph: GlyphItem) -> GlyphRef {
150        let (_, id) = match &glyph {
151            GlyphItem::Raw(g, id) => (g, id),
152            _ => todo!(),
153        };
154
155        let glyph_idx = id.0 as u32;
156
157        let abs_ref = GlyphRef {
158            font_hash: font_ref.hash,
159            glyph_idx,
160        };
161
162        if self
163            .glyph_defs
164            .insert(glyph.clone(), (abs_ref, font_ref))
165            .is_some()
166        {
167            return abs_ref;
168        }
169
170        if ENABLE_REF_CNT {
171            self.new_glyphs.lock().push((abs_ref, glyph));
172        }
173
174        abs_ref
175    }
176
177    #[allow(dead_code)]
178    pub(crate) fn verify_glyph(&self, id: GlyphRef, data: &GlyphItem) {
179        if let Some(glyph) = self.glyph_defs.get(data) {
180            assert_eq!(glyph.0, id);
181        } else {
182            panic!("glyph not found");
183        }
184    }
185}
186
187impl IncrGlyph2VecPass {
188    pub fn finalize_delta(&self) -> (FontPack, Vec<(GlyphRef, FlatGlyphItem)>) {
189        let fonts = std::mem::take(self.new_fonts.lock().deref_mut());
190        let glyphs = std::mem::take(self.new_glyphs.lock().deref_mut());
191        let glyphs = glyphs
192            .into_par_iter()
193            .flat_map(|(id, glyph)| {
194                let glyph = self.inner.must_flat_glyph(&glyph);
195                glyph.map(|glyph| (id, glyph))
196            })
197            .collect::<Vec<_>>();
198        (fonts, glyphs)
199    }
200}
201
202impl ConvertInnerImpl {
203    pub fn new(gp: GlyphProvider, lowering_ligature: bool) -> Self {
204        Self {
205            gp,
206            lowering_ligature: cfg!(feature = "experimental-ligature") && lowering_ligature,
207        }
208    }
209
210    pub fn glyph(&self, glyph_item: &GlyphItem) -> Option<GlyphItem> {
211        match glyph_item {
212            GlyphItem::Raw(font, id) => self.raw_glyph(font, *id),
213            GlyphItem::Image(..) | GlyphItem::Outline(..) => Some(glyph_item.clone()),
214            GlyphItem::None => Some(GlyphItem::None),
215        }
216    }
217
218    pub fn must_flat_glyph(&self, glyph_item: &GlyphItem) -> Option<FlatGlyphItem> {
219        let glyph_item = self.glyph(glyph_item)?;
220        match glyph_item {
221            GlyphItem::Outline(i) => Some(FlatGlyphItem::Outline(i)),
222            GlyphItem::Image(i) => Some(FlatGlyphItem::Image(i)),
223            GlyphItem::None | GlyphItem::Raw(..) => None,
224        }
225    }
226
227    #[cfg(not(feature = "glyph2vec"))]
228    fn raw_glyph(&self, _font: &Font, _id: GlyphId) -> Option<GlyphItem> {
229        None
230    }
231}
232
233#[cfg(feature = "glyph2vec")]
234impl ConvertInnerImpl {
235    fn ligature_len(&self, font: &Font, id: GlyphId) -> u8 {
236        if !self.lowering_ligature {
237            return 0;
238        }
239
240        self.gp
241            .ligature_glyph(font, id)
242            .map(|l| l.len())
243            .unwrap_or_default() as u8
244    }
245
246    fn raw_glyph(&self, font: &Font, id: GlyphId) -> Option<GlyphItem> {
247        self.svg_glyph(font, id)
248            .map(GlyphItem::Image)
249            .or_else(|| self.bitmap_glyph(font, id).map(GlyphItem::Image))
250            .or_else(|| self.outline_glyph(font, id).map(GlyphItem::Outline))
251    }
252
253    /// Lower an SVG glyph into svg item.
254    /// More information: https://learn.microsoft.com/zh-cn/typography/opentype/spec/svg
255    fn svg_glyph(&self, font: &Font, id: GlyphId) -> Option<Arc<ir::ImageGlyphItem>> {
256        use crate::ir::Scalar;
257        use crate::utils::AbsExt;
258
259        let image = Self::extract_svg_glyph(&self.gp, font, id)?;
260
261        // position our image
262        let ascender = font
263            .metrics()
264            .ascender
265            .at(typst::layout::Abs::pt(font.metrics().units_per_em))
266            .to_f32();
267
268        Some(Arc::new(ir::ImageGlyphItem {
269            ts: ir::Transform {
270                sx: Scalar(1.),
271                ky: Scalar(0.),
272                kx: Scalar(0.),
273                sy: Scalar(-1.),
274                tx: Scalar(0.),
275                ty: Scalar(ascender),
276            },
277            image,
278            ligature_len: self.ligature_len(font, id),
279        }))
280    }
281
282    /// Lower a bitmap glyph into the svg text.
283    fn bitmap_glyph(&self, font: &Font, id: GlyphId) -> Option<Arc<ir::ImageGlyphItem>> {
284        use crate::utils::AbsExt;
285        /// Use types from `tiny-skia` crate.
286        use tiny_skia as sk;
287
288        let ppem = u16::MAX;
289        let upem = font.metrics().units_per_em as f32;
290
291        let (glyph_image, raster_x, raster_y) = self.gp.bitmap_glyph(font, id, ppem)?;
292
293        // FIXME: Vertical alignment isn't quite right for Apple Color Emoji,
294        // and maybe also for Noto Color Emoji. And: Is the size calculation
295        // correct?
296
297        let w = glyph_image.width();
298        let h = glyph_image.height();
299        let sz = Size::new(typst::layout::Abs::pt(w), typst::layout::Abs::pt(h));
300
301        let image = ir::ImageItem {
302            image: Arc::new(glyph_image.into_typst()),
303            size: sz.into_typst(),
304        };
305
306        // position our image
307        // first, the ascender is used
308        // next, also apply an offset of (1 - ascender) like typst
309        let adjusted = font.metrics().ascender * 2. - typst::layout::Em::one();
310        // let adjusted = font.metrics().ascender;
311
312        let adjusted = adjusted
313            .at(typst::layout::Abs::pt(font.metrics().units_per_em))
314            .to_f32();
315
316        let ts = sk::Transform::from_scale(upem / w as f32, -upem / h as f32);
317
318        // size
319        let dx = raster_x as f32;
320        let dy = raster_y as f32;
321        let ts = ts.post_translate(dx, adjusted + dy);
322
323        Some(Arc::new(ir::ImageGlyphItem {
324            ts: ts.into(),
325            image,
326            ligature_len: self.ligature_len(font, id),
327        }))
328    }
329
330    /// Lower an outline glyph into svg text. This is the "normal" case.
331    fn outline_glyph(&self, font: &Font, id: GlyphId) -> Option<Arc<ir::OutlineGlyphItem>> {
332        let d = self.gp.outline_glyph(font, id)?.into();
333
334        Some(Arc::new(ir::OutlineGlyphItem {
335            ts: None,
336            d,
337            ligature_len: self.ligature_len(font, id),
338        }))
339    }
340
341    fn extract_svg_glyph(g: &GlyphProvider, font: &Font, id: GlyphId) -> Option<ir::ImageItem> {
342        struct FindViewBoxResult<'a> {
343            start_span: Option<xmlparser::StrSpan<'a>>,
344            first_viewbox: Option<(xmlparser::StrSpan<'a>, xmlparser::StrSpan<'a>)>,
345        }
346
347        /// Find the string location of the **first** viewBox attribute.
348        /// When there are multiple viewBox attributes, the first one is used
349        /// (as many xml-based dom engines do).
350        fn find_viewbox_attr(svg_str: &'_ str) -> FindViewBoxResult<'_> {
351            let document = xmlparser::Tokenizer::from(svg_str);
352
353            let mut start_span = None;
354            let mut first_viewbox = None;
355            for n in document {
356                let tok = n.unwrap();
357                match tok {
358                    xmlparser::Token::ElementStart { span, local, .. } => {
359                        if local.as_str() == "svg" {
360                            start_span = Some(span);
361                        }
362                    }
363                    xmlparser::Token::Attribute {
364                        span, local, value, ..
365                    } => {
366                        if local.as_str() == "viewBox" {
367                            first_viewbox = Some((span, value));
368                            break;
369                        }
370                    }
371                    xmlparser::Token::ElementEnd { .. } => break,
372                    _ => {}
373                }
374            }
375
376            FindViewBoxResult {
377                start_span,
378                first_viewbox,
379            }
380        }
381        use crate::utils::AbsExt;
382        use std::io::Read;
383
384        use std::sync::OnceLock;
385
386        static WARN_VIEW_BOX: OnceLock<()> = OnceLock::new();
387
388        let data = g.svg_glyph(font, id)?;
389        let mut data = data.as_ref();
390
391        let font_metrics = font.metrics();
392
393        // Decompress SVGZ.
394        let mut decoded = vec![];
395
396        // The first three bytes of the gzip-encoded document header must be
397        //   0x1F, 0x8B, 0x08.
398        if data.starts_with(&[0x1f, 0x8b]) {
399            let mut decoder = flate2::read::GzDecoder::new(data);
400            decoder.read_to_end(&mut decoded).ok()?;
401            data = &decoded;
402        }
403
404        // todo: It is also legal to provide a SVG document containing multiple glyphs.
405        // > When a font engine renders glyph 14, the result shall be the same as
406        // > rendering the following SVG document:
407        // > `  <svg> <defs> <use #glyph{id}> </svg>`
408        // See: <https://learn.microsoft.com/en-us/typography/opentype/spec/svg#glyph-identifiers>
409
410        let upem = typst::layout::Abs::pt(font.units_per_em());
411        let (width, height) = (upem.to_f32(), upem.to_f32());
412        let origin_ascender = font_metrics.ascender.at(upem).to_f32();
413
414        let doc_string = String::from_utf8(data.to_owned()).unwrap();
415
416        // todo: verify SVG capability requirements and restrictions
417
418        // Partially parse the view box attribute
419        let mut svg_str = std::str::from_utf8(data).ok()?.to_owned();
420        let FindViewBoxResult {
421            start_span,
422            first_viewbox,
423        } = find_viewbox_attr(svg_str.as_str());
424
425        // determine view box
426        let view_box = first_viewbox
427            .as_ref()
428            .map(|s| {
429                WARN_VIEW_BOX.get_or_init(|| {
430                    eprintln!(
431                        "render_svg_glyph with viewBox, This should be helpful if you can help us verify the result: {:?} {:?}",
432                        font.info().family,
433                        doc_string
434                    );
435                });
436                s.1.as_str().to_owned()
437            })
438            .unwrap_or_else(|| format!("0 {} {width} {height}", -origin_ascender));
439
440        // determine view box
441        match first_viewbox {
442            Some((span, ..)) => {
443                // replace the first viewBox attribute
444                svg_str.replace_range(span.range(), format!(r#"viewBox="{view_box}""#).as_str());
445            }
446            None => {
447                // insert viewBox attribute to the begin of svg tag
448                svg_str.insert_str(
449                    start_span.unwrap().range().end,
450                    format!(r#" viewBox="{view_box}""#).as_str(),
451                );
452            }
453        }
454
455        let glyph_image = typst::visualize::Image::new(
456            SvgImage::new(Bytes::from_string(svg_str)).ok()?,
457            None,
458            // todo: scaling
459            Smart::Auto,
460        );
461
462        let sz = Size::new(
463            typst::layout::Abs::pt(glyph_image.width()),
464            typst::layout::Abs::pt(glyph_image.height()),
465        );
466
467        Some(ir::ImageItem {
468            image: Arc::new(glyph_image.into_typst()),
469            size: sz.into_typst(),
470        })
471    }
472}
473
474/// Lower a raster or SVG image into svg item.
475#[comemo::memoize]
476fn lower_image(image: &Image, size: Size) -> ir::ImageItem {
477    ir::ImageItem {
478        image: Arc::new(image.clone().into_typst()),
479        size: size.into_typst(),
480    }
481}