#![forbid(unsafe_code)]
#![warn(missing_docs)]
pub mod backend;
pub mod batch;
pub mod cache;
pub mod script_detect;
pub mod variational;
#[cfg(feature = "system-fonts")]
pub mod system_fonts;
#[cfg(feature = "system-fonts")]
pub use system_fonts::{
build_system_db, load_best_font_for_text, load_best_font_for_text_from, load_font_for_family,
load_font_for_family_from,
};
/// Re-exports of the font lookup helpers from
/// `oxifont_adapter_native::shaper_bridge`.
///
/// Only compiled when the `native-fallback` feature is enabled.
#[cfg(feature = "native-fallback")]
pub mod native_fallback {
pub use oxifont_adapter_native::shaper_bridge::{
collect_fallback_fonts_for_text, collect_fonts_for_text, find_native_font_for_codepoint,
load_best_native_font_for_text, load_native_font_for_codepoint_with_index,
};
}
#[cfg(feature = "rustybuzz-backend")]
pub use backend::RustybuzzShaper;
pub use backend::ShapeBackend;
pub use backend::SwashShaperBackend;
pub use cache::{FontId, ShapeCache, ShapeKey};
use oxitext_core::{OxiTextError, ShapedGlyph, ShapedRun};
pub use script_detect::{
requires_arabic_shaping, requires_indic_shaping, requires_mark_positioning,
};
use smallvec::SmallVec;
use std::sync::Arc;
use swash::shape::{Direction, ShapeContext};
use swash::FontRef;
/// A single font feature setting: a four-byte feature tag plus the value to
/// apply to it.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct ShapeFeature {
    /// Four-byte feature tag, e.g. `*b"liga"`.
    pub tag: [u8; 4],
    /// Value for the feature. [`ShapeFeature::enable`] uses `1` and
    /// [`ShapeFeature::disable`] uses `0`.
    pub value: u32,
}

impl ShapeFeature {
    /// The `liga` feature, enabled.
    pub const LIGA: Self = Self::enable(*b"liga");
    /// The `kern` feature, enabled.
    pub const KERN: Self = Self::enable(*b"kern");
    /// The `smcp` feature, enabled.
    pub const SMCP: Self = Self::enable(*b"smcp");
    /// The `calt` feature, enabled.
    pub const CALT: Self = Self::enable(*b"calt");
    /// The `vert` feature, enabled.
    pub const VERT: Self = Self::enable(*b"vert");
    /// The `vrt2` feature, enabled.
    pub const VRT2: Self = Self::enable(*b"vrt2");

    /// Creates a feature setting with an explicit value.
    pub const fn new(tag: [u8; 4], value: u32) -> Self {
        ShapeFeature { tag, value }
    }

    /// Creates a feature setting with value `1`.
    pub const fn enable(tag: [u8; 4]) -> Self {
        Self::new(tag, 1)
    }

    /// Creates a feature setting with value `0`.
    pub const fn disable(tag: [u8; 4]) -> Self {
        Self::new(tag, 0)
    }
}
/// Direction in which a run of text is to be shaped.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum ShapeDirection {
/// Left-to-right; this is the `Default` value.
#[default]
Ltr,
/// Right-to-left.
Rtl,
/// Top-to-bottom. `SwashShaper::shape_request` adds the `vert` and `vrt2`
/// features for this direction when the request does not already list them.
Ttb,
/// Bottom-to-top. Treated the same as [`ShapeDirection::Ttb`] by
/// `SwashShaper::shape_request`.
Btt,
}
/// Everything `SwashShaper::shape_request` needs to shape one piece of text.
///
/// Construct it directly or through [`ShapeRequest::builder`].
#[derive(Debug, Clone)]
pub struct ShapeRequest<'a> {
/// Text to shape.
pub text: &'a str,
/// Raw font bytes; the shaper reads the font at index 0.
pub font_data: &'a [u8],
/// Size handed to the shaper builder.
pub px_size: f32,
/// Requested shaping direction.
pub direction: ShapeDirection,
/// Optional four-byte script tag.
pub script: Option<[u8; 4]>,
/// Optional four-byte language tag; trailing whitespace is trimmed before it is parsed.
pub language: Option<[u8; 4]>,
/// Feature settings to pass to the shaper.
pub features: Vec<ShapeFeature>,
}
impl<'a> ShapeRequest<'a> {
/// Returns an empty [`ShapeRequestBuilder`] (its `Default` value).
pub fn builder() -> ShapeRequestBuilder<'a> {
ShapeRequestBuilder::default()
}
}
/// Builder for [`ShapeRequest`].
///
/// `text` and `font_data` must be set before `build` succeeds. Every other
/// field keeps its `Default` value when not set, so `px_size` starts at `0.0`
/// and `direction` at `ShapeDirection::Ltr`.
#[derive(Debug, Default)]
pub struct ShapeRequestBuilder<'a> {
// `None` until `text()` is called.
text: Option<&'a str>,
// `None` until `font_data()` is called.
font_data: Option<&'a [u8]>,
px_size: f32,
direction: ShapeDirection,
script: Option<[u8; 4]>,
language: Option<[u8; 4]>,
// Appended to by `feature()`, in call order.
features: Vec<ShapeFeature>,
}
/// Reasons building a `ShapeRequest` can fail.
#[derive(Debug)]
pub enum ShapeRequestError {
    /// The builder was never given any text.
    MissingText,
    /// The builder was never given any font data.
    MissingFont,
}

impl std::fmt::Display for ShapeRequestError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Pick the message first so there is a single write call.
        let message = match self {
            Self::MissingText => "text not set",
            Self::MissingFont => "font_data not set",
        };
        f.write_str(message)
    }
}

impl std::error::Error for ShapeRequestError {}
impl<'a> ShapeRequestBuilder<'a> {
    /// Sets the text to shape (required).
    pub fn text(self, t: &'a str) -> Self {
        Self {
            text: Some(t),
            ..self
        }
    }

    /// Sets the raw font bytes (required).
    pub fn font_data(self, d: &'a [u8]) -> Self {
        Self {
            font_data: Some(d),
            ..self
        }
    }

    /// Sets the shaping size.
    pub fn px_size(self, s: f32) -> Self {
        Self { px_size: s, ..self }
    }

    /// Sets the shaping direction.
    pub fn direction(self, d: ShapeDirection) -> Self {
        Self {
            direction: d,
            ..self
        }
    }

    /// Sets the four-byte script tag.
    pub fn script(self, tag: [u8; 4]) -> Self {
        Self {
            script: Some(tag),
            ..self
        }
    }

    /// Sets the four-byte language tag.
    pub fn language(self, tag: [u8; 4]) -> Self {
        Self {
            language: Some(tag),
            ..self
        }
    }

    /// Appends one feature setting; call repeatedly to add several.
    pub fn feature(self, f: ShapeFeature) -> Self {
        let mut builder = self;
        builder.features.push(f);
        builder
    }

    /// Finishes the builder.
    ///
    /// # Errors
    ///
    /// Returns [`ShapeRequestError::MissingText`] when no text was set, and
    /// otherwise [`ShapeRequestError::MissingFont`] when no font data was set.
    pub fn build(self) -> Result<ShapeRequest<'a>, ShapeRequestError> {
        let Self {
            text,
            font_data,
            px_size,
            direction,
            script,
            language,
            features,
        } = self;

        // Text is checked before the font, so a fully empty builder reports
        // the missing text.
        let text = text.ok_or(ShapeRequestError::MissingText)?;
        let font_data = font_data.ok_or(ShapeRequestError::MissingFont)?;

        Ok(ShapeRequest {
            text,
            font_data,
            px_size,
            direction,
            script,
            language,
            features,
        })
    }
}
// Argument bundle for `SwashShaper::shape_with_features_internal`, so the
// public entry points can share one implementation.
struct ShapeParams<'a> {
// Raw font bytes; the font at index 0 is used.
font_data: &'a [u8],
// Text to shape; glyph cluster values are byte offsets into this string.
text: &'a str,
px_size: f32,
// `true` shapes right-to-left, `false` left-to-right.
rtl: bool,
// Four-byte script tag; Latin is used when absent or unrecognised.
script_tag: Option<[u8; 4]>,
// Four-byte language tag, trimmed of trailing whitespace before parsing.
language_tag: Option<[u8; 4]>,
features: &'a [ShapeFeature],
}
/// Text shaper built around a reusable swash `ShapeContext`, with an optional
/// shared [`ShapeCache`].
pub struct SwashShaper {
// Reused across calls; every shaping method builds its shaper from this context.
ctx: ShapeContext,
// `None` when created with `new()`; consulted and filled only by `shape()`.
cache: Option<Arc<ShapeCache>>,
// Text most recently itemized by `shape_by_script`.
#[cfg(feature = "icu")]
script_cache_text: String,
// Script runs computed for `script_cache_text`.
#[cfg(feature = "icu")]
script_cache_runs: Vec<oxitext_icu::ScriptRun>,
}
impl SwashShaper {
pub fn new() -> Self {
Self {
ctx: ShapeContext::new(),
cache: None,
#[cfg(feature = "icu")]
script_cache_text: String::new(),
#[cfg(feature = "icu")]
script_cache_runs: Vec::new(),
}
}
pub fn with_cache(capacity: usize) -> Self {
Self {
ctx: ShapeContext::new(),
cache: Some(Arc::new(ShapeCache::new(capacity))),
#[cfg(feature = "icu")]
script_cache_text: String::new(),
#[cfg(feature = "icu")]
script_cache_runs: Vec::new(),
}
}
pub fn shape_cache(&self) -> Option<&Arc<ShapeCache>> {
self.cache.as_ref()
}
pub fn shape(
&mut self,
text: &str,
font_data: Arc<[u8]>,
size: f32,
) -> Result<ShapedRun, OxiTextError> {
let axis_hash = size.to_bits() as u64;
if let Some(ref cache) = self.cache {
let key = ShapeKey::new(&font_data, text, axis_hash);
if let Some(cached) = cache.get(&key) {
return Ok((*cached).clone());
}
}
let font = FontRef::from_index(&font_data, 0)
.ok_or_else(|| OxiTextError::Shaping("swash could not parse font bytes".into()))?;
let mut shaper = self.ctx.builder(font).size(size).build();
shaper.add_str(text);
let mut glyphs: SmallVec<[ShapedGlyph; 8]> = SmallVec::new();
shaper.shape_with(|cluster| {
let cluster_range = cluster.source.start as usize..cluster.source.end as usize;
let is_ws = text
.get(cluster_range)
.map(|slice| !slice.is_empty() && slice.chars().all(|c| c.is_whitespace()))
.unwrap_or(false);
let multi = cluster.glyphs.len() > 1;
for (idx, glyph) in cluster.glyphs.iter().enumerate() {
let utb = (multi && idx > 0) || glyph.info.is_mark();
glyphs.push(ShapedGlyph {
gid: glyph.id,
x_advance: glyph.advance,
y_advance: 0.0,
x_offset: glyph.x,
y_offset: glyph.y,
cluster: cluster.source.start,
is_whitespace: is_ws,
unsafe_to_break: utb,
});
}
});
let run = ShapedRun {
glyphs,
font_data: Arc::clone(&font_data),
};
if let Some(ref cache) = self.cache {
let key = ShapeKey::new(&font_data, text, axis_hash);
cache.insert(key, Arc::new(run.clone()));
}
Ok(run)
}
/// Shapes `text` left-to-right or right-to-left.
///
/// With `rtl == false` this is exactly [`SwashShaper::shape`]. With
/// `rtl == true` the text is shaped right-to-left without consulting the
/// shape cache, and the glyphs are then stably sorted by ascending cluster
/// offset.
///
/// # Errors
///
/// Returns `OxiTextError::Shaping` when the font bytes cannot be parsed.
pub fn shape_with_direction(
    &mut self,
    text: &str,
    font_data: Arc<[u8]>,
    size: f32,
    rtl: bool,
) -> Result<ShapedRun, OxiTextError> {
    if rtl {
        let mut shaped = self.do_shape_rtl(text, font_data, size)?;
        shaped.glyphs.sort_by_key(|glyph| glyph.cluster);
        Ok(shaped)
    } else {
        self.shape(text, font_data, size)
    }
}
/// Shapes the text described by `req`.
///
/// * With the `icu` feature the text is NFC-normalised first, so glyph
///   cluster offsets index into the normalised string.
/// * A request for `Ltr` whose text `requires_arabic_shaping` is shaped as
///   `Rtl` instead (debug builds log this to stderr).
/// * For `Ttb` and `Btt` the `vert` and `vrt2` features are appended unless
///   the request already sets those tags; the text itself is shaped with
///   the left-to-right shaper direction.
///
/// # Errors
///
/// Returns `OxiTextError::Shaping` when the font bytes cannot be parsed.
pub fn shape_request(
    &mut self,
    req: &ShapeRequest<'_>,
) -> Result<Vec<ShapedGlyph>, OxiTextError> {
    #[cfg(feature = "icu")]
    let normalized_text: String = oxitext_icu::Normalizer::new().nfc(req.text);
    #[cfg(feature = "icu")]
    let req_text: &str = normalized_text.as_str();
    #[cfg(not(feature = "icu"))]
    let req_text: &str = req.text;

    let arabic_in_ltr =
        req.direction == ShapeDirection::Ltr && requires_arabic_shaping(req_text);
    let effective_direction = if arabic_in_ltr {
        #[cfg(debug_assertions)]
        eprintln!("[oxitext-shape] Arabic text detected with Ltr direction; upgrading to Rtl");
        ShapeDirection::Rtl
    } else {
        req.direction
    };

    let mut features = req.features.clone();
    let is_vertical = matches!(
        effective_direction,
        ShapeDirection::Ttb | ShapeDirection::Btt
    );
    if is_vertical {
        // Respect any explicit caller setting for these tags, even a disabling one.
        for required in [ShapeFeature::VERT, ShapeFeature::VRT2] {
            if features.iter().all(|f| f.tag != required.tag) {
                features.push(required);
            }
        }
    }

    let params = ShapeParams {
        font_data: req.font_data,
        text: req_text,
        px_size: req.px_size,
        rtl: effective_direction == ShapeDirection::Rtl,
        script_tag: req.script,
        language_tag: req.language,
        features: &features,
    };
    self.shape_with_features_internal(params)
}
/// Shapes `text` with explicit feature settings and no script or language
/// tag.
///
/// With `rtl == true` the text is shaped right-to-left and the glyphs are
/// returned sorted by ascending cluster offset.
///
/// # Errors
///
/// Returns `OxiTextError::Shaping` when the font bytes cannot be parsed.
pub fn shape_with_features(
    &mut self,
    font_data: &[u8],
    text: &str,
    px_size: f32,
    rtl: bool,
    features: &[ShapeFeature],
) -> Result<Vec<ShapedGlyph>, OxiTextError> {
    let params = ShapeParams {
        font_data,
        text,
        px_size,
        rtl,
        features,
        script_tag: None,
        language_tag: None,
    };
    self.shape_with_features_internal(params)
}
fn shape_with_features_internal(
&mut self,
params: ShapeParams<'_>,
) -> Result<Vec<ShapedGlyph>, OxiTextError> {
use swash::tag_from_bytes;
use swash::text::{Language, Script};
let font = FontRef::from_index(params.font_data, 0)
.ok_or_else(|| OxiTextError::Shaping("swash could not parse font bytes".into()))?;
let direction = if params.rtl {
Direction::RightToLeft
} else {
Direction::LeftToRight
};
let script = params
.script_tag
.and_then(|t| Script::from_opentype(tag_from_bytes(&t)))
.unwrap_or(Script::Latin);
let language = params.language_tag.and_then(|t| {
let s = std::str::from_utf8(&t).unwrap_or("").trim_end();
Language::parse(s)
});
let swash_features: Vec<([u8; 4], u16)> = params
.features
.iter()
.map(|f| (f.tag, f.value.min(u32::from(u16::MAX)) as u16))
.collect();
let mut shaper = self
.ctx
.builder(font)
.size(params.px_size)
.direction(direction)
.script(script)
.language(language)
.features(swash_features.iter())
.build();
shaper.add_str(params.text);
let mut glyphs: Vec<ShapedGlyph> = Vec::new();
shaper.shape_with(|cluster| {
let cluster_range = cluster.source.start as usize..cluster.source.end as usize;
let is_ws = params
.text
.get(cluster_range)
.map(|slice| !slice.is_empty() && slice.chars().all(|c| c.is_whitespace()))
.unwrap_or(false);
let multi = cluster.glyphs.len() > 1;
for (idx, glyph) in cluster.glyphs.iter().enumerate() {
let utb = (multi && idx > 0) || glyph.info.is_mark();
glyphs.push(ShapedGlyph {
gid: glyph.id,
x_advance: glyph.advance,
y_advance: 0.0,
x_offset: glyph.x,
y_offset: glyph.y,
cluster: cluster.source.start,
is_whitespace: is_ws,
unsafe_to_break: utb,
});
}
});
if params.rtl {
glyphs.sort_by_key(|g| g.cluster);
}
Ok(glyphs)
}
// Shapes `text` right-to-left with the font at index 0 of `font_data`.
//
// Glyphs are returned in the order the shaper emits them; the caller is
// responsible for any reordering. The shape cache is not consulted.
fn do_shape_rtl(
    &mut self,
    text: &str,
    font_data: Arc<[u8]>,
    size: f32,
) -> Result<ShapedRun, OxiTextError> {
    let font = match FontRef::from_index(&font_data, 0) {
        Some(font) => font,
        None => {
            return Err(OxiTextError::Shaping(
                "swash could not parse font bytes".into(),
            ))
        }
    };

    let builder = self
        .ctx
        .builder(font)
        .size(size)
        .direction(Direction::RightToLeft);
    let mut shaper = builder.build();
    shaper.add_str(text);

    let mut glyphs: SmallVec<[ShapedGlyph; 8]> = SmallVec::new();
    shaper.shape_with(|cluster| {
        let source_start = cluster.source.start;
        let source_text = text.get(source_start as usize..cluster.source.end as usize);
        let is_whitespace = source_text
            .map_or(false, |s| !s.is_empty() && s.chars().all(char::is_whitespace));
        for (idx, glyph) in cluster.glyphs.iter().enumerate() {
            // A glyph at index > 0 implies a multi-glyph cluster.
            let unsafe_to_break = idx > 0 || glyph.info.is_mark();
            glyphs.push(ShapedGlyph {
                gid: glyph.id,
                x_advance: glyph.advance,
                y_advance: 0.0,
                x_offset: glyph.x,
                y_offset: glyph.y,
                cluster: source_start,
                is_whitespace,
                unsafe_to_break,
            });
        }
    });

    Ok(ShapedRun {
        glyphs,
        font_data: Arc::clone(&font_data),
    })
}
pub fn shape_full(
&mut self,
font_data: &[u8],
text: &str,
px_size: f32,
) -> Result<ShapeResult, OxiTextError> {
use unicode_segmentation::UnicodeSegmentation;
let glyphs = self.shape_with_features_internal(ShapeParams {
font_data,
text,
px_size,
rtl: false,
script_tag: None,
language_tag: None,
features: &[],
})?;
let mut result = ShapeResult::from_glyphs(glyphs, text, ShapeDirection::Ltr);
result.cluster_boundaries = text
.grapheme_indices(true)
.map(|(i, _)| i)
.chain(std::iter::once(text.len()))
.collect();
Ok(result)
}
pub fn shape_slice(
&mut self,
font_data: &[u8],
text: &str,
px_size: f32,
) -> Result<Vec<ShapedGlyph>, OxiTextError> {
self.shape_with_features_internal(ShapeParams {
font_data,
text,
px_size,
rtl: false,
script_tag: None,
language_tag: None,
features: &[],
})
}
pub fn shape_slice_rtl(
&mut self,
font_data: &[u8],
text: &str,
px_size: f32,
) -> Result<Vec<ShapedGlyph>, OxiTextError> {
self.shape_with_features_internal(ShapeParams {
font_data,
text,
px_size,
rtl: true,
script_tag: None,
language_tag: None,
features: &[],
})
}
/// Shapes `text` with the first font in `fonts`, then retries every run of
/// missing glyphs (glyph id 0) with the remaining fonts in order.
///
/// For each missing run, the first fallback font that parses and produces
/// at least one non-zero glyph wins: the run's glyph-id-0 entries are
/// removed and the fallback glyphs are inserted in their place, with
/// cluster offsets rebased onto `text`. Runs no fallback font can cover
/// keep their original glyphs. Fallback fonts that fail to shape are
/// skipped rather than reported.
///
/// # Errors
///
/// Returns `OxiTextError::Shaping` when `fonts` is empty or the primary
/// font cannot be parsed.
pub fn shape_with_fallback(
    &mut self,
    fonts: &[&[u8]],
    text: &str,
    px_size: f32,
) -> Result<Vec<ShapedGlyph>, OxiTextError> {
    let (primary, fallbacks) = fonts
        .split_first()
        .ok_or_else(|| OxiTextError::Shaping("font list is empty".into()))?;

    let mut result = self.shape_with_features_internal(ShapeParams {
        font_data: primary,
        text,
        px_size,
        rtl: false,
        script_tag: None,
        language_tag: None,
        features: &[],
    })?;
    if fallbacks.is_empty() {
        return Ok(result);
    }

    for (run_text_start, run_text_end) in collect_notdef_runs(&result, text) {
        let sub_text = match text.get(run_text_start..run_text_end) {
            Some(s) if !s.is_empty() => s,
            _ => continue,
        };

        for fallback_font in fallbacks {
            let fallback_glyphs = match self.shape_with_features_internal(ShapeParams {
                font_data: fallback_font,
                text: sub_text,
                px_size,
                rtl: false,
                script_tag: None,
                language_tag: None,
                features: &[],
            }) {
                Ok(glyphs) => glyphs,
                // Best effort: an unusable fallback font is simply skipped.
                Err(_) => continue,
            };
            if fallback_glyphs.iter().all(|g| g.gid == 0) {
                continue;
            }

            // Drop the missing glyphs of this run from the primary result.
            result.retain(|g| {
                let cluster = g.cluster as usize;
                !(cluster >= run_text_start && cluster < run_text_end && g.gid == 0)
            });

            // The result is in ascending cluster order (left-to-right
            // shaping), so the insertion point can be binary-searched.
            let insert_pos = result.partition_point(|g| (g.cluster as usize) < run_text_start);

            // Fallback clusters are relative to `sub_text`; rebase them onto
            // `text` and insert them in one shift.
            let start_offset = run_text_start as u32;
            drop(result.splice(
                insert_pos..insert_pos,
                fallback_glyphs.into_iter().map(|mut g| {
                    g.cluster += start_offset;
                    g
                }),
            ));
            break;
        }
    }
    Ok(result)
}
/// Returns `true` when the font at index 0 of `font_data` contains a
/// `morx`, `kerx` or `ankr` table.
///
/// Returns `false` when the font cannot be parsed.
pub fn font_has_aat(font_data: &[u8]) -> bool {
    const AAT_TABLES: [&[u8; 4]; 3] = [b"morx", b"kerx", b"ankr"];

    match ttf_parser::Face::parse(font_data, 0) {
        Ok(face) => {
            let raw = face.raw_face();
            AAT_TABLES
                .iter()
                .any(|&tag| raw.table(ttf_parser::Tag::from_bytes(tag)).is_some())
        }
        Err(_) => false,
    }
}
pub fn shape_with_aat_fallback(
&mut self,
font_data: &[u8],
text: &str,
px_size: f32,
) -> Result<ShapeResult, OxiTextError> {
use unicode_segmentation::UnicodeSegmentation;
let glyphs = self.shape_with_features_internal(ShapeParams {
font_data,
text,
px_size,
rtl: false,
script_tag: None,
language_tag: None,
features: &[],
})?;
let mut result = ShapeResult::from_glyphs(glyphs, text, ShapeDirection::Ltr);
result.cluster_boundaries = text
.grapheme_indices(true)
.map(|(i, _)| i)
.chain(std::iter::once(text.len()))
.collect();
Ok(result)
}
}
/// Shaped glyphs together with information derived from the source text.
#[derive(Debug, Clone)]
pub struct ShapeResult {
    /// The shaped glyphs.
    pub glyphs: Vec<ShapedGlyph>,
    /// Detected script tag; [`ShapeResult::from_glyphs`] leaves this `None`.
    pub script_detected: Option<[u8; 4]>,
    /// Direction the glyphs were shaped in.
    pub direction: ShapeDirection,
    /// Distinct characters whose glyph id is 0, in first-seen order.
    pub missing_codepoints: Vec<char>,
    /// Byte offsets of cluster boundaries; [`ShapeResult::from_glyphs`]
    /// leaves this empty for the caller to fill in.
    pub cluster_boundaries: Vec<usize>,
}

impl ShapeResult {
    /// Wraps `glyphs` and records which characters of `text` have no glyph.
    ///
    /// For each glyph with id 0, the character starting at the glyph's
    /// cluster byte offset is recorded once. Glyphs whose offset is out of
    /// range, or not on a character boundary, are ignored for this purpose.
    pub fn from_glyphs(glyphs: Vec<ShapedGlyph>, text: &str, direction: ShapeDirection) -> Self {
        let mut seen = std::collections::HashSet::new();
        let missing_codepoints: Vec<char> = glyphs
            .iter()
            .filter(|glyph| glyph.gid == 0)
            .filter_map(|glyph| text.get(glyph.cluster as usize..)?.chars().next())
            // `insert` returns false for repeats, which keeps first-seen order.
            .filter(|ch| seen.insert(*ch))
            .collect();

        Self {
            glyphs,
            script_detected: None,
            direction,
            missing_codepoints,
            cluster_boundaries: Vec::new(),
        }
    }
}
fn collect_notdef_runs(glyphs: &[ShapedGlyph], text: &str) -> Vec<(usize, usize)> {
if glyphs.is_empty() {
return Vec::new();
}
let mut notdef_clusters: Vec<usize> = glyphs
.iter()
.filter(|g| g.gid == 0)
.map(|g| g.cluster as usize)
.collect();
notdef_clusters.sort_unstable();
notdef_clusters.dedup();
let mut all_starts: Vec<usize> = glyphs.iter().map(|g| g.cluster as usize).collect();
all_starts.sort_unstable();
all_starts.dedup();
let mut runs: Vec<(usize, usize)> = Vec::new();
for &start in ¬def_clusters {
let end = all_starts
.iter()
.find(|&&s| s > start)
.copied()
.unwrap_or(text.len());
if let Some(last) = runs.last_mut() {
if last.1 == start {
last.1 = end;
continue;
}
}
runs.push((start, end));
}
runs
}
impl Default for SwashShaper {
fn default() -> Self {
Self::new()
}
}
#[cfg(feature = "system-fonts")]
impl SwashShaper {
    /// Shapes `text` left-to-right with the font returned by
    /// `system_fonts::load_best_font_for_text`.
    ///
    /// # Errors
    ///
    /// Returns `OxiTextError::Shaping` when no font is found or the font
    /// bytes cannot be parsed.
    pub fn shape_with_system_font(
        &mut self,
        text: &str,
        px_size: f32,
    ) -> Result<Vec<ShapedGlyph>, OxiTextError> {
        match system_fonts::load_best_font_for_text(text) {
            Some(font_data) => self.shape_slice(&font_data, text, px_size),
            None => Err(OxiTextError::Shaping(
                "no system font found for text".into(),
            )),
        }
    }

    /// Shapes `text` left-to-right with the font returned by
    /// `system_fonts::load_font_for_family` for `family`.
    ///
    /// # Errors
    ///
    /// Returns `OxiTextError::Shaping` when no font is found for `family`
    /// or the font bytes cannot be parsed.
    pub fn shape_with_family(
        &mut self,
        text: &str,
        family: &str,
        px_size: f32,
    ) -> Result<Vec<ShapedGlyph>, OxiTextError> {
        match system_fonts::load_font_for_family(family) {
            Some(font_data) => self.shape_slice(&font_data, text, px_size),
            None => Err(OxiTextError::Shaping(format!(
                "no system font found for family '{family}'"
            ))),
        }
    }
}
// Maps an ICU script to the four-byte script tag handed to the shaper.
//
// Hiragana and Katakana share `kana`; any script not listed maps to `DFLT`.
// NOTE(review): Devanagari maps to `dev2` — confirm that the shaper's script
// lookup recognises this tag, since unrecognised tags fall back to Latin in
// `shape_with_features_internal`.
#[cfg(feature = "icu")]
fn text_script_to_ot_tag(s: oxitext_icu::TextScript) -> [u8; 4] {
    use oxitext_icu::TextScript;

    let tag: &[u8; 4] = match s {
        TextScript::Latin => b"latn",
        TextScript::Arabic => b"arab",
        TextScript::Devanagari => b"dev2",
        TextScript::Han => b"hani",
        TextScript::Hangul => b"hang",
        TextScript::Hiragana | TextScript::Katakana => b"kana",
        TextScript::Hebrew => b"hebr",
        TextScript::Thai => b"thai",
        TextScript::Greek => b"grek",
        TextScript::Cyrillic => b"cyrl",
        _ => b"DFLT",
    };
    *tag
}
#[cfg(feature = "icu")]
impl SwashShaper {
    /// Splits `text` into script runs and shapes each run separately with
    /// the same font, size and features.
    ///
    /// The script runs for the most recently seen `text` are kept on the
    /// shaper and reused when the same text is passed again. Each run is
    /// shaped with the script tag from `text_script_to_ot_tag` and in the
    /// direction reported by the run's script. Cluster offsets in the
    /// returned runs are byte offsets into the full `text`.
    ///
    /// # Errors
    ///
    /// Returns `OxiTextError::Shaping` when a script run's byte range is
    /// not a valid slice of `text` or the font bytes cannot be parsed.
    pub fn shape_by_script(
        &mut self,
        font_data: Arc<[u8]>,
        text: &str,
        px_size: f32,
        features: &[ShapeFeature],
    ) -> Result<Vec<ShapedRun>, OxiTextError> {
        let cache_is_stale = self.script_cache_text != text;
        if cache_is_stale {
            self.script_cache_runs = oxitext_icu::CharProperties::new().itemize(text);
            self.script_cache_text = text.to_owned();
        }

        // Cloned so the runs can be walked while `self` is borrowed mutably
        // for shaping.
        let script_runs = self.script_cache_runs.clone();
        let mut shaped_runs: Vec<ShapedRun> = Vec::with_capacity(script_runs.len());

        for run in &script_runs {
            let sub_text = match text.get(run.start..run.end) {
                Some(slice) => slice,
                None => {
                    return Err(OxiTextError::Shaping(
                        "invalid script run byte range".into(),
                    ))
                }
            };

            let mut glyphs = self.shape_with_features_internal(ShapeParams {
                font_data: &font_data,
                text: sub_text,
                px_size,
                rtl: run.script.is_rtl(),
                script_tag: Some(text_script_to_ot_tag(run.script)),
                language_tag: None,
                features,
            })?;

            // Clusters are relative to `sub_text`; rebase them onto `text`.
            let start_offset = run.start as u32;
            glyphs
                .iter_mut()
                .for_each(|glyph| glyph.cluster += start_offset);

            shaped_runs.push(ShapedRun {
                glyphs: glyphs.into(),
                font_data: Arc::clone(&font_data),
            });
        }
        Ok(shaped_runs)
    }
}
// Returns `true` for the basic Arabic letters that join on both sides
// (Unicode joining type D) within U+0626..=U+063A and U+0641..=U+064A.
//
// The right-joining letters inside those ranges are excluded: ALEF (U+0627),
// TEH MARBUTA (U+0629), DAL..ZAIN (U+062F..=U+0632) and WAW (U+0648).
// Everything outside the two ranges returns `false`.
fn is_arabic_dual_joining(c: char) -> bool {
    match c {
        // Right-joining letters that fall inside the ranges below.
        '\u{0627}' | '\u{0629}' | '\u{062F}'..='\u{0632}' | '\u{0648}' => false,
        '\u{0626}'..='\u{063A}' | '\u{0641}'..='\u{064A}' => true,
        _ => false,
    }
}
pub fn find_kashida_opportunities(text: &str, glyphs: &[ShapedGlyph]) -> Vec<usize> {
let mut result = Vec::new();
for (idx, glyph) in glyphs.iter().enumerate() {
let byte_pos = glyph.cluster as usize;
if let Some(ch) = text.get(byte_pos..).and_then(|s| s.chars().next()) {
if is_arabic_dual_joining(ch) {
result.push(idx);
}
}
}
result
}
/// Returns the byte ranges of extended grapheme clusters in `text` that
/// contain a zero-width joiner (U+200D) and at least two other characters.
///
/// The check is purely structural: the characters being joined are not
/// tested for being emoji. Ranges are returned in text order.
pub fn detect_emoji_zwj_sequences(text: &str) -> Vec<std::ops::Range<usize>> {
    use unicode_segmentation::UnicodeSegmentation;

    const ZWJ: char = '\u{200D}';

    text.grapheme_indices(true)
        .filter(|(_, cluster)| {
            cluster.contains(ZWJ) && cluster.chars().filter(|&c| c != ZWJ).count() >= 2
        })
        .map(|(start, cluster)| start..start + cluster.len())
        .collect()
}
#[cfg(test)]
mod bench_tests;
#[cfg(test)]
mod tests_inline;