pub(crate) mod embedded;
pub(crate) mod huffman;
pub(crate) mod reader;
use alloc::boxed::Box;
use alloc::vec::Vec;
pub use embedded::load_embedded;
use crate::cmap::bcmap::embedded::BUNDLE;
use crate::cmap::{
BfRange, CMap, CMapName, CharacterCollection, CidFamily, CidRange, CodespaceRange, Metadata,
PartitionedRanges, Range, WritingMode, parse,
};
use huffman::HuffmanTable;
use reader::Reader;
// File-level framing. `parse` compares the first five bytes of the input with
// the magic and the sixth byte with the version.
const BCMAP_MAGIC: &[u8] = b"bcmap";
const BCMAP_VERSION: u8 = 0x01;
// Offset at which segment data begins: `parse` skips 6 bytes (magic + version),
// reads a `u32` length, and then slices the input starting at this offset.
const BCMAP_FILE_HEADER_SIZE: usize = 10;
// Subtracted from each segment's stored length to obtain the payload length;
// `parse` reads a one-byte type tag and a `u32` length in front of every payload.
const SEG_HEADER_SIZE: usize = 5;
// CID mapping segments. `parse` derives the code byte width from the tag as
// `tag.div_ceil(2)`, so the numbering 0x01..=0x08 is load-bearing. RANGE tags
// are decoded with the count table, SINGLE tags without it.
const SEGMENT_RANGE_1B: u8 = 0x01;
const SEGMENT_SINGLE_1B: u8 = 0x02;
const SEGMENT_RANGE_2B: u8 = 0x03;
const SEGMENT_SINGLE_2B: u8 = 0x04;
const SEGMENT_RANGE_3B: u8 = 0x05;
const SEGMENT_SINGLE_3B: u8 = 0x06;
const SEGMENT_RANGE_4B: u8 = 0x07;
const SEGMENT_SINGLE_4B: u8 = 0x08;
// Metadata and auxiliary segments; each is handled by its own arm in `parse`.
const SEGMENT_USECMAP: u8 = 0x09;
const SEGMENT_NOTDEF: u8 = 0x0A;
const SEGMENT_WMODE: u8 = 0x0B;
const SEGMENT_CODESPACE: u8 = 0x0C;
const SEGMENT_NAME: u8 = 0x0D;
const SEGMENT_CID_SYSTEM_INFO: u8 = 0x0E;
// BF segments, all decoded by `parse_bf_segment`. VARIABLE segments store a
// per-entry count byte for the number of `u16` destination units; for the `_nU`
// tags `parse` passes `n` as the fixed number of units per entry instead.
const SEGMENT_BF_RANGE_VARIABLE: u8 = 0x0F;
const SEGMENT_BF_SINGLE_VARIABLE: u8 = 0x10;
const SEGMENT_BF_SINGLE_1U: u8 = 0x11;
const SEGMENT_BF_SINGLE_2U: u8 = 0x12;
const SEGMENT_BF_SINGLE_3U: u8 = 0x13;
const SEGMENT_BF_SINGLE_4U: u8 = 0x14;
const SEGMENT_BF_RANGE_1U: u8 = 0x15;
const SEGMENT_BF_RANGE_2U: u8 = 0x16;
pub(crate) fn parse<'a>(
data: &[u8],
get_cmap: impl Fn(CMapName<'_>) -> Option<&'a [u8]> + Clone + 'a,
depth: u32,
) -> Option<CMap> {
if data.get(..5)? != BCMAP_MAGIC || *data.get(5)? != BCMAP_VERSION {
return None;
}
let mut reader = Reader::new(data);
reader.read_bytes(6)?;
let file_len = reader.read_u32()? as usize;
let delta_table = &BUNDLE.delta_table;
let count_table = &BUNDLE.count_table;
let mut cmap_name = None;
let mut character_collection = None;
let mut writing_mode = None;
let mut base: Option<Box<CMap>> = None;
let mut _codespace_ranges = Vec::new();
let mut cid_ranges = PartitionedRanges::new();
let mut notdef_ranges = PartitionedRanges::new();
let mut bf_entries = Vec::new();
let mut reader = Reader::new(data.get(BCMAP_FILE_HEADER_SIZE..file_len)?);
while !reader.at_end() {
let seg_type = reader.read_u8()?;
let seg_len = reader.read_u32()? as usize;
let payload = reader.read_bytes(seg_len.checked_sub(SEG_HEADER_SIZE)?)?;
match seg_type {
SEGMENT_NAME => {
cmap_name = Some(Vec::from(payload));
}
SEGMENT_CID_SYSTEM_INFO => {
let mut r = Reader::new(payload);
let registry = Vec::from(r.eat_until(|b| b == 0));
r.read_u8()?;
let ordering = Vec::from(r.eat_until(|b| b == 0));
r.read_u8()?;
let supplement = r.read_u16()? as i32;
character_collection = Some(CharacterCollection {
family: CidFamily::from_registry_ordering(®istry, &ordering),
supplement,
});
}
SEGMENT_USECMAP => {
let base_data = get_cmap(CMapName::from_bytes(payload))?;
base = Some(Box::new(parse::parse_inner(
base_data,
get_cmap.clone(),
depth,
)?));
}
SEGMENT_WMODE => {
writing_mode = match payload.first()? {
0 => Some(WritingMode::Horizontal),
1 => Some(WritingMode::Vertical),
_ => None,
};
}
SEGMENT_CODESPACE => {
parse_codespace(payload, &mut _codespace_ranges)?;
}
SEGMENT_NOTDEF => {
parse_notdef(payload, &mut notdef_ranges)?;
}
SEGMENT_RANGE_1B | SEGMENT_RANGE_2B | SEGMENT_RANGE_3B | SEGMENT_RANGE_4B => {
let bw = seg_type.div_ceil(2) as usize;
parse_cid_segment(payload, &mut cid_ranges, bw, delta_table, Some(count_table))?;
}
SEGMENT_SINGLE_1B | SEGMENT_SINGLE_2B | SEGMENT_SINGLE_3B | SEGMENT_SINGLE_4B => {
let bw = seg_type.div_ceil(2) as usize;
parse_cid_segment(payload, &mut cid_ranges, bw, delta_table, None)?;
}
SEGMENT_BF_RANGE_VARIABLE => {
parse_bf_segment(
payload,
&mut bf_entries,
delta_table,
Some(count_table),
None,
)?;
}
SEGMENT_BF_SINGLE_VARIABLE => {
parse_bf_segment(payload, &mut bf_entries, delta_table, None, None)?;
}
SEGMENT_BF_RANGE_1U => {
parse_bf_segment(
payload,
&mut bf_entries,
delta_table,
Some(count_table),
Some(1),
)?;
}
SEGMENT_BF_RANGE_2U => {
parse_bf_segment(
payload,
&mut bf_entries,
delta_table,
Some(count_table),
Some(2),
)?;
}
SEGMENT_BF_SINGLE_1U => {
parse_bf_segment(payload, &mut bf_entries, delta_table, None, Some(1))?;
}
SEGMENT_BF_SINGLE_2U => {
parse_bf_segment(payload, &mut bf_entries, delta_table, None, Some(2))?;
}
SEGMENT_BF_SINGLE_3U => {
parse_bf_segment(payload, &mut bf_entries, delta_table, None, Some(3))?;
}
SEGMENT_BF_SINGLE_4U => {
parse_bf_segment(payload, &mut bf_entries, delta_table, None, Some(4))?;
}
_ => {
return None;
}
}
}
cid_ranges.sort();
notdef_ranges.sort();
bf_entries.sort_by(|a, b| a.range.start.cmp(&b.range.start));
Some(CMap {
metadata: Metadata {
character_collection,
name: cmap_name,
writing_mode,
},
_codespace_ranges,
cid_ranges,
notdef_ranges,
bf_entries,
base,
})
}
/// Decodes a codespace segment payload and appends every range to `ranges`.
///
/// The payload starts with a one-byte range count. Each range is a one-byte
/// code width followed by the low and the high bound, both read with that
/// width. Returns `None` as soon as the payload runs out; ranges decoded up to
/// that point have already been appended.
fn parse_codespace(payload: &[u8], ranges: &mut Vec<CodespaceRange>) -> Option<()> {
    let mut cursor = Reader::new(payload);
    let range_count = cursor.read_u8()?;

    for _ in 0..range_count {
        let number_bytes = cursor.read_u8()?;
        let width = usize::from(number_bytes);
        let low = cursor.read_n_bytes(width)?;
        let high = cursor.read_n_bytes(width)?;
        ranges.push(CodespaceRange {
            number_bytes,
            low,
            high,
        });
    }

    Some(())
}
/// Decodes a notdef segment payload and appends its ranges to `ranges`.
///
/// The payload starts with a one-byte code width and a `u16` entry count. Each
/// entry is a start code and an end code, both read with that width, followed
/// by a `u16` CID. Returns `None` as soon as the payload runs out; entries
/// decoded up to that point have already been appended.
fn parse_notdef(payload: &[u8], ranges: &mut PartitionedRanges) -> Option<()> {
    let mut cursor = Reader::new(payload);
    let byte_width = usize::from(cursor.read_u8()?);
    let entry_count = cursor.read_u16()?;

    for _ in 0..entry_count {
        // Field initialisers run in source order: start first, then end.
        let range = Range {
            start: cursor.read_n_bytes(byte_width)?,
            end: cursor.read_n_bytes(byte_width)?,
        };
        let cid_start = u32::from(cursor.read_u16()?);
        ranges.push(byte_width, CidRange { range, cid_start });
    }

    Some(())
}
/// Decodes one CID mapping segment and appends its entries to `ranges` under
/// `byte_width`.
///
/// Payload layout, in read order:
/// 1. `u16` entry count (zero ends parsing successfully right away);
/// 2. `u32` byte length of the encoded delta stream, then the stream itself,
///    decoded with `delta_table` into one value per entry;
/// 3. only when `count_table` is `Some` (range segments): `u32` byte length of
///    the encoded count stream, then the stream, decoded into one value per
///    entry;
/// 4. one `u16` CID per entry.
///
/// The first entry starts at its delta; every later entry starts at
/// `previous_end + 1 + delta`. Range entries end at `start + count + 1`, single
/// entries end at `start`. A stored CID of `0` continues the previous entry's
/// CID run (`previous_cid + previous_range_len + 1`), or is `0` for the first
/// entry.
///
/// Returns `None` on truncated input, on a decode failure, or when a computed
/// code or CID would not fit in `u32`. Entries decoded before the failure have
/// already been appended.
fn parse_cid_segment(
    payload: &[u8],
    ranges: &mut PartitionedRanges,
    byte_width: usize,
    delta_table: &HuffmanTable,
    count_table: Option<&HuffmanTable>,
) -> Option<()> {
    let mut r = Reader::new(payload);
    let n_entries = r.read_u16()? as usize;
    if n_entries == 0 {
        return Some(());
    }

    let delta_len = r.read_u32()? as usize;
    let mut delta_reader = Reader::new(r.read_bytes(delta_len)?);
    let mut deltas = Vec::with_capacity(n_entries);
    for _ in 0..n_entries {
        deltas.push(delta_table.decode(&mut delta_reader)?);
    }

    // Only range segments carry a count stream.
    let counts = match count_table {
        Some(ct) => {
            let count_len = r.read_u32()? as usize;
            let mut count_reader = Reader::new(r.read_bytes(count_len)?);
            let mut decoded = Vec::with_capacity(n_entries);
            for _ in 0..n_entries {
                decoded.push(ct.decode(&mut count_reader)?);
            }
            Some(decoded)
        }
        None => None,
    };

    let mut prev_end: Option<u32> = None;
    let mut prev_cid: Option<u32> = None;
    let mut prev_range_len: u32 = 0;

    for (i, &delta) in deltas.iter().enumerate() {
        let raw_cid = r.read_u16()?;

        // All arithmetic below works on decoded input, so it is checked: an
        // overflow is reported as a parse failure instead of panicking (debug)
        // or silently wrapping (release).
        let start = match prev_end {
            Some(pe) => pe.checked_add(1)?.checked_add(delta)?,
            None => delta,
        };
        let end = match &counts {
            // `c` holds exactly `n_entries` values, so `c[i]` is in bounds.
            Some(c) => start.checked_add(c[i])?.checked_add(1)?,
            None => start,
        };

        let cid = match (raw_cid, prev_cid) {
            (0, Some(pc)) => pc.checked_add(prev_range_len)?.checked_add(1)?,
            (0, None) => 0,
            (explicit, _) => u32::from(explicit),
        };

        ranges.push(
            byte_width,
            CidRange {
                range: Range { start, end },
                cid_start: cid,
            },
        );

        prev_end = Some(end);
        prev_cid = Some(cid);
        // `end >= start` holds because the additions above did not overflow.
        prev_range_len = end - start;
    }

    Some(())
}
/// Decodes one BF segment and appends its entries to `entries`.
///
/// Payload layout, in read order:
/// 1. `u16` entry count;
/// 2. `u32` byte length of the encoded delta stream, then the stream itself,
///    decoded with `delta_table` into one value per entry;
/// 3. only when `count_table` is `Some` (range segments): `u32` byte length of
///    the encoded count stream, then the stream, decoded into one value per
///    entry;
/// 4. per entry: a one-byte unit count (only when `fixed_units` is `None`),
///    followed by that many `u16` destination units.
///
/// The first entry starts at its delta; every later entry starts at
/// `previous_end + 1 + delta`. Range entries end at `start + count + 1`, single
/// entries end at `start`.
///
/// Returns `None` on truncated input, on a decode failure, or when a computed
/// code would not fit in `u32`. Entries decoded before the failure have already
/// been appended.
fn parse_bf_segment(
    payload: &[u8],
    entries: &mut Vec<BfRange>,
    delta_table: &HuffmanTable,
    count_table: Option<&HuffmanTable>,
    fixed_units: Option<usize>,
) -> Option<()> {
    let mut r = Reader::new(payload);
    let n_entries = r.read_u16()? as usize;

    let delta_len = r.read_u32()? as usize;
    let mut delta_reader = Reader::new(r.read_bytes(delta_len)?);
    let mut deltas = Vec::with_capacity(n_entries);
    for _ in 0..n_entries {
        deltas.push(delta_table.decode(&mut delta_reader)?);
    }

    // Only range segments carry a count stream.
    let counts = match count_table {
        Some(ct) => {
            let count_len = r.read_u32()? as usize;
            let mut count_reader = Reader::new(r.read_bytes(count_len)?);
            let mut decoded = Vec::with_capacity(n_entries);
            for _ in 0..n_entries {
                decoded.push(ct.decode(&mut count_reader)?);
            }
            Some(decoded)
        }
        None => None,
    };

    let mut prev_end: Option<u32> = None;

    for (i, &delta) in deltas.iter().enumerate() {
        // Destination units come first in the per-entry data.
        let n_units = match fixed_units {
            Some(n) => n,
            None => usize::from(r.read_u8()?),
        };
        let mut dst_base = Vec::with_capacity(n_units);
        for _ in 0..n_units {
            dst_base.push(r.read_u16()?);
        }

        // Checked arithmetic: the operands are decoded input, so an overflow
        // is reported as a parse failure instead of panicking (debug) or
        // silently wrapping (release).
        let start = match prev_end {
            Some(pe) => pe.checked_add(1)?.checked_add(delta)?,
            None => delta,
        };
        let end = match &counts {
            // `c` holds exactly `n_entries` values, so `c[i]` is in bounds.
            Some(c) => start.checked_add(c[i])?.checked_add(1)?,
            None => start,
        };

        entries.push(BfRange {
            range: Range { start, end },
            dst_base,
        });
        prev_end = Some(end);
    }

    Some(())
}