tectonic_xetex_format/
cshash.rs1use nom::{
7 multi::count,
8 number::complete::{be_i32, be_u8},
9 IResult,
10};
11use tectonic_errors::prelude::*;
12
13use crate::{
14 base::{self, SIZEOF_MEMORY_WORD},
15 engine::Engine,
16 eqtb::EqtbPointer,
17 parseutils,
18 stringtable::{StrPointer, StringTable},
19 symbols::{SymbolCategory, SymbolTable},
20};
21
22pub fn initialize_cshash_symbols(symbols: &mut SymbolTable) -> Result<()> {
23 symbols.add(SymbolCategory::CsHash, "HASH_SIZE", 15_000)?;
24 symbols.add(SymbolCategory::CsHash, "HASH_EXTRA", 600_000)?;
25 symbols.add(SymbolCategory::CsHash, "HASH_OFFSET", 514)?;
26 symbols.add(SymbolCategory::CsHash, "HASH_PRIME", 8501)?;
27 Ok(())
28}
29
/// The hash table of control sequence names, as loaded by
/// `ControlSeqHash::parse`.
#[derive(Debug)]
pub struct ControlSeqHash {
    /// Raw bytes of the table. `parse` stores the entry for pointer `i` at
    /// byte offset `(i - hash_offset) * SIZEOF_MEMORY_WORD`, and `decode`
    /// reads entries back using the same `hash_offset` bias.
    need_offset_hash: Vec<u8>,

    // The remaining fields are copies of engine symbol values captured in
    // `parse`, so that the other methods do not need access to the `Engine`.
    /// Value of the engine symbol `HASH_BASE`; `lookup` adds the computed
    /// hash to this to obtain the first pointer it inspects.
    hash_base: EqtbPointer,
    /// Value of the engine symbol `HASH_PRIME`; the modulus used when
    /// hashing a name in `lookup`.
    hash_prime: u32,
    /// Value of the engine symbol `HASH_OFFSET`; subtracted from a pointer
    /// to obtain its position within `need_offset_hash`.
    hash_offset: i32,
    /// Value of the engine symbol `SINGLE_BASE`.
    single_base: EqtbPointer,
    /// Value of the engine symbol `NULL_CS`.
    null_cs_loc: EqtbPointer,
    /// Value of the engine symbol `UNDEFINED_CONTROL_SEQUENCE`.
    undefined_cs_loc: EqtbPointer,
    /// Value of the engine symbol `EQTB_SIZE`.
    eqtb_size: EqtbPointer,
    /// Value of the engine symbol `EQTB_TOP`.
    eqtb_top: EqtbPointer,
    /// Value of the engine symbol `PRIM_EQTB_BASE`.
    prim_eqtb_base: EqtbPointer,
    /// Value of the engine symbol `FROZEN_NULL_FONT`.
    frozen_null_font_loc: EqtbPointer,
}
47
48impl ControlSeqHash {
49 pub(crate) fn parse<'a>(
50 input: &'a [u8],
51 engine: &Engine,
52 hash_high: i32,
53 ) -> IResult<&'a [u8], Self> {
54 let hash_base = engine.symbols.lookup("HASH_BASE") as EqtbPointer;
55 let hash_prime = engine.symbols.lookup("HASH_PRIME") as u32;
56 let hash_offset = engine.symbols.lookup("HASH_OFFSET") as i32;
57 let single_base = engine.symbols.lookup("SINGLE_BASE") as EqtbPointer;
58 let null_cs_loc = engine.symbols.lookup("NULL_CS") as EqtbPointer;
59 let undefined_cs_loc = engine.symbols.lookup("UNDEFINED_CONTROL_SEQUENCE") as EqtbPointer;
60 let eqtb_size = engine.symbols.lookup("EQTB_SIZE") as EqtbPointer;
61 let eqtb_top = engine.symbols.lookup("EQTB_TOP") as EqtbPointer;
62 let prim_eqtb_base = engine.symbols.lookup("PRIM_EQTB_BASE") as EqtbPointer;
63 let frozen_null_font_loc = engine.symbols.lookup("FROZEN_NULL_FONT") as EqtbPointer;
64
65 let index = |i: i32| (i - hash_offset) as usize * SIZEOF_MEMORY_WORD;
66
67 let high_hash_size = eqtb_top + 1 - hash_offset;
68 let mut need_offset_hash = vec![0u8; high_hash_size as usize * SIZEOF_MEMORY_WORD];
69
70 let (input, hash_used) = parseutils::ranged_be_i32(
71 hash_base,
72 engine.symbols.lookup("FROZEN_CONTROL_SEQUENCE") as i32,
73 )(input)?;
74
75 let mut p = hash_base - 1;
76 let mut input = input;
77
78 loop {
79 let (ii, new_p) = parseutils::ranged_be_i32(p + 1, hash_used)(input)?;
80 p = new_p;
81
82 let (ii, block) = count(be_u8, 8)(ii)?;
84 let ofs = index(p);
85 need_offset_hash[ofs..ofs + 8].copy_from_slice(&block[..]);
86
87 input = ii;
88
89 if p == hash_used {
90 break;
91 }
92 }
93
94 let nb = ((engine.symbols.lookup("UNDEFINED_CONTROL_SEQUENCE") as i32 - 1) - hash_used)
96 as usize
97 * SIZEOF_MEMORY_WORD;
98 let (input, block) = count(be_u8, nb)(input)?;
99 let ofs = index(hash_used + 1);
100 need_offset_hash[ofs..ofs + nb].copy_from_slice(&block[..]);
101
102 let mut input = input;
103
104 if hash_high > 0 {
105 let nb = hash_high as usize * SIZEOF_MEMORY_WORD;
106 let (new_input, block) = count(be_u8, nb)(input)?;
107 input = new_input;
108 let ofs = index(eqtb_size + 1);
109 need_offset_hash[ofs..ofs + nb].copy_from_slice(&block[..]);
110 }
111
112 let (input, _cs_count) = be_i32(input)?;
113
114 Ok((
115 input,
116 ControlSeqHash {
117 need_offset_hash,
118 hash_base,
119 hash_prime,
120 hash_offset,
121 single_base,
122 null_cs_loc,
123 undefined_cs_loc,
124 eqtb_size,
125 eqtb_top,
126 prim_eqtb_base,
127 frozen_null_font_loc,
128 },
129 ))
130 }
131
132 fn decode(&self, index: i32) -> (StrPointer, i32) {
133 let index = index - self.hash_offset;
134 let text_ptr = base::memword_read_b32_s1(&self.need_offset_hash[..], index);
135 let next_ptr = base::memword_read_b32_s0(&self.need_offset_hash[..], index);
136 (text_ptr, next_ptr)
137 }
138
139 pub fn lookup(&self, csname: &str, strings: &StringTable) -> Option<EqtbPointer> {
140 let csname_len_utf16 = crate::stringtable::len_utf16(csname);
141
142 let mut h = 0;
143
144 for c in csname.chars() {
145 h = h + h + c as u32;
146 while h >= self.hash_prime {
147 h -= self.hash_prime;
148 }
149 }
150
151 let mut p = h as i32 + self.hash_base;
152
153 loop {
154 let (str_ptr, next_ptr) = self.decode(p);
155
156 if str_ptr > 0 {
157 let len = strings.utf16_length(str_ptr);
158
159 if len == csname_len_utf16 {
160 let s = strings.lookup(str_ptr);
161
162 if s == csname {
163 return Some(p);
164 }
165 }
166 }
167
168 if next_ptr == 0 {
169 return None;
170 }
171
172 p = next_ptr;
173 }
174 }
175
176 pub fn stringify(&self, p: EqtbPointer, strings: &StringTable) -> Option<String> {
178 if p < self.hash_base {
179 if p >= self.single_base {
183 if p == self.null_cs_loc {
184 return Some("".to_owned());
185 } else {
186 let usv = (p - self.single_base) as u32;
187 return char::from_u32(usv).map(|c| c.to_string());
188 }
189 } else {
190 return Some(format!(
192 "[active character {}]",
193 crate::format::fmt_usv(p - 1)
194 ));
195 }
196 }
197
198 if p >= self.undefined_cs_loc && p <= self.eqtb_size {
199 return None;
200 }
201
202 if p > self.eqtb_top {
203 return None;
204 }
205
206 if p >= self.prim_eqtb_base && p < self.frozen_null_font_loc {
207 return None;
209 }
210
211 let (text_ptr, _next_ptr) = self.decode(p);
214 Some(strings.lookup(text_ptr).to_owned())
215 }
216}