1use crate::token::{CsNameInterner, Token, Value};
37use std::collections::BTreeMap;
38use std::ops::Bound::Included;
39use std::path::PathBuf;
40
41#[derive(Debug, PartialEq, Eq, Clone, Copy)]
45#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
46pub struct Key(u32);
47
48impl Key {
49 pub fn dummy() -> Key {
50 Key(u32::MAX)
51 }
52}
53
54#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
56pub struct KeyRange {
57 next: u32,
58 limit: u32,
59}
60
61impl KeyRange {
62 #[allow(clippy::should_implement_trait)]
66 pub fn next(&mut self) -> Key {
67 if self.next >= self.limit {
68 panic!["requested more trace keys than are in the range"]
69 }
70 let n = self.next;
71 self.next += 1;
72 Key(n)
73 }
74
75 pub fn peek(&mut self) -> Key {
79 if self.next >= self.limit {
80 panic!["requested more trace keys than are in the range"]
81 }
82 Key(self.next)
83 }
84}
85
86impl KeyRange {
87 pub fn empty() -> KeyRange {
88 KeyRange { next: 0, limit: 0 }
89 }
90
91 #[cfg(test)]
92 pub fn for_testing() -> KeyRange {
93 KeyRange {
94 next: 0,
95 limit: u32::MAX,
96 }
97 }
98}
99
100#[derive(Debug, PartialEq, Eq)]
102pub struct SourceCodeTrace {
103 pub file_name: PathBuf,
105 pub line_content: String,
107 pub line_number: usize,
109 pub index: usize,
111 pub value: String,
113 pub token: Option<Token>,
116}
117
118#[derive(Default)]
120#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
121pub struct Tracer {
122 checkpoints: BTreeMap<u32, Checkpoint>,
123 next_key: u32,
124
125 last_external_input: Option<u32>,
128}
129
130impl Tracer {
131 pub fn register_source_code(
139 &mut self,
140 token: Option<Token>,
141 file_name: PathBuf,
142 source_code: &str,
143 ) -> KeyRange {
144 let len = match u32::try_from(source_code.len()) {
145 Err(_) => {
146 panic!(
147 "source code too big ({} bytes); max is 2^32={} bytes",
148 source_code.len(),
149 u32::MAX
150 )
151 }
152 Ok(0) => 1_u32,
154 Ok(limit) => limit,
155 };
156 let range = KeyRange {
157 next: self.next_key,
158 limit: self.next_key + len,
159 };
160 self.checkpoints.insert(
161 range.next,
162 Checkpoint::SourceCode {
163 file_name,
164 content: source_code.to_string(),
165 },
166 );
167 if token.is_none() {
168 self.last_external_input = Some(self.next_key);
169 }
170 self.next_key = range.limit;
171 range
172 }
173
174 pub fn trace(&self, token: Token, cs_name_interner: &CsNameInterner) -> SourceCodeTrace {
176 let value = match token.value() {
177 Value::ControlSequence(cs_name) => {
178 format!["\\{}", cs_name_interner.resolve(cs_name).unwrap()]
179 }
180 _ => token.char().unwrap().to_string(),
181 };
182
183 let (&first_key, checkpoint) = self
184 .checkpoints
185 .range((Included(&0), Included(&token.trace_key.0)))
186 .rev()
187 .next()
188 .unwrap();
189
190 match checkpoint {
191 Checkpoint::SourceCode { file_name, content } => {
192 let char_offset = (token.trace_key().0 - first_key) as usize;
193 let mut line_number = 1;
194 let mut byte_line_start = 0;
195 let mut char_line_start = 0;
196 for (char_index, (byte_index, c)) in content.char_indices().enumerate() {
197 if char_index == char_offset {
198 break;
199 }
200 if c == '\n' {
201 byte_line_start = byte_index + 1;
202 char_line_start = char_index + 1;
203 line_number += 1;
204 }
205 }
206 let position = char_offset - char_line_start;
207 let tail = &content[byte_line_start..];
208 let line_content = match tail.split_once('\n') {
209 None => tail.to_string(),
210 Some((a, _)) => a.to_string(),
211 };
212 SourceCodeTrace {
213 file_name: file_name.clone(),
214 line_content,
215 line_number,
216 index: position,
217 value,
218 token: Some(token),
219 }
220 }
221 }
222 }
223
224 pub fn trace_end_of_input(&self) -> SourceCodeTrace {
225 let f = self
226 .checkpoints
227 .get(&self.last_external_input.unwrap())
228 .unwrap();
229 match f {
230 Checkpoint::SourceCode { file_name, content } => {
231 let mut last_line: (usize, usize) = (0, 0);
233 let mut last_non_empty_line: (usize, usize) = (0, 0);
234 for (i, c) in content.char_indices() {
235 if !c.is_whitespace() {
236 last_non_empty_line = last_line;
237 } else if c == '\n' {
238 last_line.0 += 1;
239 last_line.1 = i + 1;
240 }
241 }
242 let last_line = content[last_non_empty_line.1..].trim_end();
243 SourceCodeTrace {
244 file_name: file_name.clone(),
245 line_content: last_line.to_string(),
246 line_number: last_non_empty_line.0 + 1,
247 index: last_line.len(),
248 value: " ".to_string(),
249 token: None,
250 }
251 }
252 }
253 }
254}
255
256#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
257enum Checkpoint {
258 SourceCode { file_name: PathBuf, content: String },
259}
260
261#[cfg(test)]
262mod tests {
263 use super::*;
264
265 #[test]
266 fn one_source_code() {
267 let file_name: PathBuf = "input.tex".into();
268 let line_1 = "hël".to_string();
269 let line_2 = "wor\\cömmand".to_string();
270 let line_3 = "hël".to_string();
271 let source_code = format!("{}\n{}\n{}", line_1, line_2, line_3);
272
273 let mut tracer: Tracer = Default::default();
274 let mut interner: CsNameInterner = Default::default();
275 let command = interner.get_or_intern("command");
276 let mut range = tracer.register_source_code(None, file_name.clone(), &source_code);
277 let mut tokens = vec![
278 Token::new_letter('h', range.next()),
279 Token::new_letter('e', range.next()),
280 Token::new_letter('l', range.next()),
281 Token::new_space('\n', range.next()),
282 Token::new_letter('w', range.next()),
283 Token::new_letter('o', range.next()),
284 Token::new_letter('r', range.next()),
285 Token::new_control_sequence(command, range.next()),
286 ];
287 for _ in 0.."command".len() {
288 range.next();
289 }
290 let mut extra_tokens = vec![
291 Token::new_space('\n', range.next()),
292 Token::new_letter('h', range.next()),
293 Token::new_letter('e', range.next()),
294 Token::new_letter('l', range.next()),
295 ];
296 tokens.append(&mut extra_tokens);
297
298 let got_traces: Vec<SourceCodeTrace> = tokens
299 .iter()
300 .map(|token| tracer.trace(*token, &interner))
301 .collect();
302
303 let want_traces = vec![
304 SourceCodeTrace {
305 file_name: file_name.clone(),
306 line_content: line_1.clone(),
307 line_number: 1,
308 index: 0,
309 value: "h".to_string(),
310 token: Some(tokens[0]),
311 },
312 SourceCodeTrace {
313 file_name: file_name.clone(),
314 line_content: line_1.clone(),
315 line_number: 1,
316 index: 1,
317 value: "e".to_string(),
318 token: Some(tokens[1]),
319 },
320 SourceCodeTrace {
321 file_name: file_name.clone(),
322 line_content: line_1.clone(),
323 line_number: 1,
324 index: 2,
325 value: "l".to_string(),
326 token: Some(tokens[2]),
327 },
328 SourceCodeTrace {
329 file_name: file_name.clone(),
330 line_content: line_1.clone(),
331 line_number: 1,
332 index: 3,
333 value: "\n".to_string(),
334 token: Some(tokens[3]),
335 },
336 SourceCodeTrace {
337 file_name: file_name.clone(),
338 line_content: line_2.clone(),
339 line_number: 2,
340 index: 0,
341 value: "w".to_string(),
342 token: Some(tokens[4]),
343 },
344 SourceCodeTrace {
345 file_name: file_name.clone(),
346 line_content: line_2.clone(),
347 line_number: 2,
348 index: 1,
349 value: "o".to_string(),
350 token: Some(tokens[5]),
351 },
352 SourceCodeTrace {
353 file_name: file_name.clone(),
354 line_content: line_2.clone(),
355 line_number: 2,
356 index: 2,
357 value: "r".to_string(),
358 token: Some(tokens[6]),
359 },
360 SourceCodeTrace {
361 file_name: file_name.clone(),
362 line_content: line_2.clone(),
363 line_number: 2,
364 index: 3,
365 value: "\\command".to_string(),
366 token: Some(tokens[7]),
367 },
368 SourceCodeTrace {
369 file_name: file_name.clone(),
370 line_content: line_2.clone(),
371 line_number: 2,
372 index: 11,
373 value: "\n".to_string(),
374 token: Some(tokens[8]),
375 },
376 SourceCodeTrace {
377 file_name: file_name.clone(),
378 line_content: line_3.clone(),
379 line_number: 3,
380 index: 0,
381 value: "h".to_string(),
382 token: Some(tokens[9]),
383 },
384 SourceCodeTrace {
385 file_name: file_name.clone(),
386 line_content: line_3.clone(),
387 line_number: 3,
388 index: 1,
389 value: "e".to_string(),
390 token: Some(tokens[10]),
391 },
392 SourceCodeTrace {
393 file_name: file_name.clone(),
394 line_content: line_3.clone(),
395 line_number: 3,
396 index: 2,
397 value: "l".to_string(),
398 token: Some(tokens[11]),
399 },
400 ];
401 assert_eq!(want_traces, got_traces);
402 }
403
404 #[test]
405 fn multiple_source_code() {
406 let mut tokens = Vec::new();
407 let mut tracer: Tracer = Default::default();
408 let interner: CsNameInterner = Default::default();
409
410 let file_1: PathBuf = "a.tex".into();
411 let file_1_content = "a".to_string();
412 let mut range = tracer.register_source_code(None, file_1.clone(), &file_1_content);
413 tokens.push(Token::new_letter('a', range.next()));
414
415 let file_2: PathBuf = "b.tex".into();
416 let file_2_content = "b".to_string();
417 let mut range = tracer.register_source_code(None, file_2.clone(), &file_2_content);
418 tokens.push(Token::new_letter('b', range.next()));
419
420 let file_3: PathBuf = "c.tex".into();
421 let file_3_content = "c".to_string();
422 let mut range = tracer.register_source_code(None, file_3.clone(), &file_3_content);
423 tokens.push(Token::new_letter('c', range.next()));
424
425 let got_traces: Vec<SourceCodeTrace> = tokens
426 .iter()
427 .map(|token| tracer.trace(*token, &interner))
428 .collect();
429
430 let want_traces = vec![
431 SourceCodeTrace {
432 file_name: file_1,
433 line_content: file_1_content,
434 line_number: 1,
435 index: 0,
436 value: "a".to_string(),
437 token: Some(tokens[0]),
438 },
439 SourceCodeTrace {
440 file_name: file_2,
441 line_content: file_2_content,
442 line_number: 1,
443 index: 0,
444 value: "b".to_string(),
445 token: Some(tokens[1]),
446 },
447 SourceCodeTrace {
448 file_name: file_3,
449 line_content: file_3_content,
450 line_number: 1,
451 index: 0,
452 value: "c".to_string(),
453 token: Some(tokens[2]),
454 },
455 ];
456 assert_eq!(want_traces, got_traces);
457 }
458}