use crate::token::{CsNameInterner, Token, Value};
use std::collections::BTreeMap;
use std::ops::Bound::Included;
use std::path::PathBuf;
/// Opaque identifier attached to a token so that the token can later be
/// traced back to the source code it came from.
///
/// Keys are handed out by a [`KeyRange`]. [`Tracer::trace`] subtracts the first
/// key of the owning range from a token's key to recover the token's position
/// in the registered source.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Key(u32);

impl Key {
    /// Returns a placeholder key wrapping the largest raw value, `u32::MAX`.
    ///
    /// [`KeyRange::next`] never hands out this value: it only issues keys
    /// strictly below the range's limit, and the limit is itself a `u32`.
    pub fn dummy() -> Key {
        Self(u32::MAX)
    }
}
/// A contiguous block of trace keys, `next..limit`, that can be handed out one
/// at a time.
///
/// Ranges are created by [`Tracer::register_source_code`], which sizes them
/// from the length of the registered source.
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct KeyRange {
    /// Raw value of the key that will be handed out next.
    next: u32,
    /// Exclusive upper bound: the range is exhausted once `next` reaches it.
    limit: u32,
}

impl KeyRange {
    /// Creates a range that contains no keys at all.
    ///
    /// Calling [`KeyRange::next`] or [`KeyRange::peek`] on it panics.
    pub fn empty() -> KeyRange {
        KeyRange { next: 0, limit: 0 }
    }

    /// Creates a range for tests that starts at key 0 and has the largest
    /// possible limit.
    #[cfg(test)]
    pub fn for_testing() -> KeyRange {
        KeyRange {
            next: 0,
            limit: u32::MAX,
        }
    }

    /// Hands out the next key and advances the range past it.
    ///
    /// # Panics
    ///
    /// Panics if every key in the range has already been handed out.
    // This is deliberately not `Iterator::next`: it returns a `Key` directly
    // and panics on exhaustion rather than returning `Option`.
    #[allow(clippy::should_implement_trait)]
    pub fn next(&mut self) -> Key {
        // `peek` performs the exhaustion check, so the increment below cannot
        // overflow: `next < limit <= u32::MAX`.
        let key = self.peek();
        self.next += 1;
        key
    }

    /// Returns the key that the following call to [`KeyRange::next`] would
    /// hand out, without consuming it.
    ///
    /// # Panics
    ///
    /// Panics if every key in the range has already been handed out.
    pub fn peek(&self) -> Key {
        if self.next >= self.limit {
            panic!["requested more trace keys than are in the range"]
        }
        Key(self.next)
    }
}
/// Human-readable location of a token, or of the end of the input, within
/// registered source code.
///
/// Values are produced by [`Tracer::trace`] and [`Tracer::trace_end_of_input`].
#[derive(Debug, PartialEq, Eq)]
pub struct SourceCodeTrace {
/// File name the source code was registered under.
pub file_name: PathBuf,
/// Text of the source line being pointed at, without its trailing newline.
pub line_content: String,
/// One-based number of that line within the source code.
pub line_number: usize,
/// Zero-based offset of the traced item within `line_content`.
/// [`Tracer::trace`] measures it in characters rather than bytes.
pub index: usize,
/// Textual form of the traced item: a backslash followed by the name for a
/// control sequence, the token's character otherwise, and a single space
/// for the end of the input.
pub value: String,
/// The traced token, or `None` when the trace marks the end of the input.
pub token: Option<Token>,
}
/// Remembers every piece of registered source code and maps trace keys back
/// to positions inside it.
#[derive(Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Tracer {
    /// Registered sources, indexed by the first key of the range issued for
    /// each of them.
    checkpoints: BTreeMap<u32, Checkpoint>,
    /// First raw key value that has not been assigned to any range yet.
    next_key: u32,
    /// First key of the most recent source registered with `token == None`.
    last_external_input: Option<u32>,
}

impl Tracer {
    /// Registers `source_code` under `file_name` and returns the range of
    /// trace keys reserved for it.
    ///
    /// The range holds one key per byte of `source_code`, and a single key
    /// when the source is empty. [`Tracer::trace`] interprets the offset of a
    /// key within its range as a character index, so for non-ASCII sources
    /// the range is larger than the number of addressable characters.
    ///
    /// When `token` is `None` the source is additionally remembered as the
    /// most recent external input, which is what
    /// [`Tracer::trace_end_of_input`] reports on. Presumably `Some` carries
    /// the token responsible for this source being read; confirm with callers.
    ///
    /// # Panics
    ///
    /// Panics if `source_code` is longer than `u32::MAX` bytes, or if the
    /// tracer does not have enough unassigned keys left for it.
    pub fn register_source_code(
        &mut self,
        token: Option<Token>,
        file_name: PathBuf,
        source_code: &str,
    ) -> KeyRange {
        let len = match u32::try_from(source_code.len()) {
            Err(_) => panic!(
                "source code too big ({} bytes); max is 2^32-1={} bytes",
                source_code.len(),
                u32::MAX
            ),
            // An empty source still consumes one key. Otherwise the next
            // registration would start at the same key and overwrite this
            // checkpoint in the map.
            Ok(0) => 1_u32,
            Ok(len) => len,
        };
        // A plain `+` would wrap silently in release builds and hand out keys
        // that overlap with earlier ranges.
        let limit = match self.next_key.checked_add(len) {
            Some(limit) => limit,
            None => panic!(
                "trace key space exhausted: cannot allocate {} keys starting at key {}",
                len, self.next_key
            ),
        };
        let range = KeyRange {
            next: self.next_key,
            limit,
        };
        self.checkpoints.insert(
            range.next,
            Checkpoint::SourceCode {
                file_name,
                content: source_code.to_string(),
            },
        );
        if token.is_none() {
            self.last_external_input = Some(range.next);
        }
        self.next_key = range.limit;
        range
    }

    /// Locates `token` in the source code it was created from.
    ///
    /// The token's trace key selects the registered source with the largest
    /// starting key that does not exceed it. The distance from that starting
    /// key is then taken as a character index into the source. The returned
    /// `line_number` is one-based and `index` is the character offset within
    /// the line.
    ///
    /// # Panics
    ///
    /// Panics if no registered source starts at or before the token's key, if
    /// a control sequence name cannot be resolved by `cs_name_interner`, or
    /// if a token that is not a control sequence has no character.
    pub fn trace(&self, token: Token, cs_name_interner: &CsNameInterner) -> SourceCodeTrace {
        let value = match token.value() {
            Value::ControlSequence(cs_name) => {
                format!["\\{}", cs_name_interner.resolve(cs_name).unwrap()]
            }
            _ => token.char().unwrap().to_string(),
        };
        let key = token.trace_key().0;
        let (&first_key, checkpoint) = self
            .checkpoints
            .range((Included(&0), Included(&key)))
            .next_back()
            .expect("no source code is registered at or before the token's trace key");
        match checkpoint {
            Checkpoint::SourceCode { file_name, content } => {
                let char_offset = (key - first_key) as usize;
                let mut line_number = 1;
                let mut byte_line_start = 0;
                let mut char_line_start = 0;
                // Only newlines strictly before the token move the line start.
                let preceding = content.char_indices().enumerate().take(char_offset);
                for (char_index, (byte_index, c)) in preceding {
                    if c == '\n' {
                        // '\n' is one byte long, so the next line starts right after it.
                        byte_line_start = byte_index + 1;
                        char_line_start = char_index + 1;
                        line_number += 1;
                    }
                }
                let tail = &content[byte_line_start..];
                let line_content = tail.split_once('\n').map_or(tail, |(line, _)| line);
                SourceCodeTrace {
                    file_name: file_name.clone(),
                    line_content: line_content.to_string(),
                    line_number,
                    index: char_offset - char_line_start,
                    value,
                    token: Some(token),
                }
            }
        }
    }

    /// Describes the end of the most recent external input, i.e. the latest
    /// source registered with `token == None`.
    ///
    /// The trace points just past the last non-whitespace character of that
    /// source: `line_content` is the line holding that character with
    /// trailing whitespace removed, and `index` is the number of characters
    /// in it. For a source with no non-whitespace character the trace is an
    /// empty first line.
    ///
    /// # Panics
    ///
    /// Panics if no source has been registered with `token == None`.
    pub fn trace_end_of_input(&self) -> SourceCodeTrace {
        let first_key = self
            .last_external_input
            .expect("no external input has been registered");
        let checkpoint = self
            .checkpoints
            .get(&first_key)
            .expect("the last external input always has a checkpoint");
        match checkpoint {
            Checkpoint::SourceCode { file_name, content } => {
                // (zero-based line index, byte offset of the line's first character)
                let mut current_line: (usize, usize) = (0, 0);
                let mut last_non_empty_line = current_line;
                for (byte_index, c) in content.char_indices() {
                    if c == '\n' {
                        current_line = (current_line.0 + 1, byte_index + 1);
                    } else if !c.is_whitespace() {
                        last_non_empty_line = current_line;
                    }
                }
                let (line_index, line_start) = last_non_empty_line;
                // Everything after the last non-whitespace character is
                // whitespace, so trimming also drops any later blank lines.
                let last_line = content[line_start..].trim_end();
                SourceCodeTrace {
                    file_name: file_name.clone(),
                    line_content: last_line.to_string(),
                    line_number: line_index + 1,
                    // Counted in characters, not bytes, so that the offset
                    // agrees with the ones reported by `trace`.
                    index: last_line.chars().count(),
                    value: " ".to_string(),
                    token: None,
                }
            }
        }
    }
}
/// A piece of input remembered by the [`Tracer`], stored under the first trace
/// key of the range that was issued for it.
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
enum Checkpoint {
/// Source code registered through [`Tracer::register_source_code`]: the file
/// name it was registered under and an owned copy of its full text.
SourceCode { file_name: PathBuf, content: String },
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the trace that `Tracer::trace` is expected to return for `token`.
    fn expected_trace(
        file_name: &std::path::Path,
        line_content: &str,
        line_number: usize,
        index: usize,
        value: &str,
        token: Token,
    ) -> SourceCodeTrace {
        SourceCodeTrace {
            file_name: file_name.to_path_buf(),
            line_content: line_content.to_string(),
            line_number,
            index,
            value: value.to_string(),
            token: Some(token),
        }
    }

    /// Traces every token in order and collects the results.
    fn trace_all(
        tracer: &Tracer,
        interner: &CsNameInterner,
        tokens: &[Token],
    ) -> Vec<SourceCodeTrace> {
        tokens
            .iter()
            .map(|token| tracer.trace(*token, interner))
            .collect()
    }

    /// Tokens from a single three-line source, including non-ASCII characters
    /// and a control sequence, are traced to the right line and offset.
    #[test]
    fn one_source_code() {
        let file_name = PathBuf::from("input.tex");
        let line_1 = "hël";
        let line_2 = "wor\\cömmand";
        let line_3 = "hël";
        let source_code = [line_1, line_2, line_3].join("\n");

        let mut tracer = Tracer::default();
        let mut interner = CsNameInterner::default();
        let command = interner.get_or_intern("command");
        let mut range = tracer.register_source_code(None, file_name.clone(), &source_code);

        // Keys are consumed strictly in source order, one per character.
        let mut tokens: Vec<Token> = Vec::new();
        for letter in ['h', 'e', 'l'] {
            tokens.push(Token::new_letter(letter, range.next()));
        }
        tokens.push(Token::new_space('\n', range.next()));
        for letter in ['w', 'o', 'r'] {
            tokens.push(Token::new_letter(letter, range.next()));
        }
        tokens.push(Token::new_control_sequence(command, range.next()));
        // The key above belongs to the backslash; skip one key for each
        // character of the control sequence name that follows it.
        for _ in "command".chars() {
            range.next();
        }
        tokens.push(Token::new_space('\n', range.next()));
        for letter in ['h', 'e', 'l'] {
            tokens.push(Token::new_letter(letter, range.next()));
        }

        let got_traces = trace_all(&tracer, &interner, &tokens);

        // (line content, line number, index, value), one entry per token.
        let expectations: [(&str, usize, usize, &str); 12] = [
            (line_1, 1, 0, "h"),
            (line_1, 1, 1, "e"),
            (line_1, 1, 2, "l"),
            (line_1, 1, 3, "\n"),
            (line_2, 2, 0, "w"),
            (line_2, 2, 1, "o"),
            (line_2, 2, 2, "r"),
            (line_2, 2, 3, "\\command"),
            (line_2, 2, 11, "\n"),
            (line_3, 3, 0, "h"),
            (line_3, 3, 1, "e"),
            (line_3, 3, 2, "l"),
        ];
        let want_traces: Vec<SourceCodeTrace> = expectations
            .iter()
            .zip(&tokens)
            .map(|(&(line, line_number, index, value), &token)| {
                expected_trace(&file_name, line, line_number, index, value, token)
            })
            .collect();

        assert_eq!(want_traces, got_traces);
    }

    /// Tokens from several registered sources are each traced back to the
    /// source they were created from.
    #[test]
    fn multiple_source_code() {
        let mut tracer = Tracer::default();
        let interner = CsNameInterner::default();

        // (file name, the single letter that makes up the file's content)
        let sources = [("a.tex", 'a'), ("b.tex", 'b'), ("c.tex", 'c')];

        let mut tokens: Vec<Token> = Vec::new();
        for (file_name, letter) in sources {
            let content = letter.to_string();
            let mut range =
                tracer.register_source_code(None, PathBuf::from(file_name), &content);
            tokens.push(Token::new_letter(letter, range.next()));
        }

        let got_traces = trace_all(&tracer, &interner, &tokens);

        let want_traces: Vec<SourceCodeTrace> = sources
            .iter()
            .zip(&tokens)
            .map(|(&(file_name, letter), &token)| {
                let content = letter.to_string();
                expected_trace(
                    std::path::Path::new(file_name),
                    &content,
                    1,
                    0,
                    &content,
                    token,
                )
            })
            .collect();

        assert_eq!(want_traces, got_traces);
    }
}