mod icao9303_td3_dg1;
mod jwt;
#[cfg(test)]
use std::{fs, path::Path};
use rustc_hash::FxHashMap;
use super::{automaton::Automaton, regex::Regex, serialization::Serialize};
/// Directory holding one serialised automaton file per [`StdLibParser`].
///
/// Only compiled for tests: it is used to build the path of the cache file that
/// the serialisation check reads and, when the file is empty, overwrites.
// NOTE(review): the path is relative; presumably resolved from the crate root
// when tests are run through cargo — confirm.
#[cfg(test)]
const AUTOMATON_CACHE: &str = "src/parsing/scanner/static_specs/automaton_cache";
/// Identifier of a parser available in the standard parsing library.
///
/// The `Debug` name of each variant doubles as the name of its serialised
/// automaton file inside the `automaton_cache` directory, so renaming a variant
/// requires renaming the corresponding cache file as well.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum StdLibParser {
    /// Parser whose specification is built by `jwt::spec_jwt`.
    Jwt,
    /// Parser whose specification is built by
    /// `icao9303_td3_dg1::spec_icao9303_td3_dg1`.
    // NOTE(review): the variant says "9309" while the module says "9303" —
    // presumably a typo, but the name is tied to the cache file name.
    Icao9309Td3Dg1,
}
#[cfg(test)]
impl StdLibParser {
    /// Returns the path of the file caching the serialised automaton of `self`.
    ///
    /// The path is `AUTOMATON_CACHE`, a `/` separator, then the `Debug` name of
    /// the variant.
    pub(super) fn serialization_file(&self) -> String {
        let parser_name = format!("{self:?}");
        [AUTOMATON_CACHE, parser_name.as_str()].join("/")
    }
}
/// Static description of the library: one triple per parser, made of the parser
/// identifier, a function building its regex specification, and the bytes of
/// its serialised automaton.
type LibraryData = &'static [(StdLibParser, &'static dyn Fn() -> Regex, &'static [u8])];
/// Ready-to-use parsing library, mapping each parser identifier to its regex
/// specification and the matching automaton.
pub type ParsingLibrary = FxHashMap<StdLibParser, (Regex, Automaton)>;
/// Returns the static table describing every parser of the standard library.
///
/// Each entry couples a [`StdLibParser`] with the function that builds its regex
/// specification and with the automaton bytes embedded at compile time from the
/// `automaton_cache` directory located next to this file.
fn spec_library_data() -> LibraryData {
    const ENTRIES: LibraryData = &[
        (
            StdLibParser::Jwt,
            &jwt::spec_jwt,
            include_bytes!("automaton_cache/Jwt"),
        ),
        (
            StdLibParser::Icao9309Td3Dg1,
            &icao9303_td3_dg1::spec_icao9303_td3_dg1,
            include_bytes!("automaton_cache/Icao9309Td3Dg1"),
        ),
    ];
    ENTRIES
}
pub fn spec_library() -> ParsingLibrary {
spec_library_data()
.iter()
.map(|(name, regex, serialization)| {
assert!(
!serialization.is_empty(),
"Empty serialisation data for {:?}. The bootstrapping of the serialisation process has not been conducted. (see documentation of `midnight_circuits::parsing::scanner::static_specs`)",
*name
);
(*name, (regex(), Automaton::deserialize_unwrap(serialization)))
})
.collect::<FxHashMap<_, _>>()
}
/// Compares freshly computed automata against the cached serialisation files.
///
/// For each parser of `checks`, the automaton is serialised and compared with
/// the content of its cache file:
/// - if the cache file is empty, it is bootstrapped with the fresh
///   serialisation and a recompilation is requested;
/// - otherwise the fresh serialisation must be identical to the cached one.
///
/// The completion message and the recompilation request are only issued once
/// *all* parsers have been processed, so that a single run bootstraps every
/// empty cache file.
///
/// # Panics
///
/// Panics if a cache file does not exist or cannot be read or written, if a
/// non-empty cache file differs from the fresh serialisation, or (after the
/// whole library was processed) if at least one cache file was bootstrapped.
#[cfg(test)]
fn check_serialization(checks: &ParsingLibrary) {
    let mut recompile = false;
    for (parser, (_, automaton)) in checks {
        let file_name = parser.serialization_file();
        assert!(
            Path::new(&file_name).exists(),
            "serialisation file {file_name} does not exist! Follow the documentation of `midnight_circuits::parsing::scanner::static_specs` for instructions on how to add a new parser to the standard library."
        );
        let previous_data = fs::read(&file_name).unwrap();
        let mut current_data = Vec::new();
        automaton.serialize(&mut current_data);
        if previous_data.is_empty() {
            println!("-> bootstrapping the serialisation of {:?}. Recompilation will be necessary so that the executable contains the correct serialised data.", parser);
            recompile = true;
            fs::write(&file_name, &current_data).unwrap();
        } else {
            assert!(
                current_data == previous_data,
                "The serialisation data of the parsing library (parser name: {:?}) is not up to date. If this is intentional, clear the content of {}, and run the test again to replace its content.",
                parser, file_name
            );
            println!("-> serialisation data of {:?} is up to date.", parser)
        }
    }
    // Deliberately outside the loop: every parser must be checked (and
    // bootstrapped if needed) before a recompilation is requested.
    println!(">> Serialisation checks completed.\n======");
    assert!(
        !recompile,
        "The executable has to be re-compiled so that the serialisation data is up-to-date."
    );
}
#[cfg(test)]
mod tests {
use std::time::Instant;
use rustc_hash::{FxBuildHasher, FxHashMap};
use super::{
super::automaton::Automaton, check_serialization, spec_library, spec_library_data,
StdLibParser,
};
use crate::parsing::{regex::Regex, scanner::MarkerTestVector};
/// Recomputes every automaton of the library from its regex specification and
/// checks the result against the cached serialisation files.
///
/// Timing information is printed for each automaton and for the whole library.
/// The final consistency check may panic (see `check_serialization`).
fn configure_serialisation() {
    let entries = spec_library_data();
    println!("======\nRecomputing the parsing library automata...");
    let mut library = FxHashMap::with_capacity_and_hasher(entries.len(), FxBuildHasher);
    let global_timer = Instant::now();
    entries.iter().for_each(|(parser, build_spec, _)| {
        let local_timer = Instant::now();
        let automaton = build_spec().to_automaton();
        println!(
            "-> Generated {:?} automaton in {:?}",
            *parser,
            local_timer.elapsed()
        );
        // The specification is rebuilt to be stored alongside its automaton.
        library.insert(*parser, (build_spec(), automaton));
    });
    println!(
        ">> Full parsing library re-computed in {:?}!\n======\n>> Now checking the consistency of serialised data.",
        global_timer.elapsed()
    );
    check_serialization(&library)
}
/// Runs `automaton` on `input` and checks that the input is accepted.
///
/// Returns the outputs produced by the run.
///
/// # Panics
///
/// Panics if the run was interrupted before the end of the input, or if it
/// ended in a state that is not one of the automaton's final states. `spec` and
/// `index` only serve to label the messages.
fn check_accepted(
    automaton: &Automaton,
    spec: StdLibParser,
    index: usize,
    input: &[u8],
) -> Vec<usize> {
    println!("\n -> accepting test nb. {index}");
    let (states, outputs, interrupted) = automaton.run(input);
    // Number of transitions performed (one less than the number of entries).
    let counter = states.len() - 1;
    if interrupted {
        panic!(
            "[spec {:?}, accept #{index}] stuck after {counter} transitions, reading byte {} ({:02X}). Partial input:\n\n{}\n\n(bytes {:02X?})",
            spec,
            input[counter],
            input[counter],
            String::from_utf8_lossy(&input[..counter]),
            &input[..counter],
        );
    }
    let state = states[counter];
    if !automaton.final_states.contains(&state) {
        panic!(
            "[spec {:?}, accept #{index}] non-final state {state} after {counter} transitions",
            spec
        );
    }
    outputs
}
/// Runs `automaton` on `input` and checks that the input is rejected.
///
/// The input counts as rejected when the run is interrupted, or when it ends in
/// a state that is not one of the automaton's final states.
///
/// # Panics
///
/// Panics if the whole input is read and the run ends in a final state. `spec`
/// and `index` only serve to label the messages.
fn check_rejected(automaton: &Automaton, spec: StdLibParser, index: usize, input: &[u8]) {
    println!("\n -> rejecting test nb. {index}");
    let (states, outputs, interrupted) = automaton.run(input);
    let counter = states.len() - 1;
    if interrupted {
        println!(
            "... rejected as expected (stuck after {counter} transitions at byte {} ({:02X})).",
            input[counter], input[counter],
        );
        return;
    }
    let state = states[counter];
    if automaton.final_states.contains(&state) {
        panic!(
            "[spec {:?}, reject #{index}] unexpectedly accepted (final state {state} after {counter} transitions). Raw outputs: {:?}",
            spec, outputs
        );
    }
    println!("... rejected as expected (non-final state {state} after {counter} transitions).")
}
/// Tests the parser `spec` of `spec_library` against marker-based test vectors.
///
/// Each accepted vector destructures into `(input, expected_outputs)`. The
/// input must be accepted; the input bytes are then grouped by the non-zero
/// marker the automaton output at the same position, and the groups must match
/// `expected_outputs` exactly (same markers, same bytes). Every input of
/// `rejected` must be rejected.
///
/// # Panics
///
/// Panics if `spec` is missing from the library, if an accepted input is not
/// accepted, if a marker is unexpected or missing or carries the wrong bytes,
/// or if a rejected input is accepted.
pub(super) fn specs_one_test_with_markers(
    spec_library: &FxHashMap<StdLibParser, (Regex, Automaton)>,
    spec: StdLibParser,
    accepted: &[MarkerTestVector<'_>],
    rejected: &[&[u8]],
) {
    let automaton = &spec_library.get(&spec).unwrap().1;
    println!("\n\n** TEST of the spec {:?}", spec);
    for (index, &(input, expected_outputs)) in accepted.iter().enumerate() {
        let raw_markers = check_accepted(automaton, spec, index, input);

        // Group the input bytes by the (non-zero) marker emitted alongside them.
        let mut outputs = FxHashMap::with_capacity_and_hasher(2, FxBuildHasher);
        raw_markers
            .iter()
            .zip(input)
            .filter(|&(&marker, _)| marker != 0)
            .for_each(|(&marker, &byte)| outputs.entry(marker).or_insert(vec![]).push(byte));

        // No marker may appear unless it is listed in the expected outputs.
        for marker in outputs.keys() {
            if !expected_outputs.iter().any(|(j, _)| marker == j) {
                panic!(
                    "[test of spec {:?}, nb. {index}]: accepted as expected, but has unexpected marker {}.",
                    spec, marker
                )
            }
        }

        // Every expected marker must be present with exactly the expected bytes.
        for (i, expected_output_bytes) in expected_outputs {
            let output_bytes = outputs.get(i).unwrap_or_else(|| {
                panic!(
                    "[test of spec {:?}, nb. {index}]: accepted as expected, but missing marker {i}.",
                    spec
                )
            });
            if output_bytes != expected_output_bytes {
                panic!(
                    "[test of spec {:?}, nb. {index}]: output for marker {i} is\n \"{}\"\ninstead of\n \"{}\"",
                    spec,
                    String::from_utf8_lossy(output_bytes),
                    String::from_utf8_lossy(expected_output_bytes),
                );
            }
        }

        let counter = input.len();
        println!("... accepted with correct outputs ({counter} transitions). The outputs are:");
        for (i, o) in expected_outputs {
            println!(" - {i}: {}", String::from_utf8_lossy(o))
        }
    }
    rejected
        .iter()
        .enumerate()
        .for_each(|(index, input)| check_rejected(automaton, spec, index, input));
}
/// Tests the parser `spec` of `spec_library` against raw-output test vectors.
///
/// Each accepted vector destructures into `(input, expected)`: the input must
/// be accepted and the outputs of the run must equal `expected`. Every input of
/// `rejected` must be rejected.
///
/// # Panics
///
/// Panics if `spec` is missing from the library, if an accepted input is not
/// accepted or yields different outputs, or if a rejected input is accepted.
pub(super) fn _specs_one_test_with_outputs(
    spec_library: &FxHashMap<StdLibParser, (Regex, Automaton)>,
    spec: StdLibParser,
    accepted: &[super::super::OutputTestVector<'_>],
    rejected: &[&[u8]],
) {
    let automaton = &spec_library.get(&spec).unwrap().1;
    println!("\n\n** TEST of the spec {:?}", spec);
    accepted
        .iter()
        .enumerate()
        .for_each(|(index, &(input, expected))| {
            let actual = check_accepted(automaton, spec, index, input);
            assert_eq!(
                actual, expected,
                "[spec {:?}, accept #{index}]: output mismatch",
                spec
            );
            println!("... accepted with correct outputs: {actual:?}");
        });
    rejected
        .iter()
        .enumerate()
        .for_each(|(index, input)| check_rejected(automaton, spec, index, input));
}
/// End-to-end test of the standard parsing library.
///
/// 1. Recomputes the automata and checks them against the cached serialisation.
/// 2. Loads the library from the embedded serialised data and prints, for each
///    automaton, its number of states and transitions, followed by the total
///    count (transitions plus final states, summed over all automata) and the
///    exponent of the next power of two.
/// 3. Runs the test vectors of each parser.
#[test]
fn specs_test() {
    configure_serialisation();
    println!(">> Now configuring the spec library for tests... (using the serialised data)");
    let timer = Instant::now();
    let library = spec_library();
    println!(
        ">> Configuration completed in {:?}. Automaton breakdown:",
        timer.elapsed()
    );
    let mut total = 0;
    for (name, (_, automaton)) in &library {
        let nb_transitions = automaton.transitions.values().map(|m| m.len()).sum::<usize>();
        println!(
            " - {:?}: {} states, {} transitions",
            name, automaton.nb_states, nb_transitions
        );
        total += nb_transitions + automaton.final_states.len()
    }
    println!(
        ">> Total nb of lookup rows in the chip: {} ≤ 2^{}",
        total,
        total.next_power_of_two().trailing_zeros()
    );
    super::jwt::test_jwt(&library);
    super::icao9303_td3_dg1::test_dg1(&library);
}
}