extern crate encoding_rs;
use crate::input::ByteCounter;
use crate::mission::Mission;
use crate::mission::MISSIONS;
use encoding_rs::Decoder;
use std::ops::Deref;
/// Collection of scanner states; `ScannerStates::new` creates one
/// `ScannerState` per entry of the mission list it is given.
pub struct ScannerStates {
/// The individual scanner states, stored in the same order as the missions
/// they were created from.
pub v: Vec<ScannerState>,
}
impl ScannerStates {
pub fn new(missions: &'static MISSIONS) -> Self {
let mut v = Vec::with_capacity(missions.len());
for i in 0..missions.len() {
v.push(ScannerState::new(&missions[i]))
}
Self { v }
}
}
/// Exposes the wrapped vector, so `Vec` methods (`len`, indexing, `iter`, ...)
/// can be called directly on a `ScannerStates`.
impl Deref for ScannerStates {
    type Target = Vec<ScannerState>;

    /// Returns a shared reference to the inner vector `v`.
    fn deref(&self) -> &Vec<ScannerState> {
        let Self { v } = self;
        v
    }
}
/// State that is carried from one scan run to the next for a single mission.
pub struct ScannerState {
/// The mission this state was created for.
pub mission: &'static Mission,
/// Decoder created from `mission.encoding` with BOM handling disabled.
pub decoder: Decoder,
/// Text buffer that starts out empty, pre-allocated with
/// `mission.output_line_char_nb_max` bytes of capacity.
/// NOTE(review): judging by the tests in this file it holds the trailing,
/// not yet emitted text of the previous input buffer (e.g. "co", "ijk") —
/// confirm against `FindingCollection::from`.
pub last_scan_run_leftover: String,
/// Starts as `false`.
/// NOTE(review): by its name this records that the last string of the
/// previous run was already printed and may have been cut — confirm against
/// the code that sets it.
pub last_run_str_was_printed_and_is_maybe_cut_str: bool,
/// Byte counter, initialised with `mission.counter_offset`. In this file's
/// tests it has advanced by the length of the input after each scan.
pub consumed_bytes: ByteCounter,
}
impl ScannerState {
    /// Creates the initial scanner state for `mission`.
    ///
    /// The returned state has
    /// * a fresh decoder for `mission.encoding` (BOM handling disabled),
    /// * an empty leftover buffer pre-allocated for
    ///   `mission.output_line_char_nb_max` bytes,
    /// * the "last string was printed and maybe cut" flag cleared, and
    /// * the byte counter set to `mission.counter_offset`.
    pub fn new(mission: &'static Mission) -> Self {
        Self {
            mission,
            decoder: mission.encoding.new_decoder_without_bom_handling(),
            last_scan_run_leftover: String::with_capacity(
                mission.output_line_char_nb_max as usize,
            ),
            last_run_str_was_printed_and_is_maybe_cut_str: false,
            consumed_bytes: mission.counter_offset,
        }
    }
}
#[cfg(test)]
pub mod tests {
use super::*;
use crate::finding::Precision;
use crate::finding_collection::FindingCollection;
use crate::mission::Mission;
use crate::mission::{Utf8Filter, AF_ALL, AF_CTRL, AF_WHITESPACE, UBF_LATIN, UBF_NONE};
use crate::mission::{UTF8_FILTER_ALL_VALID, UTF8_FILTER_LATIN};
use encoding_rs::Encoding;
use lazy_static::lazy_static;
lazy_static! {
    /// Test fixture: UTF-8 mission using `UTF8_FILTER_ALL_VALID`, with
    /// `chars_min_nb` = 3, `output_line_char_nb_max` = 10 and a byte counter
    /// that starts at 10_000.
    pub static ref MISSION_ALL_UTF8: Mission = Mission {
        mission_id: 0,
        counter_offset: 10_000,
        print_encoding_as_ascii: false,
        // `for_label` already yields a `&'static Encoding`; no extra borrow needed.
        encoding: Encoding::for_label(b"utf-8").expect("`utf-8` is a known encoding label"),
        chars_min_nb: 3,
        require_same_unicode_block: false,
        filter: UTF8_FILTER_ALL_VALID,
        output_line_char_nb_max: 10,
    };
}
lazy_static! {
    /// Test fixture: UTF-8 mission using `UTF8_FILTER_LATIN`, with
    /// `chars_min_nb` = 3, `output_line_char_nb_max` = 10 and a byte counter
    /// that starts at 10_000.
    pub static ref MISSION_LATIN_UTF8: Mission = Mission {
        mission_id: 0,
        counter_offset: 10_000,
        print_encoding_as_ascii: false,
        // `for_label` already yields a `&'static Encoding`; no extra borrow needed.
        encoding: Encoding::for_label(b"utf-8").expect("`utf-8` is a known encoding label"),
        chars_min_nb: 3,
        require_same_unicode_block: false,
        filter: UTF8_FILTER_LATIN,
        output_line_char_nb_max: 10,
    };
}
lazy_static! {
    /// Test fixture: UTF-8 mission with a hand-built filter
    /// (`AF_ALL & !AF_CTRL | AF_WHITESPACE`, `UBF_LATIN`) and
    /// `grep_char` = 42, which is the ASCII code of `*`.
    /// `chars_min_nb` = 3, `output_line_char_nb_max` = 10, counter starts at 10_000.
    pub static ref MISSION_LATIN_UTF8_GREP42: Mission = Mission {
        mission_id: 0,
        counter_offset: 10_000,
        print_encoding_as_ascii: false,
        // `for_label` already yields a `&'static Encoding`; no extra borrow needed.
        encoding: Encoding::for_label(b"utf-8").expect("`utf-8` is a known encoding label"),
        chars_min_nb: 3,
        require_same_unicode_block: false,
        filter: Utf8Filter {
            af: AF_ALL & !AF_CTRL | AF_WHITESPACE,
            ubf: UBF_LATIN,
            grep_char: Some(42),
        },
        output_line_char_nb_max: 10,
    };
}
lazy_static! {
    /// Test fixture: mission for the `x-user-defined` encoding using
    /// `UTF8_FILTER_ALL_VALID`, with `chars_min_nb` = 3,
    /// `output_line_char_nb_max` = 10 and a byte counter that starts at 10_000.
    pub static ref MISSION_ALL_X_USER_DEFINED: Mission = Mission {
        mission_id: 0,
        counter_offset: 10_000,
        print_encoding_as_ascii: false,
        // `for_label` already yields a `&'static Encoding`; no extra borrow needed.
        encoding: Encoding::for_label(b"x-user-defined")
            .expect("`x-user-defined` is a known encoding label"),
        chars_min_nb: 3,
        require_same_unicode_block: false,
        filter: UTF8_FILTER_ALL_VALID,
        output_line_char_nb_max: 10,
    };
}
lazy_static! {
    /// Test fixture: mission that decodes with `x-user-defined` and applies a
    /// hand-built filter (`AF_ALL & !AF_CTRL | AF_WHITESPACE`, `UBF_NONE`,
    /// no grep character).
    /// `chars_min_nb` = 3, `output_line_char_nb_max` = 10, counter starts at 10_000.
    pub static ref MISSION_ASCII: Mission = Mission {
        mission_id: 0,
        counter_offset: 10_000,
        print_encoding_as_ascii: false,
        // `for_label` already yields a `&'static Encoding`; no extra borrow needed.
        encoding: Encoding::for_label(b"x-user-defined")
            .expect("`x-user-defined` is a known encoding label"),
        chars_min_nb: 3,
        require_same_unicode_block: false,
        filter: Utf8Filter {
            af: AF_ALL & !AF_CTRL | AF_WHITESPACE,
            ubf: UBF_NONE,
            grep_char: None,
        },
        output_line_char_nb_max: 10,
    };
}
lazy_static! {
    /// Test fixture: UTF-8 mission using `UTF8_FILTER_LATIN`, with
    /// `chars_min_nb` = 4, `output_line_char_nb_max` = 60 and a byte counter
    /// that starts at 10_000.
    pub static ref MISSION_REAL_DATA_SCAN: Mission = Mission {
        mission_id: 0,
        counter_offset: 10_000,
        print_encoding_as_ascii: false,
        // `for_label` already yields a `&'static Encoding`; no extra borrow needed.
        encoding: Encoding::for_label(b"utf-8").expect("`utf-8` is a known encoding label"),
        chars_min_nb: 4,
        require_same_unicode_block: false,
        filter: UTF8_FILTER_LATIN,
        output_line_char_nb_max: 60,
    };
}
/// A single 24-byte ASCII buffer is scanned with `MISSION_ALL_UTF8`
/// (`output_line_char_nb_max` = 10) and is expected to come back as three
/// findings.
#[test]
fn test_scan_input_buffer_chunks() {
    let mission: &'static Mission = &MISSION_ALL_UTF8;
    let mut state = ScannerState::new(mission);
    let chunk = b"a234567890b234567890c234";
    // The trailing `true` presumably marks this as the last input buffer.
    let findings = FindingCollection::from(&mut state, Some(0), chunk, true);

    let first = &findings.v[0];
    assert_eq!(first.position, 10000);
    assert_eq!(first.position_precision, Precision::Exact);
    assert_eq!(first.s, "a234567890");

    let second = &findings.v[1];
    assert_eq!(second.position, 10000);
    assert_eq!(second.position_precision, Precision::After);
    assert_eq!(second.s, "b234567890");

    let third = &findings.v[2];
    assert_eq!(third.position, 10020);
    assert_eq!(third.position_precision, Precision::Exact);
    assert_eq!(third.s, "c234");

    assert!(!state.last_run_str_was_printed_and_is_maybe_cut_str);
    assert_eq!(findings.first_byte_position, 10000);
    assert!(!findings.str_buf_overflow);
    // All 24 input bytes are accounted for on top of the 10_000 offset.
    assert_eq!(state.consumed_bytes, 10000 + 24);
}
/// Same layout as `test_scan_input_buffer_chunks`, but the buffer is only
/// 22 bytes long, so the third finding is the two-character tail "c2".
#[test]
fn test_scan_store_in_scanner_state() {
    let mission: &'static Mission = &MISSION_ALL_UTF8;
    let mut state = ScannerState::new(mission);
    let chunk = b"a234567890b234567890c2";
    let findings = FindingCollection::from(&mut state, Some(0), chunk, true);

    assert_eq!(findings.v.len(), 3);
    assert_eq!(findings.first_byte_position, 10000);
    assert!(!findings.str_buf_overflow);

    let first = &findings.v[0];
    assert_eq!(first.position, 10000);
    assert_eq!(first.position_precision, Precision::Exact);
    assert_eq!(first.s, "a234567890");

    let second = &findings.v[1];
    assert_eq!(second.position, 10000);
    assert_eq!(second.position_precision, Precision::After);
    assert_eq!(second.s, "b234567890");

    let third = &findings.v[2];
    assert_eq!(third.position, 10020);
    assert_eq!(third.position_precision, Precision::Exact);
    assert_eq!(third.s, "c2");

    assert!(!state.last_run_str_was_printed_and_is_maybe_cut_str);
    assert_eq!(state.consumed_bytes, 10000 + 22);
}
/// Two consecutive buffers are scanned with the same state: the trailing
/// "co" of the first buffer is kept in `last_scan_run_leftover` and shows up
/// joined with the leading "me" of the second buffer as "come".
#[test]
fn test_split_str_iterator_and_store_in_scanner_state() {
    let mission: &'static Mission = &MISSION_ALL_UTF8;
    let mut state = ScannerState::new(mission);

    // First buffer (8 bytes): "You", three bytes that are not valid UTF-8, "co".
    let first_chunk = b"You\xC0\x82\xC0co";
    let first_run = FindingCollection::from(&mut state, Some(0), first_chunk, false);
    let you = &first_run.v[0];
    assert_eq!(you.position, 10000);
    assert_eq!(you.position_precision, Precision::Exact);
    assert_eq!(you.s, "You");
    assert_eq!(first_run.v.len(), 1);
    assert_eq!(state.last_scan_run_leftover, "co");
    assert_eq!(first_run.first_byte_position, 10000);
    assert!(!first_run.str_buf_overflow);
    assert_eq!(state.consumed_bytes, 10000 + 8);

    // Second buffer (10 bytes): "me", the same three invalid bytes, "home.".
    let second_chunk = b"me\xC0\x82\xC0home.";
    let second_run = FindingCollection::from(&mut state, Some(0), second_chunk, true);
    assert_eq!(second_run.v.len(), 2);
    let come = &second_run.v[0];
    assert_eq!(come.position, 10008);
    assert_eq!(come.position_precision, Precision::Before);
    assert_eq!(come.s, "come");
    // "home." begins at byte offset 5 of the second buffer.
    let home = &second_run.v[1];
    assert_eq!(home.position, 10013);
    assert_eq!(home.position_precision, Precision::Exact);
    assert_eq!(home.s, "home.");
    assert_eq!(state.last_scan_run_leftover, "");
    assert_eq!(second_run.first_byte_position, 10008);
    assert!(!second_run.str_buf_overflow);
    assert_eq!(state.consumed_bytes, 10008 + 10);
}
/// Scans two buffers with `MISSION_LATIN_UTF8_GREP42` (`grep_char` = 42,
/// i.e. `*`): the first buffer contains no `*` and yields no finding, the
/// second yields the two strings that contain one.
#[test]
fn test_grep_in_scan() {
    let mission: &'static Mission = &MISSION_LATIN_UTF8_GREP42;
    let mut state = ScannerState::new(mission);

    // First buffer (8 bytes): no '*' anywhere.
    let first_chunk = b"You\xC0\x82\xC0co";
    let first_run = FindingCollection::from(&mut state, Some(0), first_chunk, false);
    assert_eq!(first_run.v.len(), 0);
    assert_eq!(state.last_scan_run_leftover, "co");
    assert_eq!(first_run.first_byte_position, 10000);
    assert!(!first_run.str_buf_overflow);
    assert_eq!(state.consumed_bytes, 10000 + 8);

    // Second buffer (13 bytes): "me*", three invalid bytes, "ho*me.", one stray byte.
    let second_chunk = b"me*\xC0\x82\xC0ho*me.\x82";
    let second_run = FindingCollection::from(&mut state, Some(0), second_chunk, true);
    assert_eq!(second_run.v.len(), 2);
    let joined = &second_run.v[0];
    assert_eq!(joined.position, 10008);
    assert_eq!(joined.position_precision, Precision::Before);
    assert_eq!(joined.s, "come*");
    // "ho*me." begins at byte offset 6 of the second buffer.
    let tail = &second_run.v[1];
    assert_eq!(tail.position, 10014);
    assert_eq!(tail.position_precision, Precision::Exact);
    assert_eq!(tail.s, "ho*me.");
    assert_eq!(state.last_scan_run_leftover, "");
    assert_eq!(second_run.first_byte_position, 10008);
    assert!(!second_run.str_buf_overflow);
    assert_eq!(state.consumed_bytes, 10008 + 13);
}
/// The three-byte UTF-8 sequence of '€' (`E2 82 AC`) is split across two
/// input buffers; the decoded text must still contain the complete
/// character.
#[test]
fn test_scan_buffer_split_multibyte() {
    let mission: &'static Mission = &MISSION_ALL_UTF8;
    let mut state = ScannerState::new(mission);

    // Buffer 1 (6 bytes) ends with the first two bytes of '€'.
    let head = b"word\xe2\x82";
    let _head_run = FindingCollection::from(&mut state, Some(0), head, false);

    // Buffer 2 (9 bytes) starts with the missing third byte of '€'.
    let middle = b"\xacoh\xC0no no";
    let middle_run = FindingCollection::from(&mut state, Some(0), middle, false);
    let joined = &middle_run.v[0];
    assert_eq!(joined.position, 10006);
    assert_eq!(joined.position_precision, Precision::Before);
    assert_eq!(joined.s, "word€oh");
    assert_eq!(middle_run.first_byte_position, 10006);
    assert!(!middle_run.str_buf_overflow);
    assert_eq!(state.consumed_bytes, 10006 + 9);

    // Buffer 3 (14 bytes): a complete '€' followed by "Stream end.".
    let tail = b"\xe2\x82\xacStream end.";
    let tail_run = FindingCollection::from(&mut state, Some(0), tail, true);
    assert_eq!(tail_run.len(), 2);
    let cut = &tail_run.v[0];
    assert_eq!(cut.position, 10015);
    assert_eq!(cut.position_precision, Precision::Before);
    assert_eq!(cut.s, "no no€Stre");
    let rest = &tail_run.v[1];
    assert_eq!(rest.position, 10015);
    assert_eq!(rest.position_precision, Precision::After);
    assert_eq!(rest.s, "am end.");
    assert_eq!(tail_run.first_byte_position, 10015);
    assert!(!tail_run.str_buf_overflow);
    assert_eq!(state.consumed_bytes, 10015 + 14);
}
/// With `chars_min_nb` = 3 the two-character runs "ii" and "de" must not be
/// reported, while "abc" and "fgh" are; the trailing "ijk" is carried over
/// into the next buffer.
#[test]
fn test_to_short1() {
    let mission: &'static Mission = &MISSION_ALL_UTF8;
    let mut state = ScannerState::new(mission);

    // First buffer (18 bytes); the runs are separated by bytes invalid in UTF-8.
    let first_chunk = b"ii\xC0abc\xC0\xC1de\xC0fgh\xC0ijk";
    let first_run = FindingCollection::from(&mut state, Some(0), first_chunk, false);
    assert_eq!(first_run.first_byte_position, 10000);
    assert!(!first_run.str_buf_overflow);
    assert_eq!(first_run.v.len(), 2);
    // "abc" begins at byte offset 3, "fgh" at byte offset 11.
    let abc = &first_run.v[0];
    assert_eq!(abc.s, "abc");
    assert_eq!(abc.position, 10003);
    assert_eq!(abc.position_precision, Precision::Exact);
    let fgh = &first_run.v[1];
    assert_eq!(fgh.s, "fgh");
    assert_eq!(fgh.position, 10011);
    assert_eq!(fgh.position_precision, Precision::Exact);
    assert_eq!(state.consumed_bytes, 10000 + 18);
    assert!(!state.last_run_str_was_printed_and_is_maybe_cut_str);
    assert_eq!(state.last_scan_run_leftover, "ijk");

    // Second buffer (8 bytes): "b" completes the leftover, "c" alone is too short.
    let second_chunk = b"b\xC0\x82c\xC0def";
    let second_run = FindingCollection::from(&mut state, Some(0), second_chunk, true);
    assert_eq!(second_run.first_byte_position, 10018);
    assert!(!second_run.str_buf_overflow);
    assert_eq!(second_run.v.len(), 2);
    let ijkb = &second_run.v[0];
    assert_eq!(ijkb.position, 10018);
    assert_eq!(ijkb.position_precision, Precision::Before);
    assert_eq!(ijkb.s, "ijkb");
    // "def" begins at byte offset 5 of the second buffer.
    let def = &second_run.v[1];
    assert_eq!(def.position, 10023);
    assert_eq!(def.position_precision, Precision::Exact);
    assert_eq!(def.s, "def");
    assert_eq!(state.consumed_bytes, 10018 + 8);
    assert!(!state.last_run_str_was_printed_and_is_maybe_cut_str);
    assert_eq!(state.last_scan_run_leftover, "");
}
/// Variant of `test_to_short1` for `MISSION_LATIN_UTF8`: the input is valid
/// UTF-8 throughout and the strings are separated by '€' characters instead
/// of invalid bytes.
#[test]
fn test_to_short2() {
    let mission: &'static Mission = &MISSION_LATIN_UTF8;
    let mut state = ScannerState::new(mission);

    // First buffer: 31 bytes of valid UTF-8.
    let first_chunk = "ii€ääà€€de€fgh€ijk".as_bytes();
    let first_run = FindingCollection::from(&mut state, Some(0), first_chunk, false);
    assert_eq!(first_run.first_byte_position, 10000);
    assert!(!first_run.str_buf_overflow);
    assert_eq!(first_run.v.len(), 2);
    // NOTE(review): "ääà" begins at byte offset 5 of the buffer, yet the
    // expected position is the buffer start with `Precision::Exact` — confirm
    // that this precision is intended.
    let latin = &first_run.v[0];
    assert_eq!(latin.s, "ääà");
    assert_eq!(latin.position, 10000);
    assert_eq!(latin.position_precision, Precision::Exact);
    // NOTE(review): elsewhere in this file `Precision::Before` is only
    // expected for findings that start with leftover text of an earlier
    // buffer; here no earlier buffer exists — confirm.
    let fgh = &first_run.v[1];
    assert_eq!(fgh.s, "fgh");
    assert_eq!(fgh.position, 10020);
    assert_eq!(fgh.position_precision, Precision::Before);
    assert_eq!(state.consumed_bytes, 10000 + 31);
    assert!(!state.last_run_str_was_printed_and_is_maybe_cut_str);
    assert_eq!(state.last_scan_run_leftover, "ijk");

    // Second buffer (8 bytes): "b" completes the leftover, "c" alone is too short.
    let second_chunk = b"b\xC0\x82c\xC0def";
    let second_run = FindingCollection::from(&mut state, Some(0), second_chunk, true);
    assert_eq!(second_run.first_byte_position, 10031);
    assert!(!second_run.str_buf_overflow);
    assert_eq!(second_run.v.len(), 2);
    let ijkb = &second_run.v[0];
    assert_eq!(ijkb.position, 10031);
    assert_eq!(ijkb.position_precision, Precision::Before);
    assert_eq!(ijkb.s, "ijkb");
    // "def" begins at byte offset 5 of the second buffer.
    let def = &second_run.v[1];
    assert_eq!(def.position, 10036);
    assert_eq!(def.position_precision, Precision::Exact);
    assert_eq!(def.s, "def");
    assert_eq!(state.consumed_bytes, 10031 + 8);
    assert!(!state.last_run_str_was_printed_and_is_maybe_cut_str);
    assert_eq!(state.last_scan_run_leftover, "");
}
/// Regression-style test: a buffer made mostly of NUL bytes with single
/// printable ASCII bytes in between is scanned with `MISSION_REAL_DATA_SCAN`
/// (`chars_min_nb` = 4).
#[test]
fn test_field_with_zeros() {
    let m: &'static Mission = &MISSION_REAL_DATA_SCAN;
    // `m` already is a `&'static Mission`, so it is passed on as is.
    let mut ss = ScannerState::new(m);
    let input = b"\x00\x00\x00\x00\x40\x00\x38\x00\x0c\x00\x40\x00\x2c\x00\x2b\x00";
    let fc = FindingCollection::from(&mut ss, Some(0), input, false);
    // NOTE(review): this only rules out exactly one finding; zero as well as
    // two or more findings pass. If "no finding at all" is the intended
    // outcome, `assert_eq!(fc.v.len(), 0)` would pin it down — confirm
    // against `FindingCollection::from`.
    assert_ne!(fc.v.len(), 1);
}
}