extern crate encoding_rs;
mod finding;
mod finding_collection;
mod help;
mod helper;
mod input;
mod mission;
mod options;
mod scanner;
use crate::finding::OUTPUT_LINE_METADATA_LEN;
use crate::finding_collection::FindingCollection;
use crate::help::help;
use crate::input::Slicer;
use crate::mission::MISSIONS;
use crate::options::ARGS;
use crate::scanner::ScannerStates;
use itertools::kmerge;
use scoped_threadpool::Pool;
use std::fs::File;
use std::io;
use std::io::LineWriter;
use std::io::Write;
use std::path::Path;
use std::pin::Pin;
use std::process;
use std::str;
use std::sync::mpsc;
use std::thread;
use std::thread::JoinHandle;
/// Package version read from the `CARGO_PKG_VERSION` environment variable at
/// compile time; `None` when that variable is not set during the build.
const VERSION: Option<&'static str> = option_env!("CARGO_PKG_VERSION");
/// Copyright notice: author name and the years covered.
const AUTHOR: &str = "(c) Jens Getreu, 2016-2024";
fn run() -> Result<(), anyhow::Error> {
let merger: JoinHandle<_>;
{
let n_threads = MISSIONS.len();
let (tx, rx) = mpsc::sync_channel(n_threads);
merger = thread::spawn(move || {
let mut output = match ARGS.output {
Some(ref fname) => {
let f = File::create(Path::new(fname))?;
let output_line_len =
2 * MISSIONS[0].output_line_char_nb_max + OUTPUT_LINE_METADATA_LEN;
let f = LineWriter::with_capacity(output_line_len, f);
Box::new(f) as Box<dyn Write>
}
None => Box::new(io::stdout()) as Box<dyn Write>,
};
output.write_all("\u{feff}".as_bytes())?;
'batch_receiver: loop {
let mut results: Vec<Pin<Box<FindingCollection>>> = Vec::with_capacity(n_threads);
for _ in 0..n_threads {
results.push(match rx.recv() {
Ok(fc) => fc,
_ => break 'batch_receiver,
});
}
for finding in kmerge(&results) {
finding.print(&mut output)?;
}
}
output.write_all(&[b'\n'])?;
output.flush()?;
Ok(())
});
let input = Slicer::new();
let mut sss = ScannerStates::new(&MISSIONS);
let mut pool = Pool::new(MISSIONS.len() as u32);
for (slice, input_file_id, is_last_input_buffer) in input {
pool.scoped(|scope| {
for ss in sss.v.iter_mut() {
let tx = tx.clone();
scope.execute(move || {
let fc =
FindingCollection::from(ss, input_file_id, slice, is_last_input_buffer);
tx.send(fc).expect(
"Error: Can not sent result through output channel. \
Write permissions? Is there enough space? ",
);
});
}
});
}
}
merger.join().unwrap()
}
/// Program entry point.
///
/// Calls `help()` first, then `run()`. When `run()` fails, the error is
/// printed to stderr and the process exits with status code 1.
fn main() {
    help();

    match run() {
        Ok(()) => {}
        Err(err) => {
            eprintln!("Error: `{:?}`.", err);
            process::exit(1);
        }
    }
}
#[cfg(test)]
mod tests {
    use crate::finding::Precision;
    use crate::finding_collection::FindingCollection;
    use crate::mission::Missions;
    use crate::options::{Args, Radix};
    use crate::scanner::ScannerState;
    use itertools::Itertools;
    use lazy_static::lazy_static;
    use std::path::PathBuf;

    // Command line arguments used as test fixture: two encodings (hence two
    // missions), a minimum of 5 characters and a counter offset of 5000.
    lazy_static! {
        pub static ref ARGS: Args = Args {
            inputs: vec![PathBuf::from("myfile.txt")],
            debug_option: false,
            encoding: vec!["ascii".to_string(), "utf-8".to_string()],
            list_encodings: false,
            version: false,
            chars_min: Some("5".to_string()),
            same_unicode_block: true,
            grep_char: None,
            radix: Some(Radix::X),
            output: None,
            output_line_len: Some("30".to_string()),
            no_metadata: false,
            counter_offset: Some("5000".to_string()),
            ascii_filter: None,
            unicode_block_filter: None,
        };
    }

    // Missions built from the fixture arguments above.
    lazy_static! {
        pub static ref MISSIONS: Missions = Missions::new(
            ARGS.counter_offset.as_ref(),
            &ARGS.encoding,
            ARGS.chars_min.as_ref(),
            ARGS.same_unicode_block,
            ARGS.ascii_filter.as_ref(),
            ARGS.unicode_block_filter.as_ref(),
            ARGS.grep_char.as_ref(),
            ARGS.output_line_len.as_ref(),
        )
        .unwrap();
    }

    /// Scans the same buffer with both missions and checks that `kmerge`
    /// yields the findings of both collections in the expected order.
    #[test]
    fn test_merger() {
        use std::pin::Pin;

        let haystack = "abcdefgÜhijklmn€opÜqrstuvwÜxyz".as_bytes();
        let missions = &MISSIONS;

        let mut state_a = ScannerState::new(&missions.v[0]);
        let mut state_b = ScannerState::new(&missions.v[1]);

        let collections: Vec<Pin<Box<FindingCollection>>> = vec![
            FindingCollection::from(&mut state_a, Some(0), haystack, true),
            FindingCollection::from(&mut state_b, Some(0), haystack, true),
        ];
        assert_eq!(collections.len(), 2);

        // Findings of each collection on its own.
        let first_expected = ["abcdefg", "hijklmn", "qrstuvw"];
        assert_eq!(collections[0].v.len(), 3);
        for (idx, want) in first_expected.iter().enumerate() {
            assert_eq!(collections[0].v[idx].s, *want);
        }

        let second_expected = ["abcdefgÜhijklmn", "opÜqrstuvwÜxyz"];
        assert_eq!(collections[1].v.len(), 2);
        for (idx, want) in second_expected.iter().enumerate() {
            assert_eq!(collections[1].v[idx].s, *want);
        }

        // Merged sequence: (string, position precision, mission id).
        let merged_expected = [
            ("abcdefg", Precision::Exact, 0),
            ("hijklmn", Precision::After, 0),
            ("qrstuvw", Precision::After, 0),
            ("abcdefgÜhijklmn", Precision::Exact, 1),
            ("opÜqrstuvwÜxyz", Precision::After, 1),
        ];

        let mut merged = collections.iter().kmerge();
        for (text, precision, mission_id) in merged_expected.iter() {
            let f = merged.next().unwrap();
            assert_eq!(f.s, *text);
            assert_eq!(f.position, 5000);
            assert_eq!(f.position_precision, *precision);
            assert_eq!(f.mission.mission_id, *mission_id);
        }

        // The merged iterator must be exhausted now.
        assert_eq!(merged.next(), None);
    }
}