use std::{
fs::read_to_string,
io::{prelude::*, stdin},
path::{Path, PathBuf},
time::Instant,
};
use anyhow::{format_err, Error};
use log::info;
use super::{commands::*, formats::*, util::*};
use askalono::{ScanMode, ScanStrategy, Store, TextData};
/// Minimum confidence score; passed to the scan strategy's
/// `confidence_threshold` when `identify_data` builds its `ScanStrategy`.
const MIN_SCORE: f32 = 0.8;
pub fn identify(
cache_filename: &Path,
output_format: &OutputFormat,
filename: Option<PathBuf>,
optimize: bool,
want_diff: bool,
batch: bool,
) -> Result<(), Error> {
let cache_inst = Instant::now();
let store = load_store(cache_filename)?;
info!(
"Cache loaded in {} ms",
cache_inst.elapsed().subsec_nanos() as f32 / 1_000_000.0
);
if !batch {
let filename = filename.expect("no filename provided");
let stdin_indicator: PathBuf = "-".into();
let content = if filename == stdin_indicator {
let mut buf = String::new();
stdin().read_to_string(&mut buf)?;
buf
} else {
read_to_string(&filename)?
};
let idres = identify_data(&store, &content.into(), optimize, want_diff);
let file_lossy = filename.to_string_lossy();
let fileres = FileResult::from_identification_result(&file_lossy, &idres);
fileres.print_as(output_format, false);
return idres.map(|_| ());
}
loop {
let mut buf = String::new();
stdin().read_line(&mut buf)?;
if buf.is_empty() {
break;
}
let filename: PathBuf = buf.trim().into();
let content = match read_to_string(filename) {
Ok(c) => c,
Err(e) => {
let fileres = FileResult::Err {
path: &buf,
error: format!("Input error: {}", e),
};
fileres.print_as(output_format, false);
continue;
}
};
let idres = identify_data(&store, &content.into(), optimize, want_diff);
let fileres = FileResult::from_identification_result(&buf, &idres);
fileres.print_as(output_format, false);
}
Ok(())
}
/// Scan `text_data` against `store` and convert the outcome into a
/// [`CLIIdentification`].
///
/// The scan uses a `ScanStrategy` in `ScanMode::Elimination` with
/// `MIN_SCORE` as its confidence threshold, a single pass (`max_passes(1)`),
/// and the caller's `optimize` flag.
///
/// The returned value carries the overall score, the identified license (if
/// any), and every contained result reported by the scan. When `want_diff`
/// is set, `diff_result` is invoked against the identified license's data,
/// or, when there is no overall license, against the first contained
/// result's license data.
///
/// # Errors
///
/// Returns an error if the scan itself fails, or if the scan yields neither
/// an overall license nor any contained result.
///
/// # Panics
///
/// Panics if `store.aliases` fails for a license name reported by the scan.
pub fn identify_data(
    store: &Store,
    text_data: &TextData,
    optimize: bool,
    want_diff: bool,
) -> Result<CLIIdentification, Error> {
    let inst = Instant::now();

    let strategy = ScanStrategy::new(store)
        .mode(ScanMode::Elimination)
        .confidence_threshold(MIN_SCORE)
        .optimize(optimize)
        .max_passes(1);
    let result = strategy.scan(text_data)?;

    info!(
        "{:?} in {} ms",
        result,
        // Use the full elapsed duration: `subsec_nanos` alone drops whole
        // seconds and under-reports any scan slower than one second.
        inst.elapsed().as_secs_f32() * 1000.0
    );

    let mut output = CLIIdentification {
        score: result.score,
        license: None,
        containing: result
            .containing
            .iter()
            .map(|cr| CLIContainedResult {
                score: cr.score,
                license: CLIIdentifiedLicense {
                    // NOTE(review): the name comes from a scan over this same
                    // store, so the lookup is presumably infallible — confirm.
                    aliases: store.aliases(cr.license.name).unwrap().clone(),
                    name: cr.license.name.to_owned(),
                    kind: cr.license.kind,
                },
                line_range: cr.line_range,
            })
            .collect(),
    };

    // An overall match takes precedence over contained matches.
    if let Some(license) = result.license {
        output.license = Some(CLIIdentifiedLicense {
            aliases: store.aliases(license.name).unwrap().clone(),
            name: license.name.to_owned(),
            kind: license.kind,
        });
        if want_diff {
            diff_result(text_data, license.data);
        }
        return Ok(output);
    }

    // No overall match: fall back to contained results, diffing against the
    // first one. `first()` avoids an index that could panic.
    if let Some(first) = result.containing.first() {
        if want_diff {
            diff_result(text_data, first.license.data);
        }
        return Ok(output);
    }

    Err(format_err!(
        "Confidence threshold not high enough for any known license",
    ))
}