pub use analysis::Analysis;
use binary::Binary;
pub use builder::Builder;
use documentation::Documentation;
pub use error::{Error, ErrorKind};
use generated::Generated;
#[cfg(feature = "directory")]
pub use file_source::Directory;
#[cfg(feature = "git")]
pub use file_source::Git;
pub use file_source::FileSource;
use glob::MatchOptions;
use indexmap::IndexMap;
use language::Category;
pub use language::Language;
use std::error::Error as ErrorTrait;
use std::path::Path;
use vendored::Vendored;
use rayon::prelude::{FromParallelIterator, ParallelBridge, ParallelIterator};
use serde::Serialize;
pub mod analysis;
mod binary;
mod builder;
mod documentation;
mod error;
mod file_source;
mod generated;
pub mod language;
mod vendored;
/// Boxed, type-erased error: any value implementing `std::error::Error`.
type GenericError = Box<dyn ErrorTrait>;
/// Local `Result` alias whose error type defaults to `GenericError` when the
/// second type parameter is omitted.
type Result<T, E = GenericError> = std::result::Result<T, E>;
// NOTE(review): this constant is not referenced by the code visible in this
// file; presumably it is shared by the detector submodules — confirm.
/// Match options for the `glob` crate, declared once at the crate root.
const GLOB_MATCH_OPTIONS: MatchOptions = MatchOptions {
// Patterns must match the case of the path exactly.
case_sensitive: true,
// Wildcards may not match a path separator; it has to appear literally.
require_literal_separator: true,
// A leading `.` in a path component does not need to appear literally in
// the pattern, so wildcards can match dotfiles.
require_literal_leading_dot: false,
};
/// Analyzer that classifies the files provided by a [`FileSource`].
///
/// Holds the file source together with the detectors consulted while
/// analyzing each file.
pub struct Gengo<FS>
where
    FS: for<'fs> FileSource<'fs>,
{
    /// Where entries, file paths, contents and per-file overrides come from.
    file_source: FS,
    /// Limit handed to `Language::pick` when choosing a language.
    // NOTE(review): presumably the maximum number of bytes inspected — confirm.
    read_limit: usize,
    /// Detector backing `is_binary`.
    binary: Binary,
    /// Detector backing `is_documentation`.
    documentation: Documentation,
    /// Detector backing `is_generated`.
    generated: Generated,
    /// Detector backing `is_vendored`.
    vendored: Vendored,
}
impl<FS> Gengo<FS>
where
    FS: for<'fs> FileSource<'fs>,
{
    /// Analyzes every entry yielded by the file source, in parallel.
    ///
    /// An entry is silently left out of the result when its path or contents
    /// cannot be obtained from the file source, or when `analyze_blob`
    /// produces nothing for it (binary file, or no language could be picked).
    ///
    /// # Errors
    ///
    /// Fails if the file source cannot provide its state or its entries.
    pub fn analyze(&self) -> Result<Analysis> {
        let initial_state = self.file_source.state()?;
        let source_entries = self.file_source.entries()?;

        // Each worker operates on its own copy of the state via `map_with`.
        let analyzed = source_entries
            .par_bridge()
            .map_with(initial_state, |state, source_entry| {
                let path = self.file_source.filepath(&source_entry, state).ok()?;
                let bytes = self.file_source.contents(&source_entry, state).ok()?;
                self.analyze_blob(&path, bytes, state)
                    .map(|result| (path.as_ref().to_owned(), result))
            })
            .filter_map(|maybe_pair| maybe_pair);

        Ok(Analysis(IndexMap::from_par_iter(analyzed)))
    }

    /// Classifies a single file from its path and contents.
    ///
    /// Overrides reported by the file source take precedence over the
    /// detectors and over language picking. Returns `None` for binary files
    /// and for files whose language is neither overridden nor pickable.
    ///
    /// # Panics
    ///
    /// Panics if the language's category is not one of the categories
    /// handled in the `match` below.
    fn analyze_blob(
        &self,
        filepath: impl AsRef<Path>,
        contents: impl AsRef<[u8]>,
        state: &mut <FS as FileSource>::State,
    ) -> Option<Entry> {
        // Overrides are looked up first, before the binary check.
        let overrides = self.file_source.overrides(&filepath, state);
        let path = filepath.as_ref();
        let bytes = contents.as_ref();

        if self.is_binary(path, bytes) {
            return None;
        }

        let language = match overrides.language {
            Some(forced) => forced,
            None => Language::pick(path, bytes, self.read_limit)?,
        };
        let generated = match overrides.is_generated {
            Some(flag) => flag,
            None => self.is_generated(path, bytes),
        };
        let documentation = match overrides.is_documentation {
            Some(flag) => flag,
            None => self.is_documentation(path, bytes),
        };
        let vendored = match overrides.is_vendored {
            Some(flag) => flag,
            None => self.is_vendored(path, bytes),
        };

        // Data and prose are never detectable by default; the remaining
        // handled categories are detectable unless one of the flags is set.
        let detectable_by_default = match language.category() {
            Category::Data | Category::Prose => false,
            Category::Pattern | Category::Programming | Category::Markup | Category::Query => {
                !generated && !documentation && !vendored
            }
            category => unimplemented!("Failed to check if category {category:?} is detectable"),
        };
        let detectable = overrides.is_detectable.unwrap_or(detectable_by_default);

        Some(Entry {
            language,
            size: bytes.len(),
            detectable,
            generated,
            documentation,
            vendored,
        })
    }

    /// Asks the `Generated` detector whether the file is generated.
    pub fn is_generated(&self, filepath: impl AsRef<Path>, contents: &[u8]) -> bool {
        self.generated.is_generated(filepath, contents)
    }

    /// Asks the `Documentation` detector whether the file is documentation.
    pub fn is_documentation(&self, filepath: impl AsRef<Path>, contents: &[u8]) -> bool {
        self.documentation.is_documentation(filepath, contents)
    }

    /// Asks the `Vendored` detector whether the file is vendored.
    pub fn is_vendored(&self, filepath: impl AsRef<Path>, contents: &[u8]) -> bool {
        self.vendored.is_vendored(filepath, contents)
    }

    /// Asks the `Binary` detector whether the file is binary.
    pub fn is_binary(&self, filepath: impl AsRef<Path>, contents: &[u8]) -> bool {
        self.binary.is_binary(filepath, contents)
    }
}
/// The analysis result for a single file.
#[derive(Debug, Serialize)]
pub struct Entry {
/// The language selected for the file.
language: Language,
/// The length of the file's contents, in bytes.
size: usize,
/// Whether the file is considered detectable.
detectable: bool,
/// Whether the file is considered generated.
generated: bool,
/// Whether the file is considered documentation.
documentation: bool,
/// Whether the file is considered vendored.
vendored: bool,
}
/// Read-only accessors for the fields of an `Entry`.
impl Entry {
/// Returns the language selected for the file.
pub fn language(&self) -> &Language {
&self.language
}
/// Returns the recorded size of the file.
pub fn size(&self) -> usize {
self.size
}
/// Returns whether the file is considered detectable.
pub fn detectable(&self) -> bool {
self.detectable
}
/// Returns whether the file is considered generated.
pub fn generated(&self) -> bool {
self.generated
}
/// Returns whether the file is considered documentation.
pub fn documentation(&self) -> bool {
self.documentation
}
/// Returns whether the file is considered vendored.
pub fn vendored(&self) -> bool {
self.vendored
}
}