1pub use analysis::Analysis;
14use binary::Binary;
15pub use builder::Builder;
16use documentation::Documentation;
17
18pub use error::{Error, ErrorKind};
19use generated::Generated;
20
21pub use file_source::{Directory, FileSource, Git};
22use glob::MatchOptions;
23use indexmap::IndexMap;
24use language::Category;
25pub use language::Language;
26
27use std::error::Error as ErrorTrait;
28use std::path::Path;
29
30use vendored::Vendored;
31
32use rayon::prelude::{FromParallelIterator, ParallelBridge, ParallelIterator};
33use serde::Serialize;
34
35pub mod analysis;
36mod binary;
37mod builder;
38mod documentation;
39mod error;
40mod file_source;
41mod generated;
42pub mod language;
43mod vendored;
44
45type GenericError = Box<dyn ErrorTrait>;
46type Result<T, E = GenericError> = std::result::Result<T, E>;
47
48const GLOB_MATCH_OPTIONS: MatchOptions = MatchOptions {
50    case_sensitive: true,
51    require_literal_separator: true,
52    require_literal_leading_dot: false,
53};
54
55pub struct Gengo<FS: for<'fs> FileSource<'fs>> {
57    file_source: FS,
58    read_limit: usize,
59    binary: Binary,
60    documentation: Documentation,
61    generated: Generated,
62    vendored: Vendored,
63}
64
65impl<FS: for<'fs> FileSource<'fs>> Gengo<FS> {
66    pub fn analyze(&self) -> Result<Analysis> {
68        let state = self.file_source.state()?;
69        let entries = self
70            .file_source
71            .entries()?
72            .par_bridge()
73            .map_with(state, |state, entry| {
74                let filepath = self.file_source.filepath(&entry, state).ok()?;
75                let contents = self.file_source.contents(&entry, state).ok()?;
76
77                let entry = self.analyze_blob(&filepath, contents, state)?;
78                Some((filepath.as_ref().to_owned(), entry))
79            })
80            .filter_map(|entry| entry);
81        let entries = IndexMap::from_par_iter(entries);
82
83        Ok(Analysis(entries))
84    }
85
86    fn analyze_blob(
87        &self,
88        filepath: impl AsRef<Path>,
89        contents: impl AsRef<[u8]>,
90        state: &mut <FS as FileSource>::State,
91    ) -> Option<Entry> {
92        let overrides = self.file_source.overrides(&filepath, state);
93        let filepath = filepath.as_ref();
94        let contents = contents.as_ref();
95
96        if self.is_binary(filepath, contents) {
99            return None;
100        }
101
102        let language = overrides
103            .language
104            .or_else(|| Language::pick(filepath, contents, self.read_limit))?;
105        let generated = overrides
106            .is_generated
107            .unwrap_or_else(|| self.is_generated(filepath, contents));
108        let documentation = overrides
109            .is_documentation
110            .unwrap_or_else(|| self.is_documentation(filepath, contents));
111        let vendored = overrides
112            .is_vendored
113            .unwrap_or_else(|| self.is_vendored(filepath, contents));
114
115        let detectable = match language.category() {
116            Category::Data | Category::Prose => false,
117            Category::Pattern | Category::Programming | Category::Markup | Category::Query => {
118                !(generated || documentation || vendored)
119            }
120        };
121        let detectable = overrides.is_detectable.unwrap_or(detectable);
122
123        let size = contents.len();
124        let entry = Entry {
125            language,
126            size,
127            detectable,
128            generated,
129            documentation,
130            vendored,
131        };
132        Some(entry)
133    }
134
135    pub fn is_generated(&self, filepath: impl AsRef<Path>, contents: &[u8]) -> bool {
137        self.generated.is_generated(filepath, contents)
138    }
139
140    pub fn is_documentation(&self, filepath: impl AsRef<Path>, contents: &[u8]) -> bool {
142        self.documentation.is_documentation(filepath, contents)
143    }
144
145    pub fn is_vendored(&self, filepath: impl AsRef<Path>, contents: &[u8]) -> bool {
147        self.vendored.is_vendored(filepath, contents)
148    }
149
150    pub fn is_binary(&self, filepath: impl AsRef<Path>, contents: &[u8]) -> bool {
152        self.binary.is_binary(filepath, contents)
153    }
154}
155
156#[derive(Debug, Serialize)]
158pub struct Entry {
159    language: Language,
161    size: usize,
163    detectable: bool,
165    generated: bool,
167    documentation: bool,
169    vendored: bool,
171}
172
173impl Entry {
174    pub fn language(&self) -> &Language {
176        &self.language
177    }
178
179    pub fn size(&self) -> usize {
181        self.size
182    }
183
184    pub fn detectable(&self) -> bool {
186        self.detectable
187    }
188
189    pub fn generated(&self) -> bool {
191        self.generated
192    }
193
194    pub fn documentation(&self) -> bool {
196        self.documentation
197    }
198
199    pub fn vendored(&self) -> bool {
201        self.vendored
202    }
203}