criner_waste_report/
lib.rs

1#![deny(unsafe_code)]
2
3#[macro_use]
4extern crate lazy_static;
5
6#[cfg(feature = "html")]
7pub mod html;
8pub mod result;
9
10#[cfg(test)]
11mod test;
12
13use serde_derive::{Deserialize, Serialize};
14use std::collections::BTreeMap;
15
16pub use result::{globset_from_patterns, tar_path_to_utf8_str};
17
18pub type Patterns = Vec<String>;
19
20/// An entry in a tar archive, including the most important meta-data
21#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
22pub struct TarHeader {
23    /// The normalized path of the entry. May not be unicode encoded.
24    pub path: Vec<u8>,
25    /// The size of the file in bytes
26    pub size: u64,
27    /// The type of entry, to be analyzed with tar::EntryType
28    pub entry_type: u8,
29}
30
31#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
32pub struct TarPackage {
33    /// Meta data of all entries in the crate
34    pub entries_meta_data: Vec<TarHeader>,
35    /// The actual content of selected files, Cargo.*, build.rs and lib/main
36    /// IMPORTANT: This file may be partial and limited in size unless it is Cargo.toml, which
37    /// is always complete.
38    /// Note that these are also present in entries_meta_data.
39    pub entries: Vec<(TarHeader, Vec<u8>)>,
40}
41
42#[derive(PartialEq, Eq, Debug, Clone, Deserialize, Serialize)]
43pub struct PotentialWaste {
44    pub patterns_to_fix: Patterns,
45    pub potential_waste: Vec<TarHeader>,
46}
47
48#[derive(PartialEq, Eq, Debug, Clone, Deserialize, Serialize)]
49pub enum Fix {
50    ImprovedInclude {
51        include: Patterns,
52        include_removed: Patterns,
53        potential: Option<PotentialWaste>,
54        has_build_script: bool,
55    },
56    EnrichedExclude {
57        exclude: Patterns,
58        exclude_added: Patterns,
59        has_build_script: bool,
60    },
61    NewInclude {
62        include: Patterns,
63        has_build_script: bool,
64    },
65    RemoveExcludeAndUseInclude {
66        include_added: Patterns,
67        include: Patterns,
68        include_removed: Patterns,
69    },
70    RemoveExclude,
71}
72
73impl Fix {
74    pub fn merge(self, rhs: Option<PotentialWaste>, mut waste: Vec<TarHeader>) -> (Fix, Vec<TarHeader>) {
75        match (self, rhs) {
76            (
77                Fix::NewInclude {
78                    mut include,
79                    has_build_script,
80                },
81                Some(potential),
82            ) => (
83                Fix::NewInclude {
84                    has_build_script,
85                    include: {
86                        include.extend(potential.patterns_to_fix);
87                        include
88                    },
89                },
90                {
91                    waste.extend(potential.potential_waste);
92                    waste
93                },
94            ),
95            (lhs, _) => (lhs, waste),
96        }
97    }
98}
99
100#[derive(Default, Deserialize)]
101pub struct CargoConfig {
102    pub package: Option<PackageSection>,
103    pub lib: Option<SectionWithPath>,
104    pub bin: Option<Vec<SectionWithPath>>,
105}
106
107impl CargoConfig {
108    pub fn actual_or_expected_build_script_path(&self) -> &str {
109        self.build_script_path().unwrap_or("build.rs")
110    }
111    pub fn build_script_path(&self) -> Option<&str> {
112        self.package.as_ref().and_then(|p| p.build_script_path())
113    }
114    pub fn lib_path(&self) -> &str {
115        self.lib
116            .as_ref()
117            .and_then(|l| l.path.as_deref())
118            .unwrap_or("src/lib.rs")
119    }
120    pub fn bin_paths(&self) -> Vec<&str> {
121        self.bin
122            .as_ref()
123            .map(|l| l.iter().filter_map(|s| s.path.as_deref()).collect())
124            .unwrap_or_else(|| vec!["src/main.rs"])
125    }
126}
127
128impl From<&str> for CargoConfig {
129    fn from(v: &str) -> Self {
130        toml::from_str::<CargoConfig>(v).unwrap_or_default() // you would think all of them parse OK, but that's wrong :D
131    }
132}
133
134#[derive(Default, Deserialize)]
135pub struct SectionWithPath {
136    pub path: Option<String>,
137}
138
139#[derive(Default, Deserialize)]
140pub struct PackageSection {
141    pub include: Option<Patterns>,
142    pub exclude: Option<Patterns>,
143    pub build: Option<toml::value::Value>,
144}
145
146impl PackageSection {
147    pub fn build_script_path(&self) -> Option<&str> {
148        self.build.as_ref().and_then(|s| s.as_str())
149    }
150}
151
152pub type WastedFile = (String, u64);
153
154#[derive(Default, Debug, Eq, PartialEq, Clone, Deserialize, Serialize)]
155pub struct AggregateFileInfo {
156    pub total_bytes: u64,
157    pub total_files: u64,
158}
159
160impl std::ops::AddAssign for AggregateFileInfo {
161    fn add_assign(&mut self, rhs: Self) {
162        let Self {
163            total_bytes,
164            total_files,
165        } = rhs;
166        self.total_bytes += total_bytes;
167        self.total_files += total_files;
168    }
169}
170
171impl std::ops::AddAssign for VersionInfo {
172    fn add_assign(&mut self, rhs: Self) {
173        let Self {
174            all,
175            waste,
176            potential_gains,
177            waste_latest_version,
178        } = rhs;
179        self.all += all;
180        self.waste += waste;
181        self.potential_gains = add_optional_aggregate(self.potential_gains.clone(), potential_gains);
182        self.waste_latest_version =
183            add_named_optional_aggregate(self.waste_latest_version.clone(), waste_latest_version);
184    }
185}
186
187fn add_named_optional_aggregate(
188    lhs: Option<(String, AggregateFileInfo)>,
189    rhs: Option<(String, AggregateFileInfo)>,
190) -> Option<(String, AggregateFileInfo)> {
191    Some(match (lhs, rhs) {
192        (Some((lhs_name, lhs)), Some((rhs_name, _))) if lhs_name > rhs_name => (lhs_name, lhs),
193        (Some(_), Some((rhs_name, rhs))) => (rhs_name, rhs),
194        (Some(v), None) => v,
195        (None, Some(v)) => v,
196        (None, None) => return None,
197    })
198}
199
200pub fn add_optional_aggregate(
201    lhs: Option<AggregateFileInfo>,
202    rhs: Option<AggregateFileInfo>,
203) -> Option<AggregateFileInfo> {
204    Some(match (lhs, rhs) {
205        (Some(mut lhs), Some(rhs)) => {
206            lhs += rhs;
207            lhs
208        }
209        (Some(v), None) => v,
210        (None, Some(v)) => v,
211        (None, None) => return None,
212    })
213}
214
215#[derive(Default, Debug, Eq, PartialEq, Clone, Deserialize, Serialize)]
216pub struct VersionInfo {
217    pub all: AggregateFileInfo,
218    pub waste: AggregateFileInfo,
219    pub waste_latest_version: Option<(String, AggregateFileInfo)>,
220    pub potential_gains: Option<AggregateFileInfo>,
221}
222
223pub type AggregateVersionInfo = VersionInfo;
224
225pub type Dict<T> = BTreeMap<String, T>;
226
227#[derive(Debug, Eq, PartialEq, Clone, Deserialize, Serialize)]
228pub enum Report {
229    Version {
230        crate_name: String,
231        crate_version: String,
232        total_size_in_bytes: u64,
233        total_files: u64,
234        wasted_files: Vec<WastedFile>,
235        suggested_fix: Option<Fix>,
236    },
237    Crate {
238        crate_name: String,
239        total_size_in_bytes: u64,
240        total_files: u64,
241        info_by_version: Dict<VersionInfo>,
242        wasted_by_extension: Dict<AggregateFileInfo>,
243    },
244    CrateCollection {
245        total_size_in_bytes: u64,
246        total_files: u64,
247        info_by_crate: Dict<AggregateVersionInfo>,
248        wasted_by_extension: Dict<AggregateFileInfo>,
249    },
250}
251
252fn remove_implicit_entries(entries: &mut Vec<TarHeader>) {
253    entries.retain(|e| {
254        let p = tar_path_to_utf8_str(&e.path);
255        p != ".cargo_vcs_info.json" && p != "Cargo.toml.orig"
256    });
257}
258
259impl Report {
260    pub fn from_package(
261        crate_name: &str,
262        crate_version: &str,
263        TarPackage {
264            mut entries_meta_data,
265            entries,
266        }: TarPackage,
267    ) -> Report {
268        remove_implicit_entries(&mut entries_meta_data);
269        let total_size_in_bytes = entries_meta_data.iter().map(|e| e.size).sum();
270        let total_files = entries_meta_data.len() as u64;
271        let cargo_config = Self::cargo_config_from_entries(&entries);
272        let (includes, excludes, compile_time_includes, build_script_name) =
273            Self::cargo_config_into_includes_excludes(cargo_config, &entries, &entries_meta_data);
274        let (suggested_fix, wasted_files) = match (includes, excludes, build_script_name, compile_time_includes) {
275            (Some(includes), Some(excludes), _presence_of_build_script_not_relevant, _) => {
276                Self::compute_includes_from_includes_and_excludes(entries_meta_data, includes, excludes)
277            }
278            (Some(includes), None, build_script_name, _) => {
279                Self::enrich_includes(entries_meta_data, includes, build_script_name.is_some())
280            }
281            (None, Some(excludes), build_script_name, compile_time_includes) => Self::enrich_excludes(
282                entries_meta_data,
283                excludes,
284                compile_time_includes,
285                build_script_name.is_some(),
286            ),
287            (None, None, build_script_name, compile_time_includes) => {
288                Self::standard_includes(entries_meta_data, build_script_name, compile_time_includes)
289            }
290        };
291        let wasted_files = Self::convert_to_wasted_files(wasted_files);
292        Report::Version {
293            crate_name: crate_name.into(),
294            crate_version: crate_version.into(),
295            total_size_in_bytes,
296            total_files,
297            wasted_files,
298            suggested_fix,
299        }
300    }
301}