Skip to main content

tokmd_analysis_assets/
lib.rs

1use std::collections::{BTreeMap, BTreeSet};
2use std::path::{Path, PathBuf};
3
4use anyhow::Result;
5use tokmd_analysis_types::{
6    AssetCategoryRow, AssetFileRow, AssetReport, DependencyReport, LockfileReport,
7};
8
9const ASSET_TOP_N: usize = 10;
10
11/// Build aggregate asset inventory for files produced by a walk.
12pub fn build_assets_report(root: &Path, files: &[PathBuf]) -> Result<AssetReport> {
13    let mut categories: BTreeMap<String, (usize, u64, BTreeSet<String>)> = BTreeMap::new();
14    let mut top_files: Vec<AssetFileRow> = Vec::new();
15    let mut total_files = 0usize;
16    let mut total_bytes = 0u64;
17
18    for rel in files {
19        let ext = rel
20            .extension()
21            .and_then(|e| e.to_str())
22            .unwrap_or("")
23            .to_lowercase();
24        if ext.is_empty() {
25            continue;
26        }
27        let category = match asset_category(&ext) {
28            Some(cat) => cat,
29            None => continue,
30        };
31        let bytes = tokmd_walk::file_size(root, rel).unwrap_or(0);
32        total_files += 1;
33        total_bytes += bytes;
34
35        let entry = categories
36            .entry(category.to_string())
37            .or_insert((0, 0, BTreeSet::new()));
38        entry.0 += 1;
39        entry.1 += bytes;
40        entry.2.insert(ext.clone());
41
42        top_files.push(AssetFileRow {
43            path: rel.to_string_lossy().replace('\\', "/"),
44            bytes,
45            category: category.to_string(),
46            extension: ext,
47        });
48    }
49
50    let mut category_rows: Vec<AssetCategoryRow> = categories
51        .into_iter()
52        .map(|(category, (files, bytes, exts))| AssetCategoryRow {
53            category,
54            files,
55            bytes,
56            extensions: exts.into_iter().collect(),
57        })
58        .collect();
59
60    category_rows.sort_by(|a, b| {
61        b.bytes
62            .cmp(&a.bytes)
63            .then_with(|| a.category.cmp(&b.category))
64    });
65    top_files.sort_by(|a, b| b.bytes.cmp(&a.bytes).then_with(|| a.path.cmp(&b.path)));
66    top_files.truncate(ASSET_TOP_N);
67
68    Ok(AssetReport {
69        total_files,
70        total_bytes,
71        categories: category_rows,
72        top_files,
73    })
74}
75
/// Map a file extension to its asset category name.
///
/// `ext` is matched exactly (no leading dot, case-sensitive), so callers are
/// expected to pass it already lower-cased. Returns `None` for extensions that
/// are not considered assets.
fn asset_category(ext: &str) -> Option<&'static str> {
    const CATEGORIES: [(&str, &[&str]); 6] = [
        (
            "image",
            &["png", "jpg", "jpeg", "gif", "svg", "webp", "bmp", "tiff", "ico"],
        ),
        ("video", &["mp4", "mov", "avi", "mkv", "webm", "mpeg", "mpg"]),
        ("audio", &["mp3", "wav", "flac", "ogg", "aac", "m4a"]),
        ("archive", &["zip", "tar", "gz", "bz2", "xz", "7z", "rar"]),
        ("binary", &["exe", "dll", "so", "dylib", "bin", "class", "jar"]),
        ("font", &["ttf", "otf", "woff", "woff2"]),
    ];

    CATEGORIES
        .iter()
        .find(|(_, known)| known.iter().any(|candidate| *candidate == ext))
        .map(|(category, _)| *category)
}
87
88/// Build dependency lockfile summary from detected lockfile paths.
89pub fn build_dependency_report(root: &Path, files: &[PathBuf]) -> Result<DependencyReport> {
90    let mut lockfiles: Vec<LockfileReport> = Vec::new();
91
92    for rel in files {
93        let name = rel.file_name().and_then(|n| n.to_str()).unwrap_or("");
94        let path = root.join(rel);
95        let content = std::fs::read_to_string(&path);
96
97        let (kind, count) = match name {
98            "Cargo.lock" => content
99                .as_deref()
100                .map(|c| ("cargo", count_cargo_lock(c)))
101                .unwrap_or(("cargo", 0)),
102            "package-lock.json" => content
103                .as_deref()
104                .map(|c| ("npm", count_package_lock(c)))
105                .unwrap_or(("npm", 0)),
106            "pnpm-lock.yaml" => content
107                .as_deref()
108                .map(|c| ("pnpm", count_pnpm_lock(c)))
109                .unwrap_or(("pnpm", 0)),
110            "yarn.lock" => content
111                .as_deref()
112                .map(|c| ("yarn", count_yarn_lock(c)))
113                .unwrap_or(("yarn", 0)),
114            "go.sum" => content
115                .as_deref()
116                .map(|c| ("go", count_go_sum(c)))
117                .unwrap_or(("go", 0)),
118            "Gemfile.lock" => content
119                .as_deref()
120                .map(|c| ("bundler", count_gemfile_lock(c)))
121                .unwrap_or(("bundler", 0)),
122            _ => continue,
123        };
124
125        lockfiles.push(LockfileReport {
126            path: rel.to_string_lossy().replace('\\', "/"),
127            kind: kind.to_string(),
128            dependencies: count,
129        });
130    }
131
132    let total = lockfiles.iter().map(|l| l.dependencies).sum();
133    Ok(DependencyReport { total, lockfiles })
134}
135
/// Count the packages recorded in a `Cargo.lock` by counting the
/// non-overlapping occurrences of the `[[package]]` table header.
fn count_cargo_lock(content: &str) -> usize {
    const PACKAGE_HEADER: &str = "[[package]]";
    content
        .match_indices(PACKAGE_HEADER)
        .fold(0usize, |seen, _| seen + 1)
}
139
140fn count_package_lock(content: &str) -> usize {
141    let parsed: serde_json::Value = match serde_json::from_str(content) {
142        Ok(v) => v,
143        Err(_) => return 0,
144    };
145    if let Some(packages) = parsed.get("packages").and_then(|v| v.as_object()) {
146        let mut count = packages.len();
147        if packages.contains_key("") {
148            count = count.saturating_sub(1);
149        }
150        return count;
151    }
152    parsed
153        .get("dependencies")
154        .and_then(|v| v.as_object())
155        .map(|o| o.len())
156        .unwrap_or(0)
157}
158
/// Count package entries in a `pnpm-lock.yaml`.
///
/// A line counts as a package entry when, ignoring leading whitespace, it
/// begins with `/` and the line contains a `:` somewhere.
fn count_pnpm_lock(content: &str) -> usize {
    let mut entries = 0usize;
    for raw in content.lines() {
        let looks_like_package_key = raw.trim_start().starts_with('/') && raw.contains(':');
        if looks_like_package_key {
            entries += 1;
        }
    }
    entries
}
165
/// Count the entries in a `yarn.lock`.
///
/// An entry header is a line that is not indented by two or more spaces and
/// ends with `:` (for example `"@babel/core@^7.0.0":`). Comment lines
/// (starting with `#`) are ignored, as is the `__metadata:` header that
/// Yarn Berry lockfiles carry, since it is bookkeeping rather than a
/// dependency.
///
/// Indented fields such as `  version "1.2.3"` or `  dependencies:` are
/// already rejected by the indentation rule, so no name-based filtering is
/// applied: packages whose name happens to begin with `version` are counted
/// like any other.
fn count_yarn_lock(content: &str) -> usize {
    content
        .lines()
        .filter(|line| !line.starts_with("  ") && line.ends_with(':'))
        .filter(|line| !line.trim_start().starts_with('#'))
        .filter(|line| line.trim() != "__metadata:")
        .count()
}
176
/// Count the distinct `module@version` pairs listed in a `go.sum`.
///
/// Each line is split on whitespace; the first two fields are taken as the
/// module path and the version. Lines with fewer than two fields are skipped,
/// as are lines whose version field ends in `/go.mod`. Repeated pairs are
/// counted once.
fn count_go_sum(content: &str) -> usize {
    content
        .lines()
        .filter_map(|entry| {
            let mut fields = entry.split_whitespace();
            match (fields.next(), fields.next()) {
                (Some(module), Some(version)) if !version.ends_with("/go.mod") => {
                    Some(format!("{}@{}", module, version))
                }
                _ => None,
            }
        })
        .collect::<BTreeSet<String>>()
        .len()
}
196
/// Count the resolved gems listed in a `Gemfile.lock`.
///
/// Gems are listed under `specs:` headings. Within such a section a resolved
/// gem sits at exactly four spaces of indentation with its version in
/// parentheses (`    rails (7.0.0)`); the more deeply indented lines beneath
/// it are that gem's own requirements (`      actionpack (= 7.0.0)`) and are
/// not counted, since each required gem has its own four-space entry.
///
/// A section ends at the first line that is not indented by at least four
/// spaces (including an empty line). Every `specs:` section in the file
/// contributes to the total.
fn count_gemfile_lock(content: &str) -> usize {
    let mut count = 0usize;
    let mut in_specs = false;

    for line in content.lines() {
        if line.trim() == "specs:" {
            in_specs = true;
            continue;
        }
        if !in_specs {
            continue;
        }

        match line.strip_prefix("    ") {
            // Dedent (or blank line) closes the current specs section.
            None => in_specs = false,
            Some(rest) => {
                // `rest` starting with a space means deeper nesting, i.e. a
                // requirement of the gem above rather than a gem of its own.
                if !rest.starts_with(' ') && rest.contains('(') {
                    count += 1;
                }
            }
        }
    }

    count
}