use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use torsh_core::error::Result;

use crate::package::Package;
use crate::resources::ResourceType;
use crate::utils::format_file_size;

/// Estimate the expected compression ratio (compressed size / original size)
/// for a resource of the given type. Lower values compress better.
fn estimate_compression_ratio_by_type(resource_type: &ResourceType) -> f64 {
    match resource_type {
        ResourceType::Model => 0.7,
        ResourceType::Source => 0.5,
        ResourceType::Data => 0.6,
        ResourceType::Config => 0.4,
        ResourceType::Documentation => 0.3,
        ResourceType::Text => 0.3,
        ResourceType::Binary => 0.9,
        ResourceType::License => 0.4,
        ResourceType::Metadata => 0.4,
    }
}

/// Summary of the size savings available for a package.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OptimizationReport {
    /// Total package size before optimization, in bytes.
    pub original_size: u64,
    /// Estimated package size after applying all optimizations, in bytes.
    pub optimized_size: u64,
    /// Total estimated savings, in bytes.
    pub savings: u64,
    /// Savings as a percentage of the original size.
    pub savings_percent: f64,
    /// Individual optimization opportunities, sorted by priority (highest first).
    pub opportunities: Vec<OptimizationOpportunity>,
    /// Duplicate-resource analysis results.
    pub deduplication: DeduplicationAnalysis,
    /// Compression analysis results.
    pub compression: CompressionAnalysis,
}

/// A single actionable optimization identified by the analyzer.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OptimizationOpportunity {
    /// Kind of optimization.
    pub optimization_type: OptimizationType,
    /// Human-readable description of the opportunity.
    pub description: String,
    /// Estimated savings, in bytes.
    pub potential_savings: u64,
    /// Priority of this opportunity; higher values indicate greater expected impact.
    pub priority: u8,
    /// Names of the resources this opportunity applies to.
    pub affected_resources: Vec<String>,
}

/// Kinds of optimization the analyzer can suggest.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum OptimizationType {
    /// Remove duplicate copies of identical resources.
    Deduplication,
    /// Re-compress an already-compressed resource more effectively.
    CompressionUpgrade,
    /// Remove resources that are not used.
    RemoveUnused,
    /// Compress a resource that is currently stored uncompressed.
    AddCompression,
    /// Merge many small resources to reduce overhead.
    MergeSmall,
    /// Split a very large resource into smaller parts.
    SplitLarge,
}

/// Results of scanning a package for duplicate resources.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DeduplicationAnalysis {
    /// Total number of resources in the package.
    pub total_resources: usize,
    /// Number of distinct resource contents (by SHA-256 hash).
    pub unique_resources: usize,
    /// Number of redundant copies beyond the first in each group.
    pub duplicate_count: usize,
    /// Map from content hash to the names of resources sharing that content.
    pub duplicate_groups: HashMap<String, Vec<String>>,
    /// Bytes that would be saved by removing the redundant copies.
    pub potential_savings: u64,
}

/// Results of scanning a package for compression opportunities.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CompressionAnalysis {
    /// Uncompressed resources that would benefit from compression.
    pub compressible_resources: Vec<CompressibleResource>,
    /// Resources that are already compressed or unlikely to benefit.
    pub well_compressed_count: usize,
    /// Total bytes that would be saved by compressing the listed resources.
    pub potential_savings: u64,
}

/// A single resource that would benefit from compression.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CompressibleResource {
    /// Resource name.
    pub name: String,
    /// Current (uncompressed) size, in bytes.
    pub current_size: u64,
    /// Estimated size after compression, in bytes.
    pub estimated_compressed_size: u64,
    /// Estimated savings, in bytes.
    pub potential_savings: u64,
    /// Estimated compression ratio (compressed size / original size).
    pub compression_ratio: f64,
}

/// Analyzes packages for size-optimization opportunities and applies them.
pub struct PackageOptimizer {
    /// Smallest resource size, in bytes, considered worth compressing.
    pub min_compression_size: u64,
    /// Highest estimated compression ratio at which compression is still worthwhile.
    pub min_compression_ratio: f64,
    /// Whether duplicate resources should be analyzed and removed.
    pub enable_deduplication: bool,
}

impl PackageOptimizer {
    /// Create an optimizer with default thresholds.
    pub fn new() -> Self {
        Self {
            min_compression_size: 1024,
            min_compression_ratio: 0.7,
            enable_deduplication: true,
        }
    }

    /// Analyze a package and report the optimizations that could be applied.
    pub fn analyze(&self, package: &Package) -> Result<OptimizationReport> {
        let original_size = self.calculate_package_size(package);

        let deduplication = if self.enable_deduplication {
            self.analyze_deduplication(package)
        } else {
            DeduplicationAnalysis {
                total_resources: 0,
                unique_resources: 0,
                duplicate_count: 0,
                duplicate_groups: HashMap::new(),
                potential_savings: 0,
            }
        };

        let compression = self.analyze_compression(package)?;

        let mut opportunities = Vec::new();

        // Report a deduplication opportunity if any duplicates were found.
        if deduplication.duplicate_count > 0 {
            opportunities.push(OptimizationOpportunity {
                optimization_type: OptimizationType::Deduplication,
                description: format!(
                    "Found {} duplicate resources that could be deduplicated",
                    deduplication.duplicate_count
                ),
                potential_savings: deduplication.potential_savings,
                priority: 5,
                affected_resources: deduplication
                    .duplicate_groups
                    .values()
                    .flatten()
                    .cloned()
                    .collect(),
            });
        }

        // Report a compression opportunity for each resource worth compressing.
        for resource in &compression.compressible_resources {
            if resource.potential_savings > self.min_compression_size {
                opportunities.push(OptimizationOpportunity {
                    optimization_type: OptimizationType::AddCompression,
                    description: format!(
                        "Resource '{}' could be compressed to save {}",
                        resource.name,
                        format_file_size(resource.potential_savings)
                    ),
                    potential_savings: resource.potential_savings,
                    priority: if resource.compression_ratio < 0.5 {
                        4
                    } else {
                        3
                    },
                    affected_resources: vec![resource.name.clone()],
                });
            }
        }

        let total_savings = deduplication.potential_savings + compression.potential_savings;
        let optimized_size = original_size.saturating_sub(total_savings);
        let savings_percent = if original_size > 0 {
            (total_savings as f64 / original_size as f64) * 100.0
        } else {
            0.0
        };

        opportunities.sort_by(|a, b| b.priority.cmp(&a.priority));

        Ok(OptimizationReport {
            original_size,
            optimized_size,
            savings: total_savings,
            savings_percent,
            opportunities,
            deduplication,
            compression,
        })
    }

    /// Total size of all resources in the package, in bytes.
    fn calculate_package_size(&self, package: &Package) -> u64 {
        package.resources().values().map(|r| r.size() as u64).sum()
    }

    /// Group resources by content hash and compute potential deduplication savings.
    fn analyze_deduplication(&self, package: &Package) -> DeduplicationAnalysis {
        let mut hash_to_resources: HashMap<String, Vec<String>> = HashMap::new();
        let mut hash_to_size: HashMap<String, u64> = HashMap::new();

        for (name, resource) in package.resources() {
            let hash = resource.sha256();
            let size = resource.size() as u64;

            hash_to_resources
                .entry(hash.clone())
                .or_default()
                .push(name.clone());

            hash_to_size.insert(hash, size);
        }

        let duplicate_groups: HashMap<String, Vec<String>> = hash_to_resources
            .iter()
            .filter(|(_, resources)| resources.len() > 1)
            .map(|(hash, resources)| (hash.clone(), resources.clone()))
            .collect();

        let duplicate_count: usize = duplicate_groups
            .values()
            .map(|v| v.len() - 1) // every copy beyond the first is redundant
            .sum();

        let duplicate_savings: u64 = duplicate_groups
            .iter()
            .map(|(hash, resources)| {
                let size = hash_to_size.get(hash).copied().unwrap_or(0);
                size * (resources.len() as u64 - 1) // bytes saved by keeping one copy
            })
            .sum();

        let total_resources = package.resources().len();
        let unique_resources = hash_to_resources.len();

        DeduplicationAnalysis {
            total_resources,
            unique_resources,
            duplicate_count,
            duplicate_groups,
            potential_savings: duplicate_savings,
        }
    }

    /// Identify uncompressed resources whose estimated compression ratio makes
    /// compression worthwhile.
    fn analyze_compression(&self, package: &Package) -> Result<CompressionAnalysis> {
        let mut compressible_resources = Vec::new();
        let mut well_compressed_count = 0;
        let mut total_savings = 0u64;

        for (name, resource) in package.resources() {
            let size = resource.size() as u64;

            // Too small to be worth compressing.
            if size < self.min_compression_size {
                continue;
            }

            // Already compressed; nothing further to do.
            if resource.is_compressed() {
                well_compressed_count += 1;
                continue;
            }

            let compression_ratio = estimate_compression_ratio_by_type(&resource.resource_type);

            if compression_ratio <= self.min_compression_ratio {
                let estimated_compressed = (size as f64 * compression_ratio) as u64;
                let savings = size.saturating_sub(estimated_compressed);

                compressible_resources.push(CompressibleResource {
                    name: name.clone(),
                    current_size: size,
                    estimated_compressed_size: estimated_compressed,
                    potential_savings: savings,
                    compression_ratio,
                });

                total_savings += savings;
            } else {
                well_compressed_count += 1;
            }
        }

        Ok(CompressionAnalysis {
            compressible_resources,
            well_compressed_count,
            potential_savings: total_savings,
        })
    }

    /// Analyze the package and apply the optimizations that can be performed
    /// in place (currently deduplication only); returns the analysis report.
    pub fn optimize(&self, package: &mut Package) -> Result<OptimizationReport> {
        let report = self.analyze(package)?;

        if self.enable_deduplication && !report.deduplication.duplicate_groups.is_empty() {
            self.apply_deduplication(package, &report.deduplication)?;
        }

        Ok(report)
    }

    /// Remove all but the first-listed resource in each duplicate group.
    fn apply_deduplication(
        &self,
        package: &mut Package,
        analysis: &DeduplicationAnalysis,
    ) -> Result<()> {
        for resource_names in analysis.duplicate_groups.values() {
            if resource_names.len() > 1 {
                // Keep the first copy; drop the rest.
                for name in &resource_names[1..] {
                    package.resources_mut().remove(name);
                }
            }
        }

        Ok(())
    }
}

impl Default for PackageOptimizer {
    fn default() -> Self {
        Self::new()
    }
}

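// Usage sketch (illustrative only): assumes a `Package` value obtained from
// this crate's package-loading APIs, which are outside this module.
//
//     let optimizer = PackageOptimizer::new();
//     let report = optimizer.analyze(&package)?;
//     println!(
//         "{} -> {} ({:.1}% savings, {} opportunities)",
//         format_file_size(report.original_size),
//         format_file_size(report.optimized_size),
//         report.savings_percent,
//         report.opportunities.len()
//     );
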
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_optimizer_creation() {
        let optimizer = PackageOptimizer::new();
        assert_eq!(optimizer.min_compression_size, 1024);
        assert!(optimizer.enable_deduplication);
    }

    #[test]
    fn test_optimization_type() {
        let opt_type = OptimizationType::Deduplication;
        assert_eq!(opt_type, OptimizationType::Deduplication);
    }

    #[test]
    fn test_default_optimizer() {
        let optimizer = PackageOptimizer::default();
        assert_eq!(optimizer.min_compression_size, 1024);
    }

    #[test]
    fn test_deduplication_analysis() {
        let analysis = DeduplicationAnalysis {
            total_resources: 10,
            unique_resources: 7,
            duplicate_count: 3,
            duplicate_groups: HashMap::new(),
            potential_savings: 1024,
        };

        assert_eq!(analysis.total_resources, 10);
        assert_eq!(analysis.duplicate_count, 3);
        assert_eq!(analysis.potential_savings, 1024);
    }

    #[test]
    fn test_compressible_resource() {
        let resource = CompressibleResource {
            name: "test.txt".to_string(),
            current_size: 10000,
            estimated_compressed_size: 3000,
            potential_savings: 7000,
            compression_ratio: 0.3,
        };

        assert_eq!(resource.current_size, 10000);
        assert_eq!(resource.potential_savings, 7000);
        assert!(resource.compression_ratio < 0.5);
    }
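
    // Illustrative check of the per-type ratio table and the size-estimate
    // formula used by analyze_compression: estimated = (size as f64 * ratio) as u64.
    #[test]
    fn test_compression_ratio_estimate() {
        let ratio = estimate_compression_ratio_by_type(&ResourceType::Text);
        assert!((ratio - 0.3).abs() < f64::EPSILON);

        // A 10_000-byte uncompressed text resource is estimated at 3_000 bytes,
        // for a potential saving of 7_000 bytes.
        let size = 10_000u64;
        let estimated = (size as f64 * ratio) as u64;
        assert_eq!(estimated, 3_000);
        assert_eq!(size.saturating_sub(estimated), 7_000);
    }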
}