scribe_scaling/
profiling.rs

//! Repository profiling for automatic type detection and configuration optimization.
2
3use crate::engine::ScalingConfig;
4use crate::error::ScalingResult;
5use serde::{Deserialize, Serialize};
6use std::path::Path;
7
8/// Repository types for classification
9#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
10pub enum RepositoryType {
11    Personal,
12    Library,
13    WebApp,
14    MobileApp,
15    SystemSoftware,
16    GameDev,
17    DataScience,
18    Enterprise,
19    Monorepo,
20    Documentation,
21    Unknown,
22}
23
24impl Default for RepositoryType {
25    fn default() -> Self {
26        Self::Unknown
27    }
28}
29
30/// Repository profile with characteristics
31#[derive(Debug, Clone, Serialize, Deserialize)]
32pub struct RepositoryProfile {
33    /// Detected repository type
34    pub repository_type: RepositoryType,
35
36    /// Total number of files
37    pub file_count: usize,
38
39    /// Total repository size in bytes
40    pub total_size: u64,
41
42    /// Average file size
43    pub average_file_size: u64,
44
45    /// Primary programming languages
46    pub primary_languages: Vec<String>,
47
48    /// Build system type
49    pub build_system: String,
50}
51
52impl RepositoryProfile {
53    /// Convert profile to optimal scaling configuration
54    pub fn to_scaling_config(&self) -> ScalingConfig {
55        match self.repository_type {
56            RepositoryType::Personal if self.file_count < 1000 => ScalingConfig::small_repository(),
57            RepositoryType::Enterprise | RepositoryType::Monorepo => {
58                ScalingConfig::large_repository()
59            }
60            _ => ScalingConfig::default(),
61        }
62    }
63}
64
/// Repository profiler.
///
/// The profiler carries no state, so every instance behaves identically.
/// `Debug` and `Clone` are derived so the type can be embedded in other
/// structs that derive those traits and shows up in diagnostic output.
#[derive(Debug, Clone)]
pub struct RepositoryProfiler {
    // Simple profiler without complex state
}
69
70impl RepositoryProfiler {
71    /// Create a new repository profiler
72    pub fn new() -> Self {
73        Self {}
74    }
75
76    /// Profile a repository and return its characteristics
77    pub async fn profile_repository(&self, path: &Path) -> ScalingResult<RepositoryProfile> {
78        // Basic profiling implementation
79        let mut file_count = 0;
80        let mut total_size = 0u64;
81        let mut languages = std::collections::HashMap::new();
82
83        for entry in walkdir::WalkDir::new(path).follow_links(false) {
84            if let Ok(entry) = entry {
85                if entry.file_type().is_file() {
86                    file_count += 1;
87                    if let Ok(metadata) = entry.metadata() {
88                        total_size += metadata.len();
89                    }
90
91                    // Simple language detection
92                    if let Some(ext) = entry.path().extension() {
93                        if let Some(ext_str) = ext.to_str() {
94                            *languages.entry(ext_str.to_string()).or_insert(0) += 1;
95                        }
96                    }
97                }
98            }
99        }
100
101        let average_file_size = if file_count > 0 {
102            total_size / file_count as u64
103        } else {
104            0
105        };
106
107        // Simple repository type detection
108        let repository_type = if file_count < 100 {
109            RepositoryType::Personal
110        } else if file_count > 10000 {
111            RepositoryType::Enterprise
112        } else {
113            RepositoryType::Library
114        };
115
116        // Get primary languages
117        let mut lang_vec: Vec<_> = languages.into_iter().collect();
118        lang_vec.sort_by(|a, b| b.1.cmp(&a.1));
119        let primary_languages = lang_vec.into_iter().take(3).map(|(lang, _)| lang).collect();
120
121        Ok(RepositoryProfile {
122            repository_type,
123            file_count,
124            total_size,
125            average_file_size,
126            primary_languages,
127            build_system: "Unknown".to_string(),
128        })
129    }
130
131    /// Quick estimate of processing requirements
132    pub async fn quick_estimate(
133        &self,
134        path: &Path,
135    ) -> ScalingResult<(usize, std::time::Duration, usize)> {
136        let profile = self.profile_repository(path).await?;
137
138        let estimated_duration = std::time::Duration::from_millis(
139            (profile.file_count as u64 * 10).min(30000), // Max 30 seconds
140        );
141
142        let estimated_memory = profile.file_count * 1024; // 1KB per file
143
144        Ok((profile.file_count, estimated_duration, estimated_memory))
145    }
146}
147
148impl Default for RepositoryProfiler {
149    fn default() -> Self {
150        Self::new()
151    }
152}