Skip to main content

st/mem8/
git_temporal.rs

1//! Git-based temporal timeline builder for MEM8
2//! Extracts project history directly from git to create wave memories
3
4use crate::mem8::{
5    integration::{ContentType, DirectoryHealth, DirectoryMetadata},
6    wave::{FrequencyBand, MemoryWave},
7    SmartTreeMem8,
8};
9use anyhow::{Context, Result};
10use chrono::{DateTime, Datelike, Utc};
11use std::path::Path;
12use std::process::Command;
13
14/// Git commit information
15#[derive(Debug, Clone)]
16pub struct GitCommit {
17    pub hash: String,
18    pub author: String,
19    pub timestamp: DateTime<Utc>,
20    pub message: String,
21    pub files_changed: Vec<String>,
22    pub additions: usize,
23    pub deletions: usize,
24}
25
26/// Git file history
27#[derive(Debug)]
28pub struct GitFileHistory {
29    pub path: String,
30    pub commits: Vec<GitCommit>,
31    pub total_changes: usize,
32    pub authors: Vec<String>,
33    pub first_seen: DateTime<Utc>,
34    pub last_modified: DateTime<Utc>,
35}
36
37/// Git-based temporal analyzer for MEM8
38pub struct GitTemporalAnalyzer {
39    repo_path: String,
40}
41
42impl GitTemporalAnalyzer {
43    pub fn new(repo_path: impl AsRef<Path>) -> Result<Self> {
44        let repo_path = repo_path.as_ref().to_string_lossy().to_string();
45
46        // Verify it's a git repository
47        Command::new("git")
48            .arg("-C")
49            .arg(&repo_path)
50            .arg("rev-parse")
51            .arg("--git-dir")
52            .output()
53            .context("Failed to verify git repository")?;
54
55        Ok(Self { repo_path })
56    }
57
58    /// Get complete project timeline
59    pub fn get_project_timeline(&self) -> Result<Vec<GitCommit>> {
60        let output = Command::new("git")
61            .arg("-C")
62            .arg(&self.repo_path)
63            .arg("log")
64            .arg("--pretty=format:%H|%an|%at|%s")
65            .arg("--numstat")
66            .arg("--no-merges")
67            .output()
68            .context("Failed to get git log")?;
69
70        let stdout = String::from_utf8_lossy(&output.stdout);
71        self.parse_git_log(&stdout)
72    }
73
74    /// Get file-specific history
75    pub fn get_file_history(&self, file_path: &str) -> Result<GitFileHistory> {
76        // Get commits that touched this file
77        let output = Command::new("git")
78            .arg("-C")
79            .arg(&self.repo_path)
80            .arg("log")
81            .arg("--follow")
82            .arg("--pretty=format:%H|%an|%at|%s")
83            .arg("--")
84            .arg(file_path)
85            .output()
86            .context("Failed to get file history")?;
87
88        let commits = self.parse_simple_log(&String::from_utf8_lossy(&output.stdout))?;
89
90        // Get unique authors
91        let mut authors: Vec<String> = commits.iter().map(|c| c.author.clone()).collect();
92        authors.sort();
93        authors.dedup();
94
95        Ok(GitFileHistory {
96            path: file_path.to_string(),
97            total_changes: commits.len(),
98            first_seen: commits.last().map(|c| c.timestamp).unwrap_or_else(Utc::now),
99            last_modified: commits
100                .first()
101                .map(|c| c.timestamp)
102                .unwrap_or_else(Utc::now),
103            authors,
104            commits,
105        })
106    }
107
108    /// Get activity heatmap (commits per day/week)
109    pub fn get_activity_heatmap(&self, days: usize) -> Result<Vec<(DateTime<Utc>, usize)>> {
110        let output = Command::new("git")
111            .arg("-C")
112            .arg(&self.repo_path)
113            .arg("log")
114            .arg(format!("--since={} days ago", days))
115            .arg("--pretty=format:%at")
116            .output()
117            .context("Failed to get activity data")?;
118
119        let stdout = String::from_utf8_lossy(&output.stdout);
120        let mut daily_commits = std::collections::HashMap::new();
121
122        for line in stdout.lines() {
123            if let Ok(timestamp) = line.parse::<i64>() {
124                let date = DateTime::<Utc>::from_timestamp(timestamp, 0).unwrap_or_else(Utc::now);
125                let day = date.date_naive();
126                *daily_commits.entry(day).or_insert(0) += 1;
127            }
128        }
129
130        let mut heatmap: Vec<_> = daily_commits
131            .into_iter()
132            .map(|(date, count)| {
133                let datetime = date
134                    .and_hms_opt(0, 0, 0)
135                    .unwrap()
136                    .and_local_timezone(Utc)
137                    .unwrap();
138                (datetime, count)
139            })
140            .collect();
141        heatmap.sort_by_key(|(date, _)| *date);
142
143        Ok(heatmap)
144    }
145
146    /// Analyze code churn (files that change frequently)
147    pub fn analyze_code_churn(&self, limit: usize) -> Result<Vec<(String, usize)>> {
148        let output = Command::new("git")
149            .arg("-C")
150            .arg(&self.repo_path)
151            .arg("log")
152            .arg("--name-only")
153            .arg("--pretty=format:")
154            .arg("--no-merges")
155            .output()
156            .context("Failed to analyze code churn")?;
157
158        let stdout = String::from_utf8_lossy(&output.stdout);
159        let mut file_changes = std::collections::HashMap::new();
160
161        for line in stdout.lines() {
162            if !line.is_empty() && !line.starts_with(' ') {
163                *file_changes.entry(line.to_string()).or_insert(0) += 1;
164            }
165        }
166
167        let mut churn: Vec<_> = file_changes.into_iter().collect();
168        churn.sort_by_key(|(_, count)| std::cmp::Reverse(*count));
169        churn.truncate(limit);
170
171        Ok(churn)
172    }
173
174    /// Parse git log output
175    fn parse_git_log(&self, output: &str) -> Result<Vec<GitCommit>> {
176        let mut commits = Vec::new();
177        let mut current_commit: Option<GitCommit> = None;
178
179        for line in output.lines() {
180            if line.contains('|') && !line.starts_with(char::is_numeric) {
181                // Commit header line
182                if let Some(commit) = current_commit.take() {
183                    commits.push(commit);
184                }
185
186                let parts: Vec<&str> = line.split('|').collect();
187                if parts.len() >= 4 {
188                    let timestamp = parts[2].parse::<i64>().unwrap_or(0);
189                    current_commit = Some(GitCommit {
190                        hash: parts[0].to_string(),
191                        author: parts[1].to_string(),
192                        timestamp: DateTime::<Utc>::from_timestamp(timestamp, 0)
193                            .unwrap_or_else(Utc::now),
194                        message: parts[3..].join("|"),
195                        files_changed: Vec::new(),
196                        additions: 0,
197                        deletions: 0,
198                    });
199                }
200            } else if let Some(ref mut commit) = current_commit {
201                // File change line (numstat format)
202                let parts: Vec<&str> = line.split_whitespace().collect();
203                if parts.len() >= 3 {
204                    if let (Ok(adds), Ok(dels)) =
205                        (parts[0].parse::<usize>(), parts[1].parse::<usize>())
206                    {
207                        commit.additions += adds;
208                        commit.deletions += dels;
209                        commit.files_changed.push(parts[2].to_string());
210                    }
211                }
212            }
213        }
214
215        if let Some(commit) = current_commit {
216            commits.push(commit);
217        }
218
219        Ok(commits)
220    }
221
222    /// Parse simple log output (without numstat)
223    fn parse_simple_log(&self, output: &str) -> Result<Vec<GitCommit>> {
224        let mut commits = Vec::new();
225
226        for line in output.lines() {
227            let parts: Vec<&str> = line.split('|').collect();
228            if parts.len() >= 4 {
229                let timestamp = parts[2].parse::<i64>().unwrap_or(0);
230                commits.push(GitCommit {
231                    hash: parts[0].to_string(),
232                    author: parts[1].to_string(),
233                    timestamp: DateTime::<Utc>::from_timestamp(timestamp, 0)
234                        .unwrap_or_else(Utc::now),
235                    message: parts[3..].join("|"),
236                    files_changed: Vec::new(),
237                    additions: 0,
238                    deletions: 0,
239                });
240            }
241        }
242
243        Ok(commits)
244    }
245}
246
247/// Extension trait to integrate Git temporal data with MEM8
248impl SmartTreeMem8 {
249    /// Import git history as wave memories
250    pub fn import_git_timeline(&mut self, repo_path: impl AsRef<Path>) -> Result<()> {
251        let analyzer = GitTemporalAnalyzer::new(repo_path)?;
252
253        // Get project timeline
254        let timeline = analyzer.get_project_timeline()?;
255        println!("Importing {} commits into wave memory...", timeline.len());
256
257        // Get activity heatmap for the last 90 days
258        let heatmap = analyzer.get_activity_heatmap(90)?;
259        let _max_daily_commits = heatmap.iter().map(|(_, count)| *count).max().unwrap_or(1) as f32;
260
261        // Import each commit as a memory wave
262        for (idx, commit) in timeline.iter().enumerate() {
263            let days_ago = (Utc::now() - commit.timestamp).num_days() as f32;
264
265            // Determine frequency based on commit characteristics
266            let frequency = if commit.message.contains("fix") || commit.message.contains("bug") {
267                FrequencyBand::Technical.frequency(0.7) // Bug fixes are technical
268            } else if commit.message.contains("doc") || commit.message.contains("README") {
269                FrequencyBand::Conversational.frequency(0.5) // Documentation
270            } else if commit.additions > 500 || commit.deletions > 500 {
271                FrequencyBand::Implementation.frequency(0.8) // Major changes
272            } else if commit.files_changed.len() > 10 {
273                FrequencyBand::DeepStructural.frequency(0.6) // Structural refactoring
274            } else {
275                FrequencyBand::Technical.frequency(0.5) // Default
276            };
277
278            // Amplitude based on change size and recency
279            let change_factor = ((commit.additions + commit.deletions) as f32).log10() / 4.0;
280            let recency_factor = (-days_ago / 30.0).exp(); // Decay over 30 days
281            let amplitude = (change_factor * recency_factor).clamp(0.1, 1.0);
282
283            // Create memory wave
284            let mut wave = MemoryWave::new(frequency, amplitude);
285
286            // Emotional context based on commit patterns
287            wave.valence = if commit.message.contains("fix") || commit.message.contains("bug") {
288                -0.2 // Negative for bug fixes
289            } else if commit.message.contains("feat") || commit.message.contains("add") {
290                0.6 // Positive for new features
291            } else {
292                0.2 // Neutral positive
293            };
294
295            // Arousal based on change magnitude
296            wave.arousal = change_factor.clamp(0.1, 1.0);
297
298            // Store in temporal layer based on age
299            let z_layer = (idx as f32 / timeline.len() as f32 * 65535.0) as u16;
300
301            // Use author name for spatial distribution
302            let (x, y) = self.string_to_coordinates(&format!("{}-{}", commit.author, idx));
303
304            self.store_wave_at_coordinates(x, y, z_layer, wave)?;
305        }
306
307        // Import code churn patterns
308        let churn = analyzer.analyze_code_churn(20)?;
309        for (file_path, change_count) in churn {
310            let metadata = DirectoryMetadata {
311                primary_type: self.detect_content_type(&file_path),
312                importance: (change_count as f32 / 100.0).clamp(0.1, 1.0),
313                normalized_size: 0.5, // Unknown from git
314                health: if change_count > 50 {
315                    DirectoryHealth::Warning // High churn might indicate instability
316                } else {
317                    DirectoryHealth::Healthy
318                },
319                activity_level: (change_count as f32 / 20.0).clamp(0.1, 1.0),
320                days_since_modified: 0, // Will be overridden by actual file check
321            };
322
323            self.store_directory_memory(Path::new(&file_path), metadata)?;
324        }
325
326        println!("Git timeline imported successfully!");
327        Ok(())
328    }
329
330    /// Helper to detect content type from path
331    fn detect_content_type(&self, path: &str) -> ContentType {
332        if path.ends_with(".rs") || path.ends_with(".py") || path.ends_with(".js") {
333            ContentType::Code
334        } else if path.ends_with(".md") || path.contains("README") {
335            ContentType::Documentation
336        } else if path.ends_with(".toml") || path.ends_with(".json") || path.ends_with(".yaml") {
337            ContentType::Configuration
338        } else if path.contains("test") || path.contains("spec") {
339            ContentType::Code // Tests are code
340        } else {
341            ContentType::Data
342        }
343    }
344}
345
346/// Create temporal "grooves" in wave space from git patterns
347pub fn create_temporal_grooves(
348    mem8: &mut SmartTreeMem8,
349    repo_path: impl AsRef<Path>,
350) -> Result<()> {
351    let analyzer = GitTemporalAnalyzer::new(&repo_path)?;
352
353    // Get activity patterns
354    let heatmap = analyzer.get_activity_heatmap(365)?; // Last year
355
356    // Find periodic patterns (e.g., weekly sprints, monthly releases)
357    let mut weekly_pattern = [0f32; 7];
358    for (date, count) in &heatmap {
359        let weekday = date.weekday().num_days_from_monday() as usize;
360        weekly_pattern[weekday] += *count as f32;
361    }
362
363    // Normalize weekly pattern
364    let max_weekly = weekly_pattern.iter().copied().fold(0.0f32, f32::max);
365    if max_weekly > 0.0 {
366        for val in &mut weekly_pattern {
367            *val /= max_weekly;
368        }
369    }
370
371    // Create persistent wave patterns for discovered rhythms
372    for (day, &intensity) in weekly_pattern.iter().enumerate() {
373        if intensity > 0.2 {
374            let mut wave = MemoryWave::new(
375                FrequencyBand::Technical.frequency(intensity), // Use Technical for temporal patterns
376                intensity * 0.5,
377            );
378            wave.decay_tau = None; // Persistent pattern
379            wave.valence = 0.3; // Slightly positive
380
381            // Store in a "rhythm" layer
382            let x = (day * 36) as u8; // Spread across x-axis
383            let y = 128; // Middle of y-axis
384            let z = 60000; // High z-layer for persistent patterns
385
386            mem8.store_wave_at_coordinates(x, y, z, wave)?;
387        }
388    }
389
390    println!("Temporal grooves created from git patterns!");
391    Ok(())
392}
393
#[cfg(test)]
mod tests {
    use super::*;
    use std::env;

    /// Imports the history of the current directory and expects at least one
    /// active memory afterwards. Skipped when the `CI` environment variable
    /// is set; an import failure is tolerated and leaves nothing to assert.
    #[test]
    fn test_git_timeline_import() {
        // Only run locally, not in CI
        if env::var("CI").is_ok() {
            return;
        }

        let mut mem8 = SmartTreeMem8::new();
        if mem8.import_git_timeline(".").is_ok() {
            assert!(mem8.active_memory_count() > 0);
        }
    }
}