context_builder/
state.rs

//! Project state representation for context-builder.
//!
//! This module provides structured data types to represent the state of a project
//! at a point in time. This replaces the previous approach of caching generated
//! markdown and enables more robust diff generation.
6
7use chrono::Utc;
8use ignore::DirEntry;
9use serde::{Deserialize, Serialize};
10use std::collections::BTreeMap;
11use std::path::{Path, PathBuf};
12use std::time::SystemTime;
13
14use crate::config::Config;
15use crate::diff::{PerFileDiff, PerFileStatus, diff_file_contents};
16
17/// Complete state representation of a project at a point in time
18#[derive(Serialize, Deserialize, Debug, Clone)]
19pub struct ProjectState {
20    /// Timestamp when this state was captured
21    pub timestamp: String,
22    /// Hash of the configuration used to generate this state
23    pub config_hash: String,
24    /// Map of file paths to their state information
25    pub files: BTreeMap<PathBuf, FileState>,
26    /// Project metadata
27    pub metadata: ProjectMetadata,
28}
29
30/// State information for a single file
31#[derive(Serialize, Deserialize, Debug, Clone)]
32pub struct FileState {
33    /// Raw file content as string
34    pub content: String,
35    /// File size in bytes
36    pub size: u64,
37    /// Last modified time
38    pub modified: SystemTime,
39    /// Content hash for quick comparison
40    pub content_hash: String,
41}
42
43/// Metadata about the project
44#[derive(Serialize, Deserialize, Debug, Clone)]
45pub struct ProjectMetadata {
46    /// Project directory name
47    pub project_name: String,
48    /// Total number of files processed
49    pub file_count: usize,
50    /// Filters applied during processing
51    pub filters: Vec<String>,
52    /// Ignore patterns applied
53    pub ignores: Vec<String>,
54    /// Whether line numbers were enabled
55    pub line_numbers: bool,
56}
57
58/// Result of comparing two project states
59#[derive(Debug, Clone)]
60pub struct StateComparison {
61    /// Per-file differences
62    pub file_diffs: Vec<PerFileDiff>,
63    /// Summary of changes
64    pub summary: ChangeSummary,
65}
66
/// Condensed overview of the differences between two project states.
#[derive(Clone, Debug)]
pub struct ChangeSummary {
    /// Paths of files that were added.
    pub added: Vec<PathBuf>,
    /// Paths of files that were removed.
    pub removed: Vec<PathBuf>,
    /// Paths of files that were modified.
    pub modified: Vec<PathBuf>,
    /// Total count of changed files.
    pub total_changes: usize,
}
79
80impl ProjectState {
81    /// Create a new project state from collected files
82    pub fn from_files(
83        files: &[DirEntry],
84        base_path: &Path,
85        config: &Config,
86        line_numbers: bool,
87    ) -> std::io::Result<Self> {
88        let mut file_states = BTreeMap::new();
89
90        // Ensure paths stored in the state are *always* relative (never absolute).
91        // This keeps cache stable across different launch contexts and matches
92        // test expectations. We attempt a few strategies to derive a relative path.
93        let cwd = std::env::current_dir().unwrap_or_else(|_| base_path.to_path_buf());
94        for entry in files {
95            let entry_path = entry.path();
96
97            let relative_path = entry_path
98                // Preferred: relative to provided base_path (common case when input is absolute)
99                .strip_prefix(base_path)
100                .or_else(|_| entry_path.strip_prefix(&cwd))
101                .map(|p| p.to_path_buf())
102                .unwrap_or_else(|_| {
103                    // Fallback: last component (file name) to avoid leaking absolute paths
104                    entry_path
105                        .file_name()
106                        .map(PathBuf::from)
107                        .unwrap_or_else(|| entry_path.to_path_buf())
108                });
109
110            let file_state = FileState::from_path(entry_path)?;
111            file_states.insert(relative_path, file_state);
112        }
113
114        let project_name = base_path
115            .file_name()
116            .and_then(|n| n.to_str())
117            .unwrap_or("unknown")
118            .to_string();
119
120        let metadata = ProjectMetadata {
121            project_name,
122            file_count: files.len(),
123            filters: config.filter.clone().unwrap_or_default(),
124            ignores: config.ignore.clone().unwrap_or_default(),
125            line_numbers,
126        };
127
128        Ok(ProjectState {
129            timestamp: Utc::now().format("%Y-%m-%d %H:%M:%S UTC").to_string(),
130            config_hash: Self::compute_config_hash(config),
131            files: file_states,
132            metadata,
133        })
134    }
135
136    /// Compare this state with a previous state
137    pub fn compare_with(&self, previous: &ProjectState) -> StateComparison {
138        // Convert file states to content maps for diff_file_contents
139        let previous_content: std::collections::HashMap<String, String> = previous
140            .files
141            .iter()
142            .map(|(path, state)| (path.to_string_lossy().to_string(), state.content.clone()))
143            .collect();
144
145        let current_content: std::collections::HashMap<String, String> = self
146            .files
147            .iter()
148            .map(|(path, state)| (path.to_string_lossy().to_string(), state.content.clone()))
149            .collect();
150
151        // Generate per-file diffs
152        let file_diffs = diff_file_contents(&previous_content, &current_content, true, None);
153
154        // Generate summary
155        let mut added = Vec::new();
156        let mut removed = Vec::new();
157        let mut modified = Vec::new();
158
159        for diff in &file_diffs {
160            let path = PathBuf::from(&diff.path);
161            match diff.status {
162                PerFileStatus::Added => added.push(path),
163                PerFileStatus::Removed => removed.push(path),
164                PerFileStatus::Modified => modified.push(path),
165                PerFileStatus::Unchanged => {}
166            }
167        }
168
169        let summary = ChangeSummary {
170            total_changes: added.len() + removed.len() + modified.len(),
171            added,
172            removed,
173            modified,
174        };
175
176        StateComparison {
177            file_diffs,
178            summary,
179        }
180    }
181
182    /// Check if this state has any content changes compared to another
183    pub fn has_changes(&self, other: &ProjectState) -> bool {
184        if self.files.len() != other.files.len() {
185            return true;
186        }
187
188        for (path, state) in &self.files {
189            match other.files.get(path) {
190                Some(other_state) => {
191                    if state.content_hash != other_state.content_hash {
192                        return true;
193                    }
194                }
195                None => return true,
196            }
197        }
198
199        false
200    }
201
202    /// Generate a configuration hash for cache validation
203    fn compute_config_hash(config: &Config) -> String {
204        use std::collections::hash_map::DefaultHasher;
205        use std::hash::{Hash, Hasher};
206
207        let mut hasher = DefaultHasher::new();
208        config.filter.hash(&mut hasher);
209        config.ignore.hash(&mut hasher);
210        config.line_numbers.hash(&mut hasher);
211        config.auto_diff.hash(&mut hasher);
212        config.diff_context_lines.hash(&mut hasher);
213
214        format!("{:x}", hasher.finish())
215    }
216}
217
218impl FileState {
219    /// Create a file state from a file path
220    pub fn from_path(path: &Path) -> std::io::Result<Self> {
221        use std::collections::hash_map::DefaultHasher;
222        use std::fs;
223        use std::hash::{Hash, Hasher};
224        use std::io::ErrorKind;
225
226        let metadata = fs::metadata(path)?;
227
228        let content = match fs::read_to_string(path) {
229            Ok(content) => content,
230            Err(e) if e.kind() == ErrorKind::InvalidData => {
231                // Handle binary files gracefully
232                log::warn!("Skipping binary file in auto-diff mode: {}", path.display());
233                format!("<Binary file - {} bytes>", metadata.len())
234            }
235            Err(e) => return Err(e),
236        };
237
238        // Compute content hash
239        let mut hasher = DefaultHasher::new();
240        content.hash(&mut hasher);
241        let content_hash = format!("{:x}", hasher.finish());
242
243        Ok(FileState {
244            content,
245            size: metadata.len(),
246            modified: metadata.modified().unwrap_or(SystemTime::UNIX_EPOCH),
247            content_hash,
248        })
249    }
250}
251
252impl ChangeSummary {
253    /// Check if there are any changes
254    pub fn has_changes(&self) -> bool {
255        self.total_changes > 0
256    }
257
258    /// Generate markdown representation of the change summary
259    pub fn to_markdown(&self) -> String {
260        if !self.has_changes() {
261            return String::new();
262        }
263
264        let mut output = String::new();
265        output.push_str("## Change Summary\n\n");
266
267        for path in &self.added {
268            output.push_str(&format!("- Added: `{}`\n", path.display()));
269        }
270
271        for path in &self.removed {
272            output.push_str(&format!("- Removed: `{}`\n", path.display()));
273        }
274
275        for path in &self.modified {
276            output.push_str(&format!("- Modified: `{}`\n", path.display()));
277        }
278
279        output.push('\n');
280        output
281    }
282}
283
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::tempdir;

    /// Obtain a genuine `DirEntry` for `path`.
    ///
    /// `DirEntry` has no public constructor, so the parent directory is walked
    /// with the `ignore` crate and the entry matching `path` is picked out.
    fn create_mock_dir_entry(path: &std::path::Path) -> ignore::DirEntry {
        let parent = path.parent().unwrap();
        ignore::WalkBuilder::new(parent)
            .build()
            .filter_map(Result::ok)
            .find(|entry| entry.path() == path)
            .expect("Failed to create DirEntry for test")
    }

    /// Fixture: wrap `files` in a `ProjectState` carrying placeholder metadata.
    fn make_state(
        timestamp: &str,
        config_hash: &str,
        files: BTreeMap<PathBuf, FileState>,
        file_count: usize,
    ) -> ProjectState {
        ProjectState {
            timestamp: timestamp.to_string(),
            config_hash: config_hash.to_string(),
            files,
            metadata: ProjectMetadata {
                project_name: "test".to_string(),
                file_count,
                filters: vec![],
                ignores: vec![],
                line_numbers: false,
            },
        }
    }

    /// Fixture: read each of `names` from `dir`, keyed by its bare file name.
    fn snapshot(dir: &Path, names: &[&str]) -> BTreeMap<PathBuf, FileState> {
        names
            .iter()
            .map(|&name| {
                (
                    PathBuf::from(name),
                    FileState::from_path(&dir.join(name)).unwrap(),
                )
            })
            .collect()
    }

    #[test]
    fn test_file_state_creation() {
        let dir = tempdir().unwrap();
        let target = dir.path().join("test.txt");
        fs::write(&target, "Hello, world!").unwrap();

        let captured = FileState::from_path(&target).unwrap();

        assert_eq!(captured.content, "Hello, world!");
        assert_eq!(captured.size, 13);
        assert!(!captured.content_hash.is_empty());
    }

    #[test]
    fn test_project_state_comparison() {
        let dir = tempdir().unwrap();
        let root = dir.path();

        // Initial project: two files.
        fs::write(root.join("file1.txt"), "content1").unwrap();
        fs::write(root.join("file2.txt"), "content2").unwrap();

        let before = make_state(
            "2023-01-01T00:00:00Z",
            "test_hash",
            snapshot(root, &["file1.txt", "file2.txt"]),
            2,
        );

        // Change one file, add another, then capture again.
        fs::write(root.join("file1.txt"), "modified_content1").unwrap();
        fs::write(root.join("file3.txt"), "content3").unwrap();

        let after = make_state(
            "2023-01-01T01:00:00Z",
            "test_hash",
            snapshot(root, &["file1.txt", "file2.txt", "file3.txt"]),
            3,
        );

        let summary = after.compare_with(&before).summary;

        assert_eq!(summary.added.len(), 1);
        assert_eq!(summary.modified.len(), 1);
        assert_eq!(summary.removed.len(), 0);
        assert!(summary.added.contains(&PathBuf::from("file3.txt")));
        assert!(summary.modified.contains(&PathBuf::from("file1.txt")));
    }

    #[test]
    fn test_change_summary_markdown() {
        let rendered = ChangeSummary {
            added: vec![PathBuf::from("new.txt")],
            removed: vec![PathBuf::from("old.txt")],
            modified: vec![PathBuf::from("changed.txt")],
            total_changes: 3,
        }
        .to_markdown();

        assert!(rendered.contains("## Change Summary"));
        assert!(rendered.contains("- Added: `new.txt`"));
        assert!(rendered.contains("- Removed: `old.txt`"));
        assert!(rendered.contains("- Modified: `changed.txt`"));
    }

    #[test]
    fn test_binary_file_handling() {
        let dir = tempdir().unwrap();
        let target = dir.path().join("test.bin");

        // Bytes that do not form valid UTF-8.
        let raw = vec![0u8, 255, 128, 42, 0, 1, 2, 3];
        fs::write(&target, &raw).unwrap();

        // Must succeed rather than fail on the undecodable content.
        let captured = FileState::from_path(&target).unwrap();

        // The content is a placeholder describing the binary file.
        assert!(captured.content.contains("Binary file"));
        assert!(captured.content.contains("8 bytes"));
        assert_eq!(captured.size, 8);
        assert!(!captured.content_hash.is_empty());
    }

    #[test]
    fn test_has_changes_identical_states() {
        let dir = tempdir().unwrap();
        let root = dir.path();
        fs::write(root.join("test.txt"), "content").unwrap();

        let files = snapshot(root, &["test.txt"]);

        let first = make_state("2023-01-01T00:00:00Z", "hash1", files.clone(), 1);
        let second = make_state("2023-01-01T01:00:00Z", "hash1", files, 1);

        assert!(!first.has_changes(&second));
    }

    #[test]
    fn test_has_changes_different_file_count() {
        let dir = tempdir().unwrap();
        let root = dir.path();
        fs::write(root.join("test1.txt"), "content1").unwrap();
        fs::write(root.join("test2.txt"), "content2").unwrap();

        let one_file = snapshot(root, &["test1.txt"]);
        let two_files = snapshot(root, &["test1.txt", "test2.txt"]);

        let first = make_state("2023-01-01T00:00:00Z", "hash1", one_file, 1);
        let second = make_state("2023-01-01T01:00:00Z", "hash1", two_files, 2);

        assert!(first.has_changes(&second));
    }

    #[test]
    fn test_has_changes_content_different() {
        let dir = tempdir().unwrap();
        let target = dir.path().join("test.txt");

        fs::write(&target, "content1").unwrap();
        let original = FileState::from_path(&target).unwrap();

        fs::write(&target, "content2").unwrap();
        let rewritten = FileState::from_path(&target).unwrap();

        let mut files_before = BTreeMap::new();
        files_before.insert(PathBuf::from("test.txt"), original);

        let mut files_after = BTreeMap::new();
        files_after.insert(PathBuf::from("test.txt"), rewritten);

        let first = make_state("2023-01-01T00:00:00Z", "hash1", files_before, 1);
        let second = make_state("2023-01-01T01:00:00Z", "hash1", files_after, 1);

        assert!(first.has_changes(&second));
    }

    #[test]
    fn test_config_hash_generation() {
        // Configurations that differ only in the filter extension.
        let config_for = |extension: &str| Config {
            filter: Some(vec![extension.to_string()]),
            ignore: Some(vec!["target".to_string()]),
            line_numbers: Some(true),
            auto_diff: Some(false),
            diff_context_lines: Some(3),
            ..Default::default()
        };

        let rust_a = config_for("rs");
        let rust_b = config_for("rs");
        let python = config_for("py"); // Different filter

        let hash_a = ProjectState::compute_config_hash(&rust_a);
        let hash_b = ProjectState::compute_config_hash(&rust_b);
        let hash_py = ProjectState::compute_config_hash(&python);

        assert_eq!(hash_a, hash_b);
        assert_ne!(hash_a, hash_py);
    }

    #[test]
    fn test_change_summary_no_changes() {
        let empty = ChangeSummary {
            added: vec![],
            removed: vec![],
            modified: vec![],
            total_changes: 0,
        };

        assert!(!empty.has_changes());
        assert_eq!(empty.to_markdown(), "");
    }

    #[test]
    fn test_from_files_with_config() {
        let dir = tempdir().unwrap();
        let root = dir.path();

        fs::write(root.join("test.rs"), "fn main() {}").unwrap();
        fs::write(root.join("README.md"), "# Test").unwrap();

        let entries = vec![
            create_mock_dir_entry(&root.join("test.rs")),
            create_mock_dir_entry(&root.join("README.md")),
        ];

        let config = Config {
            filter: Some(vec!["rs".to_string()]),
            ignore: Some(vec!["target".to_string()]),
            line_numbers: Some(true),
            ..Default::default()
        };

        let state = ProjectState::from_files(&entries, root, &config, true).unwrap();

        assert_eq!(state.files.len(), 2);
        assert_eq!(state.metadata.file_count, 2);
        assert_eq!(state.metadata.filters, vec!["rs"]);
        assert_eq!(state.metadata.ignores, vec!["target"]);
        assert!(state.metadata.line_numbers);
        assert!(!state.timestamp.is_empty());
        assert!(!state.config_hash.is_empty());
    }

    #[test]
    fn test_from_files_absolute_path_fallback() {
        let dir = tempdir().unwrap();
        let root = dir.path();

        // A real file inside the temp dir, with a matching entry.
        let target = root.join("test.txt");
        fs::write(&target, "test content").unwrap();
        let entry = create_mock_dir_entry(&target);

        // An unrelated base path forces the fallback branch.
        let unrelated_base = PathBuf::from("/completely/different/path");

        let config = Config::default();

        let state = ProjectState::from_files(&[entry], &unrelated_base, &config, false).unwrap();

        // Only the file name should remain as the key.
        assert_eq!(state.files.len(), 1);
        assert!(state.files.contains_key(&PathBuf::from("test.txt")));
    }

    #[test]
    fn test_change_summary_with_unchanged_files() {
        let diffs = vec![
            PerFileDiff {
                path: "added.txt".to_string(),
                status: PerFileStatus::Added,
                diff: "diff content".to_string(),
            },
            PerFileDiff {
                path: "unchanged.txt".to_string(),
                status: PerFileStatus::Unchanged,
                diff: "".to_string(),
            },
        ];

        // Hand-build the summary by classifying each diff on its status.
        let mut added = Vec::new();
        let mut removed = Vec::new();
        let mut modified = Vec::new();

        for entry in &diffs {
            let entry_path = PathBuf::from(&entry.path);
            match entry.status {
                PerFileStatus::Added => added.push(entry_path),
                PerFileStatus::Removed => removed.push(entry_path),
                PerFileStatus::Modified => modified.push(entry_path),
                PerFileStatus::Unchanged => {} // Exercised by the second diff above
            }
        }

        let total_changes = added.len() + removed.len() + modified.len();
        let summary = ChangeSummary {
            added,
            removed,
            modified,
            total_changes,
        };

        assert_eq!(summary.total_changes, 1); // Only the added file counts
        assert_eq!(summary.added.len(), 1);
        assert_eq!(summary.removed.len(), 0);
        assert_eq!(summary.modified.len(), 0);
    }

    #[test]
    fn test_has_changes_with_missing_file() {
        let dir = tempdir().unwrap();
        let root = dir.path();
        let config = Config::default();

        // First state holds only file1.
        fs::write(root.join("file1.txt"), "content1").unwrap();
        let first_entry = create_mock_dir_entry(&root.join("file1.txt"));
        let first = ProjectState::from_files(&[first_entry], root, &config, false).unwrap();

        // Second state holds only file2.
        fs::write(root.join("file2.txt"), "content2").unwrap();
        let second_entry = create_mock_dir_entry(&root.join("file2.txt"));
        let second = ProjectState::from_files(&[second_entry], root, &config, false).unwrap();

        // Same file count, disjoint paths: must still register as changed.
        assert!(first.has_changes(&second));
    }

    #[test]
    fn test_file_state_with_invalid_data_error() {
        let dir = tempdir().unwrap();
        let target = dir.path().join("binary.dat");

        // Bytes that are not valid UTF-8.
        let raw = vec![0xFF, 0xFE, 0xFD, 0xFC, 0xFB, 0xFA];
        fs::write(&target, &raw).unwrap();

        // Whether or not the InvalidData path is hit, reading must not fail.
        let outcome = FileState::from_path(&target);
        assert!(outcome.is_ok());
    }
}