1use std::fs;
8use std::path::Path;
9
10use crate::crypto::SecretKey;
11use crate::diff::types::{DiffKind, TreeDiff};
12use crate::index::WorkspaceIndex;
13
14use crate::staged::read_staged_blob;
15use crate::{cid, refs, ContentHash, Result, VoidContext};
16
17use crate::cid::VoidCid;
18
/// Largest content size, in bytes, that `diff_single_file` will diff line by
/// line; if either side is longer the file is reported as `too_large`.
const MAX_DIFF_SIZE: usize = 1 << 20;

/// Number of leading bytes `is_binary` scans for a NUL byte.
const BINARY_PROBE_SIZE: usize = 8 * 1024;
24
25#[derive(Debug, Clone)]
27pub struct ContentDiff {
28 pub path: String,
30 pub kind: DiffKind,
32 pub binary: bool,
34 pub too_large: bool,
36 pub hunks: Vec<Hunk>,
38 pub rename_from: Option<String>,
40}
41
/// One contiguous group of changed lines together with its context lines.
///
/// Comparable by value so hunks can be checked with `assert_eq!` and
/// de-duplicated by callers.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Hunk {
    /// 1-based line number in the old content where the hunk starts.
    pub old_start: usize,
    /// Number of old-side lines covered by the hunk (context + deletions).
    pub old_count: usize,
    /// 1-based line number in the new content where the hunk starts.
    pub new_start: usize,
    /// Number of new-side lines covered by the hunk (context + insertions).
    pub new_count: usize,
    /// The hunk's lines in display order.
    pub lines: Vec<DiffLine>,
}

/// A single line inside a [`Hunk`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DiffLine {
    /// `' '` for context, `'-'` for a removed line, `'+'` for an added line.
    pub tag: char,
    /// Line text with trailing `\n` characters removed.
    pub content: String,
}
65
66fn is_binary(content: &[u8]) -> bool {
72 let probe = &content[..content.len().min(BINARY_PROBE_SIZE)];
73 probe.contains(&0)
74}
75
76fn generate_hunks(old: &str, new: &str) -> Vec<Hunk> {
82 let diff = similar::TextDiff::from_lines(old, new);
83 let mut hunks = Vec::new();
84
85 for group in diff.grouped_ops(3) {
86 let mut lines = Vec::new();
87 let mut old_start = 0;
88 let mut old_count = 0;
89 let mut new_start = 0;
90 let mut new_count = 0;
91
92 for op in &group {
93 if lines.is_empty() {
95 old_start = op.old_range().start;
96 new_start = op.new_range().start;
97 }
98
99 match op.tag() {
100 similar::DiffTag::Equal => {
101 for value in diff.iter_changes(op) {
102 lines.push(DiffLine {
103 tag: ' ',
104 content: value.value().trim_end_matches('\n').to_string(),
105 });
106 old_count += 1;
107 new_count += 1;
108 }
109 }
110 similar::DiffTag::Delete => {
111 for value in diff.iter_changes(op) {
112 lines.push(DiffLine {
113 tag: '-',
114 content: value.value().trim_end_matches('\n').to_string(),
115 });
116 old_count += 1;
117 }
118 }
119 similar::DiffTag::Insert => {
120 for value in diff.iter_changes(op) {
121 lines.push(DiffLine {
122 tag: '+',
123 content: value.value().trim_end_matches('\n').to_string(),
124 });
125 new_count += 1;
126 }
127 }
128 similar::DiffTag::Replace => {
129 for value in diff.iter_changes(op) {
130 match value.tag() {
131 similar::ChangeTag::Delete => {
132 lines.push(DiffLine {
133 tag: '-',
134 content: value.value().trim_end_matches('\n').to_string(),
135 });
136 old_count += 1;
137 }
138 similar::ChangeTag::Insert => {
139 lines.push(DiffLine {
140 tag: '+',
141 content: value.value().trim_end_matches('\n').to_string(),
142 });
143 new_count += 1;
144 }
145 similar::ChangeTag::Equal => {
146 lines.push(DiffLine {
147 tag: ' ',
148 content: value.value().trim_end_matches('\n').to_string(),
149 });
150 old_count += 1;
151 new_count += 1;
152 }
153 }
154 }
155 }
156 }
157 }
158
159 hunks.push(Hunk {
160 old_start: old_start + 1, old_count,
162 new_start: new_start + 1, new_count,
164 lines,
165 });
166 }
167
168 hunks
169}
170
171fn diff_single_file(
177 path: &str,
178 kind: DiffKind,
179 old_content: Option<&[u8]>,
180 new_content: Option<&[u8]>,
181) -> ContentDiff {
182 let rename_from = match &kind {
183 DiffKind::Renamed { from, .. } => Some(from.clone()),
184 _ => None,
185 };
186
187 let old_bytes = old_content.unwrap_or(&[]);
188 let new_bytes = new_content.unwrap_or(&[]);
189
190 if old_bytes.len() > MAX_DIFF_SIZE || new_bytes.len() > MAX_DIFF_SIZE {
192 return ContentDiff {
193 path: path.to_string(),
194 kind,
195 binary: false,
196 too_large: true,
197 hunks: Vec::new(),
198 rename_from,
199 };
200 }
201
202 if (!old_bytes.is_empty() && is_binary(old_bytes))
204 || (!new_bytes.is_empty() && is_binary(new_bytes))
205 {
206 return ContentDiff {
207 path: path.to_string(),
208 kind,
209 binary: true,
210 too_large: false,
211 hunks: Vec::new(),
212 rename_from,
213 };
214 }
215
216 let old_str = match std::str::from_utf8(old_bytes) {
218 Ok(s) => s,
219 Err(_) => {
220 return ContentDiff {
221 path: path.to_string(),
222 kind,
223 binary: true,
224 too_large: false,
225 hunks: Vec::new(),
226 rename_from,
227 };
228 }
229 };
230 let new_str = match std::str::from_utf8(new_bytes) {
231 Ok(s) => s,
232 Err(_) => {
233 return ContentDiff {
234 path: path.to_string(),
235 kind,
236 binary: true,
237 too_large: false,
238 hunks: Vec::new(),
239 rename_from,
240 };
241 }
242 };
243
244 let hunks = generate_hunks(old_str, new_str);
245
246 ContentDiff {
247 path: path.to_string(),
248 kind,
249 binary: false,
250 too_large: false,
251 hunks,
252 rename_from,
253 }
254}
255
256struct CommitFiles {
262 store: crate::store::FsStore,
263 commit: crate::metadata::Commit,
264 reader: crate::crypto::CommitReader,
265 ancestor_keys: Vec<void_crypto::ContentKey>,
266}
267
268impl CommitFiles {
269 fn load(ctx: &VoidContext, commit_cid: &VoidCid) -> Result<Self> {
270 let store = ctx.open_store()?;
271 let (commit, reader) = ctx.load_commit(&store, commit_cid)?;
272 let ancestor_keys =
273 crate::crypto::collect_ancestor_content_keys_vault(&ctx.crypto.vault, &store, &commit);
274 Ok(Self { store, commit, reader, ancestor_keys })
275 }
276
277 fn read_file(&self, ctx: &VoidContext, path: &str) -> Option<Vec<u8>> {
278 ctx.read_file_from_commit(&self.store, &self.commit, &self.reader, &self.ancestor_keys, path)
279 .ok()
280 .map(|fc| fc.into())
281 }
282}
283
/// Reads `path`, joined onto the `workspace` directory, from disk.
///
/// Returns `None` when the file cannot be read for any reason.
fn read_file_from_workspace(workspace: &Path, path: &str) -> Option<Vec<u8>> {
    match fs::read(workspace.join(path)) {
        Ok(bytes) => Some(bytes),
        Err(_) => None,
    }
}
289
290fn read_file_from_staged(
292 void_dir: &Path,
293 staged_key: &SecretKey,
294 content_hash: &ContentHash,
295) -> Option<Vec<u8>> {
296 read_staged_blob(void_dir, staged_key, content_hash).ok()
297}
298
299pub fn content_diff_index(
308 tree_diff: &TreeDiff,
309 index: &WorkspaceIndex,
310 workspace: &Path,
311 void_dir: &Path,
312 staged_key: &SecretKey,
313) -> Result<Vec<ContentDiff>> {
314 let index_map: std::collections::HashMap<&str, ContentHash> = index
316 .iter()
317 .map(|e| (e.path.as_str(), e.content_hash))
318 .collect();
319
320 let diffs = tree_diff
321 .files
322 .iter()
323 .map(|file| {
324 let old_path = match &file.kind {
325 DiffKind::Renamed { from, .. } => from.as_str(),
326 _ => file.path.as_str(),
327 };
328
329 let old_content = index_map
330 .get(old_path)
331 .and_then(|hash| read_file_from_staged(void_dir, staged_key, hash));
332
333 let new_content = match &file.kind {
334 DiffKind::Deleted => None,
335 _ => read_file_from_workspace(workspace, &file.path),
336 };
337
338 diff_single_file(
339 &file.path,
340 file.kind.clone(),
341 old_content.as_deref(),
342 new_content.as_deref(),
343 )
344 })
345 .collect();
346
347 Ok(diffs)
348}
349
350pub fn content_diff_working(
355 tree_diff: &TreeDiff,
356 ctx: &VoidContext,
357 commit_cid: &VoidCid,
358 workspace: &Path,
359) -> Result<Vec<ContentDiff>> {
360 let files = CommitFiles::load(ctx, commit_cid)?;
361
362 let diffs = tree_diff
363 .files
364 .iter()
365 .map(|file| {
366 let old_path = match &file.kind {
367 DiffKind::Renamed { from, .. } => from.as_str(),
368 _ => file.path.as_str(),
369 };
370
371 let old_content = files.read_file(ctx, old_path);
372
373 let new_content = match &file.kind {
374 DiffKind::Deleted => None,
375 _ => read_file_from_workspace(workspace, &file.path),
376 };
377
378 diff_single_file(
379 &file.path,
380 file.kind.clone(),
381 old_content.as_deref(),
382 new_content.as_deref(),
383 )
384 })
385 .collect();
386
387 Ok(diffs)
388}
389
390pub fn content_diff_commits(
395 tree_diff: &TreeDiff,
396 ctx: &VoidContext,
397 old_cid: &VoidCid,
398 new_cid: &VoidCid,
399) -> Result<Vec<ContentDiff>> {
400 let old_files = CommitFiles::load(ctx, old_cid)?;
401 let new_files = CommitFiles::load(ctx, new_cid)?;
402
403 let diffs = tree_diff
404 .files
405 .iter()
406 .map(|file| {
407 let old_path = match &file.kind {
408 DiffKind::Renamed { from, .. } => from.as_str(),
409 _ => file.path.as_str(),
410 };
411
412 let old_content = old_files.read_file(ctx, old_path);
413
414 let new_content = match &file.kind {
415 DiffKind::Deleted => None,
416 _ => new_files.read_file(ctx, &file.path),
417 };
418
419 diff_single_file(
420 &file.path,
421 file.kind.clone(),
422 old_content.as_deref(),
423 new_content.as_deref(),
424 )
425 })
426 .collect();
427
428 Ok(diffs)
429}
430
431pub fn content_diff_staged(
436 tree_diff: &TreeDiff,
437 ctx: &VoidContext,
438) -> Result<Vec<ContentDiff>> {
439 let head_commit_cid = refs::resolve_head(&ctx.paths.void_dir)?;
441 let head_files = match head_commit_cid {
442 Some(commit_cid) => {
443 let head_cid = cid::from_bytes(commit_cid.as_bytes())?;
444 Some(CommitFiles::load(ctx, &head_cid)?)
445 }
446 None => None,
447 };
448
449 let staged_key = ctx.crypto.vault.staged_key()?;
450 let void_dir = ctx.paths.void_dir.as_std_path();
451
452 let diffs = tree_diff
453 .files
454 .iter()
455 .map(|file| {
456 let old_path = match &file.kind {
457 DiffKind::Renamed { from, .. } => from.as_str(),
458 _ => file.path.as_str(),
459 };
460
461 let old_content = head_files.as_ref().and_then(|f| f.read_file(ctx, old_path));
462
463 let new_content = match &file.kind {
464 DiffKind::Deleted => None,
465 _ => file
466 .new_hash
467 .as_ref()
468 .and_then(|hash| read_file_from_staged(void_dir, staged_key, hash)),
469 };
470
471 diff_single_file(
472 &file.path,
473 file.kind.clone(),
474 old_content.as_deref(),
475 new_content.as_deref(),
476 )
477 })
478 .collect();
479
480 Ok(diffs)
481}
482
#[cfg(test)]
mod tests {
    use super::*;

    /// True when `hunk` contains a line with exactly this tag and text.
    fn has_line(hunk: &Hunk, tag: char, text: &str) -> bool {
        hunk.lines
            .iter()
            .any(|line| line.tag == tag && line.content == text)
    }

    // A NUL byte marks content as binary; plain text and empty input do not.
    #[test]
    fn binary_detection_null_byte() {
        assert!(is_binary(b"hello\x00world"));
        assert!(!is_binary(b"hello world"));
        assert!(!is_binary(b""));
    }

    // Inserting one line yields a single hunk containing that line as '+'.
    #[test]
    fn hunk_generation_simple_add() {
        let hunks = generate_hunks(
            "line1\nline2\nline3\n",
            "line1\nline2\nnew line\nline3\n",
        );

        assert_eq!(hunks.len(), 1);
        assert!(has_line(&hunks[0], '+', "new line"));
    }

    // Removing one line yields a single hunk containing that line as '-'.
    #[test]
    fn hunk_generation_simple_delete() {
        let hunks = generate_hunks("line1\nline2\nline3\n", "line1\nline3\n");

        assert_eq!(hunks.len(), 1);
        assert!(has_line(&hunks[0], '-', "line2"));
    }

    // Changing one line shows up as a removal plus an addition in one hunk.
    #[test]
    fn hunk_generation_modify() {
        let hunks = generate_hunks("aaa\nbbb\nccc\n", "aaa\nBBB\nccc\n");

        assert_eq!(hunks.len(), 1);
        assert!(has_line(&hunks[0], '-', "bbb"));
        assert!(has_line(&hunks[0], '+', "BBB"));
    }

    // Content with NUL bytes is flagged binary and produces no hunks.
    #[test]
    fn diff_single_file_binary_detected() {
        let before = b"hello\x00binary";
        let after = b"changed\x00binary";

        let diff = diff_single_file("test.bin", DiffKind::Modified, Some(before), Some(after));

        assert!(diff.binary);
        assert!(diff.hunks.is_empty());
    }

    // One side over the size limit is enough to skip diffing.
    #[test]
    fn diff_single_file_too_large() {
        let oversized = vec![b'x'; MAX_DIFF_SIZE + 1];

        let diff = diff_single_file(
            "big.txt",
            DiffKind::Modified,
            Some(oversized.as_slice()),
            Some(b"small"),
        );

        assert!(diff.too_large);
        assert!(diff.hunks.is_empty());
    }

    // An added file diffs against empty content: every line is '+'.
    #[test]
    fn diff_single_file_added() {
        let after = b"line1\nline2\n";

        let diff = diff_single_file("new.txt", DiffKind::Added, None, Some(after));

        assert!(!diff.binary);
        assert!(!diff.too_large);
        assert_eq!(diff.hunks.len(), 1);
        assert!(diff.hunks[0].lines.iter().all(|line| line.tag == '+'));
    }

    // A deleted file diffs against empty content: every line is '-'.
    #[test]
    fn diff_single_file_deleted() {
        let before = b"line1\nline2\n";

        let diff = diff_single_file("old.txt", DiffKind::Deleted, Some(before), None);

        assert_eq!(diff.hunks.len(), 1);
        assert!(diff.hunks[0].lines.iter().all(|line| line.tag == '-'));
    }

    // Two empty inputs have nothing to report.
    #[test]
    fn empty_file_diff() {
        assert!(generate_hunks("", "").is_empty());
    }

    // Bytes that are not valid UTF-8 are reported as binary even without a NUL.
    #[test]
    fn non_utf8_treated_as_binary() {
        let before: &[u8] = &[0xFF, 0xFE, 0x41, 0x42];
        let after = b"hello";

        let diff = diff_single_file("test.dat", DiffKind::Modified, Some(before), Some(after));

        assert!(diff.binary);
    }
}