Skip to main content

git_meta_lib/
serialize.rs

1//! Serialize local metadata to Git tree(s) and commit(s).
2//!
3//! This module implements the full serialization workflow: reading metadata
4//! from the SQLite store, building Git trees (full or incremental), creating
5//! commits, updating refs, and optionally auto-pruning old entries.
6//!
7//! The public entry point is [`run()`], which takes a [`Session`](crate::Session)
8//! and returns a [`SerializeOutput`] describing what was written.
9
10use std::collections::{BTreeMap, BTreeSet};
11
12use gix::bstr::ByteSlice;
13use gix::prelude::ObjectIdExt;
14use gix::refs::transaction::PreviousValue;
15
16use crate::db::types::{
17    ListTombstoneRecord, Operation, SerializableEntry, SetTombstoneRecord, TombstoneRecord,
18};
19use crate::db::Store;
20use crate::error::{Error, Result};
21use crate::list_value::{make_entry_name, parse_entries};
22use crate::prune::{self, PruneRules};
23use crate::session::Session;
24use crate::tree::filter::{classify_key, parse_filter_rules, MAIN_DEST};
25use crate::tree::format::{build_dir, build_tree_from_paths, insert_path, TreeDir};
26use crate::tree::model::Tombstone;
27use crate::tree_paths;
28use crate::types::{Target, TargetType, ValueType};
29
/// Maximum number of individual change lines included in a commit message.
///
/// When a serialize run has more changes than this, `build_commit_message`
/// emits only a summary (`changes-omitted: true` plus the count) instead of
/// one line per change.
const MAX_COMMIT_CHANGES: usize = 1000;
32
/// Summary of a single serialize run.
///
/// Carries everything a CLI (or any other caller) needs in order to report
/// the outcome; the value itself is plain data and performs no I/O.
#[must_use]
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct SerializeOutput {
    /// Total number of records serialized, summed across all destinations
    /// (`0` when no ref was written).
    pub changes: usize,
    /// Names of the refs that were updated, e.g. `["refs/meta/local/main"]`.
    pub refs_written: Vec<String>,
    /// How many entries were dropped by pruning during the run (`0` if none).
    pub pruned: u64,
}
47
/// Serialize local metadata to Git tree(s) and commit(s).
///
/// Determines incremental vs full mode automatically based on
/// `last_materialized`, unless `force_full` is true. Applies filter routing
/// and pruning rules. Updates local refs and the materialization timestamp.
///
/// The overall flow is:
///
/// 1. Load metadata and all tombstone kinds from the store, and derive the
///    change list used for commit messages.
/// 2. Drop non-project entries older than the `meta:prune:since` cutoff.
/// 3. Route every record to its destination(s) via the filter rules.
/// 4. For each destination, build a tree, skip it if the tree equals the
///    parent commit's tree, otherwise commit and update the destination ref.
/// 5. For the main destination only, optionally write a follow-up prune commit.
/// 6. Record `now` as the new `last_materialized` timestamp.
///
/// # Parameters
///
/// - `session`: the gmeta session providing the repository, store, and config.
/// - `now`: the current timestamp in milliseconds since the Unix epoch,
///   used for the commit signature and the `last_materialized` marker.
/// - `force_full`: when true, ignore incremental dirty-target detection and
///   rebuild serialized trees from the complete SQLite state.
///
/// # Returns
///
/// A [`SerializeOutput`] with counts and written refs. If there is nothing
/// to serialize, `changes` will be `0` and `refs_written` will be empty.
///
/// # Errors
///
/// Returns an error if database reads, Git object writes, or ref updates fail.
pub fn run(session: &Session, now: i64, force_full: bool) -> Result<SerializeOutput> {
    let repo = &session.repo;
    let local_ref_name = session.local_ref();
    let last_materialized = session.store.get_last_materialized()?;

    // Determine existing tree for incremental mode
    let existing_tree_oid = ref_tree_oid(repo, &local_ref_name)?;

    // Determine incremental vs full mode and collect entries + changes
    let (
        metadata_entries,
        tombstone_entries,
        set_tombstone_entries,
        list_tombstone_entries,
        dirty_target_bases,
        changes,
    ) = if let (false, Some(since)) = (force_full, last_materialized) {
        // Incremental candidate: not forced to full and a previous
        // materialization timestamp exists.
        let modified = session.store.get_modified_since(since)?;
        let metadata = session.store.get_all_metadata()?;
        // With no modifications since `since`, every current entry is
        // reported as an addition ('A'); otherwise one change per modified
        // entry is reported.
        let changes: Vec<(char, String, String)> = if modified.is_empty() {
            metadata.iter().map(metadata_add_change).collect()
        } else {
            modified
                .iter()
                .map(|entry| {
                    // 'D' for removals; a `Set` is 'M' when the local ref
                    // already has a tree and 'A' otherwise; every other
                    // operation is reported as 'M'.
                    let op_char = match entry.operation {
                        Operation::Remove => 'D',
                        Operation::Set => {
                            if existing_tree_oid.is_some() {
                                'M'
                            } else {
                                'A'
                            }
                        }
                        _ => 'M',
                    };
                    let target_label = if entry.target_type == TargetType::Project {
                        "project".to_string()
                    } else {
                        format!("{}:{}", entry.target_type, entry.target_value)
                    };
                    (op_char, target_label, entry.key.clone())
                })
                .collect()
        };

        // Compute dirty target base paths from modified entries
        let mut dirty_bases: BTreeSet<String> = BTreeSet::new();
        for entry in &modified {
            let target = if entry.target_type == TargetType::Project {
                Target::parse("project")?
            } else {
                Target::parse(&format!("{}:{}", entry.target_type, entry.target_value))?
            };
            dirty_bases.insert(tree_paths::tree_base_path(&target));
        }

        let tombstones = session.store.get_all_tombstones()?;
        let set_tombstones = session.store.get_all_set_tombstones()?;
        let list_tombstones = session.store.get_all_list_tombstones()?;

        (
            metadata,
            tombstones,
            set_tombstones,
            list_tombstones,
            // The dirty set is only handed on when there is an existing tree
            // to patch and something actually changed; `None` makes
            // `build_tree` do a full rebuild.
            if existing_tree_oid.is_some() && !modified.is_empty() {
                Some(dirty_bases)
            } else {
                None
            },
            changes,
        )
    } else {
        // Full mode: everything in the store is reported as an addition.
        let metadata = session.store.get_all_metadata()?;

        let changes: Vec<(char, String, String)> =
            metadata.iter().map(metadata_add_change).collect();

        (
            metadata,
            session.store.get_all_tombstones()?,
            session.store.get_all_set_tombstones()?,
            session.store.get_all_list_tombstones()?,
            None,
            changes,
        )
    };

    // Nothing to serialize. This return happens before `last_materialized`
    // is updated.
    // NOTE(review): only metadata and key tombstones are checked here; a store
    // holding nothing but set/list tombstones also returns early — confirm
    // that this is intended.
    if metadata_entries.is_empty() && tombstone_entries.is_empty() {
        return Ok(SerializeOutput {
            changes: 0,
            refs_written: Vec::new(),
            pruned: 0,
        });
    }

    // Apply prune-since cutoff to filter old entries before building the tree
    let prune_since = session
        .store
        .get(&Target::project(), "meta:prune:since")?
        // A stored value that does not decode as a JSON string is treated as
        // "no cutoff configured" rather than as an error.
        .and_then(|e| serde_json::from_str::<String>(&e.value).ok());
    let prune_rules = prune::read_prune_rules(&session.store)?;
    let prune_cutoff_ms = prune_since
        .as_deref()
        .map(|s| prune::parse_since_to_cutoff_ms(s, now))
        .transpose()?;
    let mut pruned_count = 0u64;
    let metadata_entries = if let Some(cutoff) = prune_cutoff_ms {
        metadata_entries
            .into_iter()
            .filter(|e| {
                // Project-target entries are never dropped by the cutoff.
                if e.target_type != TargetType::Project && e.last_timestamp < cutoff {
                    pruned_count += 1;
                    false
                } else {
                    true
                }
            })
            .collect()
    } else {
        metadata_entries
    };

    // Route entries through filter rules to destinations
    let filter_rules = parse_filter_rules(&session.store)?;

    let mut dest_metadata: BTreeMap<String, Vec<SerializableEntry>> = BTreeMap::new();
    let mut dest_tombstones: BTreeMap<String, Vec<TombstoneRecord>> = BTreeMap::new();
    let mut dest_set_tombstones: BTreeMap<String, Vec<SetTombstoneRecord>> = BTreeMap::new();
    let mut dest_list_tombstones: BTreeMap<String, Vec<ListTombstoneRecord>> = BTreeMap::new();

    // A record whose key classifies to `None` is not routed anywhere; a record
    // routed to several destinations is cloned into each of them.
    for entry in &metadata_entries {
        let key = &entry.key;
        if let Some(dests) = classify_key(key, &filter_rules) {
            for dest in dests {
                dest_metadata.entry(dest).or_default().push(entry.clone());
            }
        }
    }

    for entry in &tombstone_entries {
        if let Some(dests) = classify_key(&entry.key, &filter_rules) {
            for dest in dests {
                dest_tombstones.entry(dest).or_default().push(entry.clone());
            }
        }
    }

    for entry in &set_tombstone_entries {
        if let Some(dests) = classify_key(&entry.key, &filter_rules) {
            for dest in dests {
                dest_set_tombstones
                    .entry(dest)
                    .or_default()
                    .push(entry.clone());
            }
        }
    }

    for entry in &list_tombstone_entries {
        if let Some(dests) = classify_key(&entry.key, &filter_rules) {
            for dest in dests {
                dest_list_tombstones
                    .entry(dest)
                    .or_default()
                    .push(entry.clone());
            }
        }
    }

    // Ensure "main" is always present
    dest_metadata.entry(MAIN_DEST.to_string()).or_default();

    let mut all_dests: BTreeSet<String> = BTreeSet::new();
    all_dests.extend(dest_metadata.keys().cloned());
    all_dests.extend(dest_tombstones.keys().cloned());
    all_dests.extend(dest_set_tombstones.keys().cloned());
    all_dests.extend(dest_list_tombstones.keys().cloned());

    // Counts routed records of every kind; a record routed to two
    // destinations is counted twice.
    let total_changes: usize = dest_metadata
        .values()
        .map(std::vec::Vec::len)
        .sum::<usize>()
        + dest_tombstones
            .values()
            .map(std::vec::Vec::len)
            .sum::<usize>()
        + dest_set_tombstones
            .values()
            .map(std::vec::Vec::len)
            .sum::<usize>()
        + dest_list_tombstones
            .values()
            .map(std::vec::Vec::len)
            .sum::<usize>();

    let name = session.name();
    let email = session.email();
    let sig = gix::actor::Signature {
        name: name.into(),
        email: email.into(),
        // `now` is in milliseconds; the signature time is built from whole
        // seconds with a zero offset.
        time: gix::date::Time::new(now / 1000, 0),
    };

    let mut refs_written = Vec::new();
    let mut auto_pruned = 0u64;

    for dest in &all_dests {
        let ref_name = session.destination_ref(dest);
        let empty_meta: Vec<SerializableEntry> = Vec::new();
        let empty_tomb: Vec<TombstoneRecord> = Vec::new();
        let empty_set_tomb: Vec<SetTombstoneRecord> = Vec::new();
        let empty_list_tomb: Vec<ListTombstoneRecord> = Vec::new();

        let meta = dest_metadata.get(dest).unwrap_or(&empty_meta);
        let tombs = dest_tombstones.get(dest).unwrap_or(&empty_tomb);
        let set_tombs = dest_set_tombstones.get(dest).unwrap_or(&empty_set_tomb);
        let list_tombs = dest_list_tombstones.get(dest).unwrap_or(&empty_list_tomb);

        // Destinations with no records at all (e.g. the always-present but
        // possibly empty "main") are skipped without touching their ref.
        if meta.is_empty() && tombs.is_empty() && set_tombs.is_empty() && list_tombs.is_empty() {
            continue;
        }

        // Use incremental mode only for the main destination
        let (existing, dirty) = if dest == MAIN_DEST {
            (existing_tree_oid, dirty_target_bases.as_ref())
        } else {
            (None, None)
        };

        let tree_oid = build_tree(repo, meta, tombs, set_tombs, list_tombs, existing, dirty)?;

        // Current tip of the destination ref, if any. Lookup and peel
        // failures are treated as "no parent" rather than as errors.
        let parent_oid = repo
            .find_reference(&ref_name)
            .ok()
            .and_then(|r| r.into_fully_peeled_id().ok())
            .map(gix::Id::detach);

        let parent_tree_oid = parent_oid.as_ref().and_then(|oid| {
            oid.attach(repo)
                .object()
                .ok()?
                .into_commit()
                .tree_id()
                .ok()
                .map(gix::Id::detach)
        });
        // Avoid creating an empty commit when the tree did not change.
        if parent_tree_oid == Some(tree_oid) {
            continue;
        }

        let parents: Vec<gix::ObjectId> = parent_oid.into_iter().collect();
        // NOTE(review): the same global change list is used for every
        // destination's commit message, regardless of which records were
        // routed there — confirm this is intended.
        let commit_message = build_commit_message(&changes);
        let commit = gix::objs::Commit {
            message: commit_message.into(),
            tree: tree_oid,
            author: sig.clone(),
            committer: sig.clone(),
            encoding: None,
            parents: parents.into(),
            extra_headers: Default::default(),
        };

        let commit_oid = repo
            .write_object(&commit)
            .map_err(|e| Error::Other(format!("{e}")))?
            .detach();
        // `PreviousValue::Any`: the ref is updated without checking what it
        // pointed to before.
        repo.reference(
            ref_name.as_str(),
            commit_oid,
            PreviousValue::Any,
            "git-meta: serialize",
        )
        .map_err(|e| Error::Other(format!("{e}")))?;

        refs_written.push(ref_name.clone());

        // Auto-prune only for main destination
        if dest == MAIN_DEST {
            if let Some(ref prune_rules_val) = prune_rules {
                if prune::should_prune(repo, tree_oid, prune_rules_val)? {
                    let prune_tree_oid =
                        prune_tree(repo, tree_oid, prune_rules_val, &session.store, now)?;

                    // Only write a prune commit when pruning changed the tree.
                    if prune_tree_oid != tree_oid {
                        // Re-read the ref so the prune commit is parented on
                        // the serialize commit written just above.
                        let prune_parent_oid = repo
                            .find_reference(&ref_name)
                            .map_err(|e| Error::Other(format!("{e}")))?
                            .into_fully_peeled_id()
                            .map_err(|e| Error::Other(format!("{e}")))?
                            .detach();

                        let (keys_dropped, keys_retained) =
                            count_prune_stats(repo, tree_oid, prune_tree_oid)?;

                        auto_pruned = keys_dropped;

                        // Optional extra trailer line, present only when a
                        // minimum size is configured.
                        let min_size_str = prune_rules_val
                            .min_size
                            .map(|s| format!("\nmin-size: {s}"))
                            .unwrap_or_default();

                        let message = format!(
                            "git-meta: prune --since={}\n\npruned: true\nsince: {}{}\nkeys-dropped: {}\nkeys-retained: {}",
                            prune_rules_val.since, prune_rules_val.since, min_size_str, keys_dropped, keys_retained
                        );

                        let prune_commit = gix::objs::Commit {
                            message: message.into(),
                            tree: prune_tree_oid,
                            author: sig.clone(),
                            committer: sig.clone(),
                            encoding: None,
                            parents: vec![prune_parent_oid].into(),
                            extra_headers: Default::default(),
                        };

                        let _prune_commit_oid = repo
                            .write_object(&prune_commit)
                            .map_err(|e| Error::Other(format!("{e}")))?
                            .detach();
                        repo.reference(
                            ref_name.as_str(),
                            _prune_commit_oid,
                            PreviousValue::Any,
                            "git-meta: auto-prune",
                        )
                        .map_err(|e| Error::Other(format!("{e}")))?;
                    }
                }
            }
        }
    }

    // The marker is advanced even when every destination was skipped and no
    // ref was written.
    session.store.set_last_materialized(now)?;

    Ok(SerializeOutput {
        // Report zero changes when nothing was actually committed.
        changes: if refs_written.is_empty() {
            0
        } else {
            total_changes
        },
        refs_written,
        // Entries dropped by the since-cutoff filter plus keys dropped by the
        // auto-prune commit.
        pruned: pruned_count + auto_pruned,
    })
}
417
418fn metadata_add_change(entry: &SerializableEntry) -> (char, String, String) {
419    let target_label = if entry.target_type == TargetType::Project {
420        "project".to_string()
421    } else {
422        format!("{}:{}", entry.target_type, entry.target_value)
423    };
424    ('A', target_label, entry.key.clone())
425}
426
427fn ref_tree_oid(repo: &gix::Repository, ref_name: &str) -> Result<Option<gix::ObjectId>> {
428    repo.find_reference(ref_name)
429        .ok()
430        .and_then(|r| r.into_fully_peeled_id().ok())
431        .map(|id| {
432            id.object()
433                .map_err(|e| Error::Other(format!("{e}")))?
434                .into_commit()
435                .tree_id()
436                .map(gix::Id::detach)
437                .map_err(|e| Error::Other(format!("{e}")))
438        })
439        .transpose()
440}
441
442/// Build a commit message from a list of changes.
443///
444/// Each change is `(op_char, target_label, key)`.
445fn build_commit_message(changes: &[(char, String, String)]) -> String {
446    if changes.len() > MAX_COMMIT_CHANGES {
447        format!(
448            "git-meta: serialize ({} changes)\n\nchanges-omitted: true\ncount: {}",
449            changes.len(),
450            changes.len()
451        )
452    } else {
453        let mut msg = format!("git-meta: serialize ({} changes)\n", changes.len());
454        for (op, target, key) in changes {
455            msg.push('\n');
456            msg.push(*op);
457            msg.push('\t');
458            msg.push_str(target);
459            msg.push('\t');
460            msg.push_str(key);
461        }
462        msg
463    }
464}
465
/// Build a Git tree from already-filtered records, always as a full build.
///
/// Used by `git-meta prune` to rebuild a tree from only the surviving entries.
/// No existing tree and no dirty-target set are passed on, so the tree is
/// built from the given records alone.
///
/// # Parameters
///
/// - `repo`: repository that receives the written objects
/// - `metadata_entries`: metadata entries to include
/// - `tombstone_entries`: key tombstones to include
/// - `set_tombstone_entries`: set-member tombstones to include
/// - `list_tombstone_entries`: list-entry tombstones to include
///
/// # Returns
///
/// The OID of the root Git tree object.
///
/// # Errors
///
/// Returns an error if target parsing or Git object writes fail.
#[cfg(feature = "internal")]
pub fn build_filtered_tree(
    repo: &gix::Repository,
    metadata_entries: &[SerializableEntry],
    tombstone_entries: &[TombstoneRecord],
    set_tombstone_entries: &[SetTombstoneRecord],
    list_tombstone_entries: &[ListTombstoneRecord],
) -> Result<gix::ObjectId> {
    // Passing neither of these selects the non-incremental path in `build_tree`.
    let no_existing_tree: Option<gix::ObjectId> = None;
    let no_dirty_filter: Option<&BTreeSet<String>> = None;

    build_tree(
        repo,
        metadata_entries,
        tombstone_entries,
        set_tombstone_entries,
        list_tombstone_entries,
        no_existing_tree,
        no_dirty_filter,
    )
}
503
504/// Build a complete Git tree from all metadata entries.
505///
506/// When `existing_tree_oid` and `dirty_target_bases` are provided, only entries
507/// belonging to dirty targets are processed; unchanged subtrees are reused
508/// from the existing tree by OID (incremental mode).
509fn build_tree(
510    repo: &gix::Repository,
511    metadata_entries: &[SerializableEntry],
512    tombstone_entries: &[TombstoneRecord],
513    set_tombstone_entries: &[SetTombstoneRecord],
514    list_tombstone_entries: &[ListTombstoneRecord],
515    existing_tree_oid: Option<gix::ObjectId>,
516    dirty_target_bases: Option<&BTreeSet<String>>,
517) -> Result<gix::ObjectId> {
518    let mut files: BTreeMap<String, Vec<u8>> = BTreeMap::new();
519
520    for e in metadata_entries {
521        let target = if e.target_type == TargetType::Project {
522            Target::parse("project")?
523        } else {
524            Target::parse(&format!("{}:{}", e.target_type, e.target_value))?
525        };
526
527        // Skip entries for clean targets -- their subtrees will be reused
528        if let Some(dirty) = dirty_target_bases {
529            if !dirty.contains(&tree_paths::tree_base_path(&target)) {
530                continue;
531            }
532        }
533
534        match e.value_type {
535            ValueType::String => {
536                let full_path = tree_paths::tree_path(&target, &e.key)?;
537                if e.is_git_ref {
538                    let oid = gix::ObjectId::from_hex(e.value.as_bytes())
539                        .map_err(|e| Error::Other(format!("{e}")))?;
540                    let blob = oid
541                        .attach(repo)
542                        .object()
543                        .map_err(|e| Error::Other(format!("{e}")))?
544                        .into_blob();
545                    files.insert(full_path, blob.data.clone());
546                } else {
547                    let raw_value: String = match serde_json::from_str(&e.value) {
548                        Ok(s) => s,
549                        Err(_) => e.value.clone(),
550                    };
551                    files.insert(full_path, raw_value.into_bytes());
552                }
553            }
554            ValueType::List => {
555                let list_entries =
556                    parse_entries(&e.value).map_err(|e| Error::InvalidValue(format!("{e}")))?;
557                let list_dir_path = tree_paths::list_dir_path(&target, &e.key)?;
558                for entry in list_entries {
559                    let entry_name = make_entry_name(&entry);
560                    let full_path = format!("{list_dir_path}/{entry_name}");
561                    files.insert(full_path, entry.value.into_bytes());
562                }
563            }
564            ValueType::Set => {
565                let members: Vec<String> = serde_json::from_str(&e.value)
566                    .map_err(|e| Error::InvalidValue(format!("failed to decode set value: {e}")))?;
567                let set_dir_path = tree_paths::set_dir_path(&target, &e.key)?;
568                for member in members {
569                    let member_id = crate::types::set_member_id(&member);
570                    let full_path = format!("{set_dir_path}/{member_id}");
571                    files.insert(full_path, member.into_bytes());
572                }
573            }
574        }
575    }
576
577    for record in tombstone_entries {
578        let target = if record.target_type == TargetType::Project {
579            Target::parse("project")?
580        } else {
581            Target::parse(&format!("{}:{}", record.target_type, record.target_value))?
582        };
583
584        if let Some(dirty) = dirty_target_bases {
585            if !dirty.contains(&tree_paths::tree_base_path(&target)) {
586                continue;
587            }
588        }
589
590        let full_path = tree_paths::tombstone_path(&target, &record.key)?;
591        let payload = serde_json::to_vec(&Tombstone {
592            timestamp: record.timestamp,
593            email: record.email.clone(),
594        })?;
595        files.insert(full_path, payload);
596    }
597
598    for record in set_tombstone_entries {
599        let target = if record.target_type == TargetType::Project {
600            Target::parse("project")?
601        } else {
602            Target::parse(&format!("{}:{}", record.target_type, record.target_value))?
603        };
604
605        if let Some(dirty) = dirty_target_bases {
606            if !dirty.contains(&tree_paths::tree_base_path(&target)) {
607                continue;
608            }
609        }
610
611        let full_path =
612            tree_paths::set_member_tombstone_path(&target, &record.key, &record.member_id)?;
613        files.insert(full_path, record.value.as_bytes().to_vec());
614    }
615
616    for record in list_tombstone_entries {
617        let target = if record.target_type == TargetType::Project {
618            Target::parse("project")?
619        } else {
620            Target::parse(&format!("{}:{}", record.target_type, record.target_value))?
621        };
622
623        if let Some(dirty) = dirty_target_bases {
624            if !dirty.contains(&tree_paths::tree_base_path(&target)) {
625                continue;
626            }
627        }
628
629        let full_path =
630            tree_paths::list_entry_tombstone_path(&target, &record.key, &record.entry_name)?;
631        let payload = serde_json::to_vec(&Tombstone {
632            timestamp: record.timestamp,
633            email: record.email.clone(),
634        })?;
635        files.insert(full_path, payload);
636    }
637
638    // Build nested tree, reusing unchanged subtrees from existing tree
639    if let (Some(existing_oid), Some(dirty_bases)) = (existing_tree_oid, dirty_target_bases) {
640        build_tree_incremental(repo, existing_oid, &files, dirty_bases)
641    } else {
642        build_tree_from_paths(repo, &files)
643    }
644}
645
646/// Incrementally build a tree by patching an existing tree.
647///
648/// Only dirty target subtrees are rebuilt from `files`; all other subtrees
649/// are reused from the existing tree by OID.
650fn build_tree_incremental(
651    repo: &gix::Repository,
652    existing_tree_oid: gix::ObjectId,
653    files: &BTreeMap<String, Vec<u8>>,
654    dirty_target_bases: &BTreeSet<String>,
655) -> Result<gix::ObjectId> {
656    // Step 1: Remove dirty target subtrees from existing tree
657    let cleaned_oid = remove_subtrees(repo, existing_tree_oid, dirty_target_bases)?;
658
659    // Step 2: Build TreeDir from dirty files only
660    let mut root = TreeDir::default();
661    for (path, content) in files {
662        let parts: Vec<&str> = path.split('/').collect();
663        insert_path(&mut root, &parts, content.clone());
664    }
665
666    // Step 3: Merge new content into cleaned tree
667    merge_dir_into_tree(repo, &root, cleaned_oid)
668}
669
670/// Remove subtrees at specific paths from an existing tree.
671fn remove_subtrees(
672    repo: &gix::Repository,
673    tree_oid: gix::ObjectId,
674    paths: &BTreeSet<String>,
675) -> Result<gix::ObjectId> {
676    let mut grouped: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
677    let mut direct_removes: BTreeSet<String> = BTreeSet::new();
678
679    for path in paths {
680        if let Some((first, rest)) = path.split_once('/') {
681            grouped
682                .entry(first.to_string())
683                .or_default()
684                .insert(rest.to_string());
685        } else {
686            direct_removes.insert(path.clone());
687        }
688    }
689
690    let mut editor = repo
691        .edit_tree(tree_oid)
692        .map_err(|e| Error::Other(format!("{e}")))?;
693
694    for name in &direct_removes {
695        let _ = editor.remove(name);
696    }
697
698    // For grouped paths, recurse into subtrees
699    let tree = tree_oid
700        .attach(repo)
701        .object()
702        .map_err(|e| Error::Other(format!("{e}")))?
703        .into_tree();
704    for (name, sub_paths) in &grouped {
705        let entry = tree.iter().find_map(|e| {
706            let e = e.ok()?;
707            if e.filename().to_str_lossy() == *name && e.mode().is_tree() {
708                Some(e.object_id())
709            } else {
710                None
711            }
712        });
713        if let Some(subtree_oid) = entry {
714            let new_oid = remove_subtrees(repo, subtree_oid, sub_paths)?;
715            let new_tree = new_oid
716                .attach(repo)
717                .object()
718                .map_err(|e| Error::Other(format!("{e}")))?
719                .into_tree();
720            if new_tree.iter().count() > 0 {
721                editor
722                    .upsert(name, gix::objs::tree::EntryKind::Tree, new_oid)
723                    .map_err(|e| Error::Other(format!("{e}")))?;
724            } else {
725                let _ = editor.remove(name);
726            }
727        }
728    }
729
730    Ok(editor
731        .write()
732        .map_err(|e| Error::Other(format!("{e}")))?
733        .detach())
734}
735
736/// Merge a [`TreeDir`] structure into an existing tree.
737///
738/// Existing entries not present in `dir` are preserved.
739/// Entries in `dir` overwrite existing entries with the same name.
740fn merge_dir_into_tree(
741    repo: &gix::Repository,
742    dir: &TreeDir,
743    existing_oid: gix::ObjectId,
744) -> Result<gix::ObjectId> {
745    let mut editor = repo
746        .edit_tree(existing_oid)
747        .map_err(|e| Error::Other(format!("{e}")))?;
748
749    for (name, content) in &dir.files {
750        let blob_oid: gix::ObjectId = repo
751            .write_blob(content)
752            .map_err(|e| Error::Other(format!("{e}")))?
753            .into();
754        editor
755            .upsert(name, gix::objs::tree::EntryKind::Blob, blob_oid)
756            .map_err(|e| Error::Other(format!("{e}")))?;
757    }
758
759    let existing_tree = existing_oid
760        .attach(repo)
761        .object()
762        .map_err(|e| Error::Other(format!("{e}")))?
763        .into_tree();
764    for (name, child_dir) in &dir.dirs {
765        let existing_child_oid = existing_tree.iter().find_map(|e| {
766            let e = e.ok()?;
767            if e.filename().to_str_lossy() == *name && e.mode().is_tree() {
768                Some(e.object_id())
769            } else {
770                None
771            }
772        });
773
774        let child_oid = if let Some(existing_child) = existing_child_oid {
775            merge_dir_into_tree(repo, child_dir, existing_child)?
776        } else {
777            build_dir(repo, child_dir)?
778        };
779        editor
780            .upsert(name, gix::objs::tree::EntryKind::Tree, child_oid)
781            .map_err(|e| Error::Other(format!("{e}")))?;
782    }
783
784    Ok(editor
785        .write()
786        .map_err(|e| Error::Other(format!("{e}")))?
787        .detach())
788}
789
790/// Prune a serialized tree by dropping entries older than the cutoff.
791///
792/// Returns the OID of the new (possibly smaller) tree. If the tree would
793/// be unchanged, the same OID is returned.
794///
795/// # Parameters
796///
797/// - `repo`: the Git repository
798/// - `tree_oid`: the root tree to prune
799/// - `rules`: the prune rules to apply
800/// - `db`: the metadata store (for potential future use by prune helpers)
801///
802/// # Errors
803///
804/// Returns an error if Git object reads/writes fail or cutoff parsing fails.
805pub fn prune_tree(
806    repo: &gix::Repository,
807    tree_oid: gix::ObjectId,
808    rules: &PruneRules,
809    db: &Store,
810    now_ms: i64,
811) -> Result<gix::ObjectId> {
812    let cutoff_ms = prune::parse_since_to_cutoff_ms(&rules.since, now_ms)?;
813    let min_size = rules.min_size.unwrap_or(0);
814
815    let tree = tree_oid
816        .attach(repo)
817        .object()
818        .map_err(|e| Error::Other(format!("{e}")))?
819        .into_tree();
820    let mut editor = repo
821        .empty_tree()
822        .edit()
823        .map_err(|e| Error::Other(format!("{e}")))?;
824
825    for entry_result in tree.iter() {
826        let entry = entry_result.map_err(|e| Error::Other(format!("{e}")))?;
827        let name = entry.filename().to_str_lossy().to_string();
828
829        if name == "project" {
830            editor
831                .upsert(&name, entry.mode().kind(), entry.object_id())
832                .map_err(|e| Error::Other(format!("{e}")))?;
833            continue;
834        }
835
836        if entry.mode().is_tree() {
837            let subtree_oid = entry.object_id();
838
839            // Check min-size
840            if min_size > 0 {
841                let size = prune::compute_tree_size_for(repo, subtree_oid)?;
842                if size < min_size {
843                    editor
844                        .upsert(&name, entry.mode().kind(), subtree_oid)
845                        .map_err(|e| Error::Other(format!("{e}")))?;
846                    continue;
847                }
848            }
849
850            let pruned_oid = prune_target_type_tree(repo, subtree_oid, cutoff_ms, min_size, db)?;
851            let pruned_tree = pruned_oid
852                .attach(repo)
853                .object()
854                .map_err(|e| Error::Other(format!("{e}")))?
855                .into_tree();
856            if pruned_tree.iter().count() > 0 {
857                editor
858                    .upsert(&name, gix::objs::tree::EntryKind::Tree, pruned_oid)
859                    .map_err(|e| Error::Other(format!("{e}")))?;
860            }
861        } else {
862            editor
863                .upsert(&name, entry.mode().kind(), entry.object_id())
864                .map_err(|e| Error::Other(format!("{e}")))?;
865        }
866    }
867
868    Ok(editor
869        .write()
870        .map_err(|e| Error::Other(format!("{e}")))?
871        .detach())
872}
873
874fn prune_target_type_tree(
875    repo: &gix::Repository,
876    tree_oid: gix::ObjectId,
877    cutoff_ms: i64,
878    min_size: u64,
879    db: &Store,
880) -> Result<gix::ObjectId> {
881    let tree = tree_oid
882        .attach(repo)
883        .object()
884        .map_err(|e| Error::Other(format!("{e}")))?
885        .into_tree();
886    let mut editor = repo
887        .empty_tree()
888        .edit()
889        .map_err(|e| Error::Other(format!("{e}")))?;
890
891    for entry_result in tree.iter() {
892        let entry = entry_result.map_err(|e| Error::Other(format!("{e}")))?;
893        let name = entry.filename().to_str_lossy().to_string();
894
895        if entry.mode().is_tree() {
896            let subtree_oid = entry.object_id();
897            let pruned_oid = prune_subtree_recursive(repo, subtree_oid, cutoff_ms, min_size, db)?;
898            let pruned_tree = pruned_oid
899                .attach(repo)
900                .object()
901                .map_err(|e| Error::Other(format!("{e}")))?
902                .into_tree();
903            if pruned_tree.iter().count() > 0 {
904                editor
905                    .upsert(&name, gix::objs::tree::EntryKind::Tree, pruned_oid)
906                    .map_err(|e| Error::Other(format!("{e}")))?;
907            }
908        } else {
909            editor
910                .upsert(&name, entry.mode().kind(), entry.object_id())
911                .map_err(|e| Error::Other(format!("{e}")))?;
912        }
913    }
914
915    Ok(editor
916        .write()
917        .map_err(|e| Error::Other(format!("{e}")))?
918        .detach())
919}
920
921fn prune_subtree_recursive(
922    repo: &gix::Repository,
923    tree_oid: gix::ObjectId,
924    cutoff_ms: i64,
925    _min_size: u64,
926    _db: &Store,
927) -> Result<gix::ObjectId> {
928    let tree = tree_oid
929        .attach(repo)
930        .object()
931        .map_err(|e| Error::Other(format!("{e}")))?
932        .into_tree();
933    let mut editor = repo
934        .empty_tree()
935        .edit()
936        .map_err(|e| Error::Other(format!("{e}")))?;
937
938    for entry_result in tree.iter() {
939        let entry = entry_result.map_err(|e| Error::Other(format!("{e}")))?;
940        let name = entry.filename().to_str_lossy().to_string();
941
942        if entry.mode().is_tree() {
943            if name == "__list" {
944                let list_tree_oid = entry.object_id();
945                let pruned_oid = prune_list_tree(repo, list_tree_oid, cutoff_ms)?;
946                let pruned_tree = pruned_oid
947                    .attach(repo)
948                    .object()
949                    .map_err(|e| Error::Other(format!("{e}")))?
950                    .into_tree();
951                if pruned_tree.iter().count() > 0 {
952                    editor
953                        .upsert(&name, gix::objs::tree::EntryKind::Tree, pruned_oid)
954                        .map_err(|e| Error::Other(format!("{e}")))?;
955                }
956            } else if name == "__tombstones" {
957                let tomb_tree_oid = entry.object_id();
958                let pruned_oid = prune_tombstone_tree(repo, tomb_tree_oid, cutoff_ms)?;
959                let pruned_tree = pruned_oid
960                    .attach(repo)
961                    .object()
962                    .map_err(|e| Error::Other(format!("{e}")))?
963                    .into_tree();
964                if pruned_tree.iter().count() > 0 {
965                    editor
966                        .upsert(&name, gix::objs::tree::EntryKind::Tree, pruned_oid)
967                        .map_err(|e| Error::Other(format!("{e}")))?;
968                }
969            } else {
970                let subtree_oid = entry.object_id();
971                let pruned_oid =
972                    prune_subtree_recursive(repo, subtree_oid, cutoff_ms, _min_size, _db)?;
973                let pruned_tree = pruned_oid
974                    .attach(repo)
975                    .object()
976                    .map_err(|e| Error::Other(format!("{e}")))?
977                    .into_tree();
978                if pruned_tree.iter().count() > 0 {
979                    editor
980                        .upsert(&name, gix::objs::tree::EntryKind::Tree, pruned_oid)
981                        .map_err(|e| Error::Other(format!("{e}")))?;
982                }
983            }
984        } else {
985            editor
986                .upsert(&name, entry.mode().kind(), entry.object_id())
987                .map_err(|e| Error::Other(format!("{e}")))?;
988        }
989    }
990
991    Ok(editor
992        .write()
993        .map_err(|e| Error::Other(format!("{e}")))?
994        .detach())
995}
996
997fn prune_list_tree(
998    repo: &gix::Repository,
999    tree_oid: gix::ObjectId,
1000    cutoff_ms: i64,
1001) -> Result<gix::ObjectId> {
1002    let tree = tree_oid
1003        .attach(repo)
1004        .object()
1005        .map_err(|e| Error::Other(format!("{e}")))?
1006        .into_tree();
1007    let mut editor = repo
1008        .empty_tree()
1009        .edit()
1010        .map_err(|e| Error::Other(format!("{e}")))?;
1011
1012    for entry_result in tree.iter() {
1013        let entry = entry_result.map_err(|e| Error::Other(format!("{e}")))?;
1014        let name = entry.filename().to_str_lossy().to_string();
1015        // Entry names are formatted as "{timestamp_ms}-{hash5}"
1016        if let Some((ts_str, _)) = name.split_once('-') {
1017            if let Ok(ts) = ts_str.parse::<i64>() {
1018                if ts < cutoff_ms {
1019                    continue; // Drop old entry
1020                }
1021            }
1022        }
1023        editor
1024            .upsert(&name, entry.mode().kind(), entry.object_id())
1025            .map_err(|e| Error::Other(format!("{e}")))?;
1026    }
1027
1028    Ok(editor
1029        .write()
1030        .map_err(|e| Error::Other(format!("{e}")))?
1031        .detach())
1032}
1033
1034fn prune_tombstone_tree(
1035    repo: &gix::Repository,
1036    tree_oid: gix::ObjectId,
1037    cutoff_ms: i64,
1038) -> Result<gix::ObjectId> {
1039    let tree = tree_oid
1040        .attach(repo)
1041        .object()
1042        .map_err(|e| Error::Other(format!("{e}")))?
1043        .into_tree();
1044    let mut editor = repo
1045        .empty_tree()
1046        .edit()
1047        .map_err(|e| Error::Other(format!("{e}")))?;
1048
1049    for entry_result in tree.iter() {
1050        let entry = entry_result.map_err(|e| Error::Other(format!("{e}")))?;
1051        let name = entry.filename().to_str_lossy().to_string();
1052
1053        if entry.mode().is_tree() {
1054            let subtree_oid = entry.object_id();
1055            let pruned_oid = prune_tombstone_tree(repo, subtree_oid, cutoff_ms)?;
1056            let pruned_tree = pruned_oid
1057                .attach(repo)
1058                .object()
1059                .map_err(|e| Error::Other(format!("{e}")))?
1060                .into_tree();
1061            if pruned_tree.iter().count() > 0 {
1062                editor
1063                    .upsert(&name, gix::objs::tree::EntryKind::Tree, pruned_oid)
1064                    .map_err(|e| Error::Other(format!("{e}")))?;
1065            }
1066        } else if entry.mode().is_blob() && name == "__deleted" {
1067            let blob = entry
1068                .object_id()
1069                .attach(repo)
1070                .object()
1071                .map_err(|e| Error::Other(format!("{e}")))?
1072                .into_blob();
1073            if let Ok(content) = std::str::from_utf8(&blob.data) {
1074                if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(content) {
1075                    if let Some(ts) = parsed.get("timestamp").and_then(serde_json::Value::as_i64) {
1076                        if ts < cutoff_ms {
1077                            continue; // Drop old tombstone
1078                        }
1079                    }
1080                }
1081            }
1082            editor
1083                .upsert(&name, entry.mode().kind(), entry.object_id())
1084                .map_err(|e| Error::Other(format!("{e}")))?;
1085        } else {
1086            editor
1087                .upsert(&name, entry.mode().kind(), entry.object_id())
1088                .map_err(|e| Error::Other(format!("{e}")))?;
1089        }
1090    }
1091
1092    Ok(editor
1093        .write()
1094        .map_err(|e| Error::Other(format!("{e}")))?
1095        .detach())
1096}
1097
1098/// Count keys in original and pruned trees to produce stats.
1099///
1100/// Returns `(keys_dropped, keys_retained)`.
1101///
1102/// # Errors
1103///
1104/// Returns an error if Git object reads fail.
1105pub fn count_prune_stats(
1106    repo: &gix::Repository,
1107    original_oid: gix::ObjectId,
1108    pruned_oid: gix::ObjectId,
1109) -> Result<(u64, u64)> {
1110    let mut original_count = 0u64;
1111    count_all_blobs(repo, original_oid, &mut original_count)?;
1112
1113    let mut pruned_count = 0u64;
1114    count_all_blobs(repo, pruned_oid, &mut pruned_count)?;
1115
1116    let dropped = original_count.saturating_sub(pruned_count);
1117    Ok((dropped, pruned_count))
1118}
1119
1120fn count_all_blobs(repo: &gix::Repository, tree_oid: gix::ObjectId, count: &mut u64) -> Result<()> {
1121    let tree = tree_oid
1122        .attach(repo)
1123        .object()
1124        .map_err(|e| Error::Other(format!("{e}")))?
1125        .into_tree();
1126    for entry_result in tree.iter() {
1127        let entry = entry_result.map_err(|e| Error::Other(format!("{e}")))?;
1128        if entry.mode().is_blob() {
1129            *count += 1;
1130        } else if entry.mode().is_tree() {
1131            count_all_blobs(repo, entry.object_id(), count)?;
1132        }
1133    }
1134    Ok(())
1135}