Skip to main content

git_meta_lib/
serialize.rs

1//! Serialize local metadata to Git tree(s) and commit(s).
2//!
3//! This module implements the full serialization workflow: reading metadata
4//! from the SQLite store, building Git trees (full or incremental), creating
5//! commits, updating refs, and optionally auto-pruning old entries.
6//!
7//! The public entry point is [`run()`], which takes a [`Session`](crate::Session)
8//! and returns a [`SerializeOutput`] describing what was written.
9
10use std::collections::{BTreeMap, BTreeSet};
11
12use gix::bstr::ByteSlice;
13use gix::prelude::ObjectIdExt;
14use gix::refs::transaction::PreviousValue;
15
16use crate::db::types::{
17    ListTombstoneRecord, Operation, SerializableEntry, SetTombstoneRecord, TombstoneRecord,
18};
19use crate::db::Store;
20use crate::error::{Error, Result};
21use crate::list_value::{encode_entries, make_entry_name, parse_entries};
22use crate::prune::{self, PruneRules};
23use crate::session::Session;
24use crate::tree::filter::{classify_key, parse_filter_rules, FilterRule, MAIN_DEST};
25use crate::tree::format::{build_dir, build_tree_from_paths, insert_path, TreeDir};
26use crate::tree::model::Tombstone;
27use crate::tree_paths;
28use crate::types::{Target, TargetType, ValueType};
29
/// Maximum number of individual change lines included in a commit message.
///
/// When a serialize run describes more changes than this, the commit message
/// carries only a summary (`changes-omitted: true` plus the count) instead of
/// one line per change.
const MAX_COMMIT_CHANGES: usize = 1000;
32
/// Outcome of a single serialize run.
///
/// Carries everything a CLI (or any other consumer) needs in order to report
/// what happened; the type itself performs no I/O.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
#[must_use]
pub struct SerializeOutput {
    /// Number of records serialized, summed over every destination.
    ///
    /// This is `0` when no ref was written.
    pub changes: usize,
    /// Names of the refs that were updated, e.g. `["refs/meta/local/main"]`.
    pub refs_written: Vec<String>,
    /// Number of keys dropped by auto-prune (`0` when auto-prune did not run).
    pub pruned: u64,
}
47
/// How a serialize run gathers its input; reported through progress events.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SerializeMode {
    /// Only metadata modified since the last materialization marker drives the run.
    Incremental,
    /// Every hydrated SQLite row is serialized from scratch.
    Full,
}
56
57/// Progress event emitted while serializing metadata.
58#[derive(Debug, Clone, PartialEq, Eq)]
59pub enum SerializeProgress {
60    /// SQLite metadata is being read.
61    Reading {
62        /// Whether the run is incremental or full.
63        mode: SerializeMode,
64    },
65    /// SQLite metadata has been read.
66    Read {
67        /// Number of hydrated metadata entries read.
68        metadata: usize,
69        /// Number of metadata tombstones read.
70        tombstones: usize,
71        /// Number of set-member tombstones read.
72        set_tombstones: usize,
73        /// Number of list-entry tombstones read.
74        list_tombstones: usize,
75        /// Number of change records that will be described in the serialize commit.
76        changes: usize,
77    },
78    /// Old metadata was skipped by `meta:prune:since`.
79    Pruned {
80        /// Number of metadata entries skipped before tree construction.
81        entries: u64,
82    },
83    /// Metadata has been routed to destination refs.
84    Routed {
85        /// Number of destination refs that may be written.
86        destinations: usize,
87        /// Number of metadata/tombstone records routed across all destinations.
88        records: usize,
89    },
90    /// A destination ref tree is being built.
91    BuildingRef {
92        /// Ref name being built.
93        ref_name: String,
94        /// Number of metadata/tombstone records included in this destination.
95        records: usize,
96    },
97    /// A destination ref was unchanged after rebuilding its tree.
98    RefUnchanged {
99        /// Ref name that did not need an update.
100        ref_name: String,
101    },
102    /// A destination ref was written.
103    RefWritten {
104        /// Ref name that was updated.
105        ref_name: String,
106    },
107    /// Auto-prune wrote a follow-up pruned commit.
108    AutoPruned {
109        /// Ref name that was auto-pruned.
110        ref_name: String,
111        /// Number of keys dropped from the serialized tree.
112        keys_dropped: u64,
113        /// Number of keys retained in the serialized tree.
114        keys_retained: u64,
115    },
116}
117
118/// Serialize local metadata to Git tree(s) and commit(s).
119///
120/// Determines incremental vs full mode automatically based on
121/// `last_materialized`, unless `force_full` is true. Applies filter routing
122/// and pruning rules. Updates local refs and the materialization timestamp.
123///
124/// # Parameters
125///
126/// - `session`: the gmeta session providing the repository, store, and config.
127/// - `now`: the current timestamp in milliseconds since the Unix epoch,
128///   used for the commit signature and the `last_materialized` marker.
129/// - `force_full`: when true, ignore incremental dirty-target detection and
130///   rebuild serialized trees from the complete SQLite state.
131///
132/// # Returns
133///
134/// A [`SerializeOutput`] with counts and written refs. If there is nothing
135/// to serialize, `changes` will be `0` and `refs_written` will be empty.
136///
137/// # Errors
138///
139/// Returns an error if database reads, Git object writes, or ref updates fail.
140pub fn run(session: &Session, now: i64, force_full: bool) -> Result<SerializeOutput> {
141    run_with_progress(session, now, force_full, |_| {})
142}
143
144/// Serialize local metadata and report progress through a callback.
145///
146/// # Parameters
147///
148/// - `session`: the gmeta session providing the repository, store, and config.
149/// - `now`: the current timestamp in milliseconds since the Unix epoch,
150///   used for the commit signature and the `last_materialized` marker.
151/// - `force_full`: when true, ignore incremental dirty-target detection and
152///   rebuild serialized trees from the complete SQLite state.
153/// - `progress`: callback invoked at major serialization steps.
154///
155/// # Returns
156///
157/// A [`SerializeOutput`] with counts and written refs. If there is nothing
158/// to serialize, `changes` will be `0` and `refs_written` will be empty.
159///
160/// # Errors
161///
162/// Returns an error if database reads, Git object writes, or ref updates fail.
163pub fn run_with_progress(
164    session: &Session,
165    now: i64,
166    force_full: bool,
167    mut progress: impl FnMut(SerializeProgress),
168) -> Result<SerializeOutput> {
169    let repo = &session.repo;
170    let local_ref_name = session.local_ref();
171    let last_materialized = session.store.get_last_materialized()?;
172
173    // Determine existing tree for incremental mode
174    let existing_tree_oid = ref_tree_oid(repo, &local_ref_name)?;
175
176    // Determine incremental vs full mode and collect entries + changes
177    let (
178        metadata_entries,
179        tombstone_entries,
180        set_tombstone_entries,
181        list_tombstone_entries,
182        dirty_target_bases,
183        changes,
184    ) = if let (false, Some(since)) = (force_full, last_materialized) {
185        progress(SerializeProgress::Reading {
186            mode: SerializeMode::Incremental,
187        });
188        let modified = session.store.get_modified_since(since)?;
189        let metadata = session.store.get_all_metadata()?;
190        let changes: Vec<(char, String, String)> = if modified.is_empty() {
191            metadata.iter().map(metadata_add_change).collect()
192        } else {
193            modified
194                .iter()
195                .map(|entry| {
196                    let op_char = match entry.operation {
197                        Operation::Remove => 'D',
198                        Operation::Set => {
199                            if existing_tree_oid.is_some() {
200                                'M'
201                            } else {
202                                'A'
203                            }
204                        }
205                        _ => 'M',
206                    };
207                    let target_label = if entry.target_type == TargetType::Project {
208                        "project".to_string()
209                    } else {
210                        format!("{}:{}", entry.target_type, entry.target_value)
211                    };
212                    (op_char, target_label, entry.key.clone())
213                })
214                .collect()
215        };
216
217        // Compute dirty target base paths from modified entries
218        let mut dirty_bases: BTreeSet<String> = BTreeSet::new();
219        for entry in &modified {
220            let target = if entry.target_type == TargetType::Project {
221                Target::parse("project")?
222            } else {
223                Target::parse(&format!("{}:{}", entry.target_type, entry.target_value))?
224            };
225            dirty_bases.insert(tree_paths::tree_base_path(&target));
226        }
227
228        let tombstones = session.store.get_all_tombstones()?;
229        let set_tombstones = session.store.get_all_set_tombstones()?;
230        let list_tombstones = session.store.get_all_list_tombstones()?;
231        progress(SerializeProgress::Read {
232            metadata: metadata.len(),
233            tombstones: tombstones.len(),
234            set_tombstones: set_tombstones.len(),
235            list_tombstones: list_tombstones.len(),
236            changes: changes.len(),
237        });
238
239        (
240            metadata,
241            tombstones,
242            set_tombstones,
243            list_tombstones,
244            if existing_tree_oid.is_some() && !modified.is_empty() {
245                Some(dirty_bases)
246            } else {
247                None
248            },
249            changes,
250        )
251    } else {
252        progress(SerializeProgress::Reading {
253            mode: SerializeMode::Full,
254        });
255        let metadata = session.store.get_all_metadata()?;
256
257        let changes: Vec<(char, String, String)> =
258            metadata.iter().map(metadata_add_change).collect();
259        let tombstones = session.store.get_all_tombstones()?;
260        let set_tombstones = session.store.get_all_set_tombstones()?;
261        let list_tombstones = session.store.get_all_list_tombstones()?;
262        progress(SerializeProgress::Read {
263            metadata: metadata.len(),
264            tombstones: tombstones.len(),
265            set_tombstones: set_tombstones.len(),
266            list_tombstones: list_tombstones.len(),
267            changes: changes.len(),
268        });
269
270        (
271            metadata,
272            tombstones,
273            set_tombstones,
274            list_tombstones,
275            None,
276            changes,
277        )
278    };
279
280    if metadata_entries.is_empty() && tombstone_entries.is_empty() {
281        return Ok(SerializeOutput {
282            changes: 0,
283            refs_written: Vec::new(),
284            pruned: 0,
285        });
286    }
287
288    let prune_rules = if force_full {
289        None
290    } else {
291        prune::read_prune_rules(&session.store)?
292    };
293
294    // Route entries through filter rules to destinations
295    let filter_rules = parse_filter_rules(&session.store)?;
296
297    let mut dest_metadata: BTreeMap<String, Vec<SerializableEntry>> = BTreeMap::new();
298    let mut dest_tombstones: BTreeMap<String, Vec<TombstoneRecord>> = BTreeMap::new();
299    let mut dest_set_tombstones: BTreeMap<String, Vec<SetTombstoneRecord>> = BTreeMap::new();
300    let mut dest_list_tombstones: BTreeMap<String, Vec<ListTombstoneRecord>> = BTreeMap::new();
301
302    for entry in &metadata_entries {
303        let key = &entry.key;
304        if let Some(dests) = classify_key(key, &filter_rules) {
305            for dest in dests {
306                dest_metadata.entry(dest).or_default().push(entry.clone());
307            }
308        }
309    }
310
311    for entry in &tombstone_entries {
312        if let Some(dests) = classify_key(&entry.key, &filter_rules) {
313            for dest in dests {
314                dest_tombstones.entry(dest).or_default().push(entry.clone());
315            }
316        }
317    }
318
319    for entry in &set_tombstone_entries {
320        if let Some(dests) = classify_key(&entry.key, &filter_rules) {
321            for dest in dests {
322                dest_set_tombstones
323                    .entry(dest)
324                    .or_default()
325                    .push(entry.clone());
326            }
327        }
328    }
329
330    for entry in &list_tombstone_entries {
331        if let Some(dests) = classify_key(&entry.key, &filter_rules) {
332            for dest in dests {
333                dest_list_tombstones
334                    .entry(dest)
335                    .or_default()
336                    .push(entry.clone());
337            }
338        }
339    }
340
341    // Ensure "main" is always present
342    dest_metadata.entry(MAIN_DEST.to_string()).or_default();
343
344    let mut all_dests: BTreeSet<String> = BTreeSet::new();
345    all_dests.extend(dest_metadata.keys().cloned());
346    all_dests.extend(dest_tombstones.keys().cloned());
347    all_dests.extend(dest_set_tombstones.keys().cloned());
348    all_dests.extend(dest_list_tombstones.keys().cloned());
349
350    let total_changes: usize = dest_metadata
351        .values()
352        .map(std::vec::Vec::len)
353        .sum::<usize>()
354        + dest_tombstones
355            .values()
356            .map(std::vec::Vec::len)
357            .sum::<usize>()
358        + dest_set_tombstones
359            .values()
360            .map(std::vec::Vec::len)
361            .sum::<usize>()
362        + dest_list_tombstones
363            .values()
364            .map(std::vec::Vec::len)
365            .sum::<usize>();
366    progress(SerializeProgress::Routed {
367        destinations: all_dests.len(),
368        records: total_changes,
369    });
370
371    let name = session.name();
372    let email = session.email();
373    let sig = gix::actor::Signature {
374        name: name.into(),
375        email: email.into(),
376        time: gix::date::Time::new(now / 1000, 0),
377    };
378
379    let mut refs_written = Vec::new();
380    let mut auto_pruned = 0u64;
381
382    for dest in &all_dests {
383        let ref_name = session.destination_ref(dest);
384        let empty_meta: Vec<SerializableEntry> = Vec::new();
385        let empty_tomb: Vec<TombstoneRecord> = Vec::new();
386        let empty_set_tomb: Vec<SetTombstoneRecord> = Vec::new();
387        let empty_list_tomb: Vec<ListTombstoneRecord> = Vec::new();
388
389        let meta = dest_metadata.get(dest).unwrap_or(&empty_meta);
390        let tombs = dest_tombstones.get(dest).unwrap_or(&empty_tomb);
391        let set_tombs = dest_set_tombstones.get(dest).unwrap_or(&empty_set_tomb);
392        let list_tombs = dest_list_tombstones.get(dest).unwrap_or(&empty_list_tomb);
393
394        if meta.is_empty() && tombs.is_empty() && set_tombs.is_empty() && list_tombs.is_empty() {
395            continue;
396        }
397        let dest_records = meta.len() + tombs.len() + set_tombs.len() + list_tombs.len();
398        progress(SerializeProgress::BuildingRef {
399            ref_name: ref_name.clone(),
400            records: dest_records,
401        });
402
403        // Use incremental mode only for the main destination
404        let (existing, dirty) = if dest == MAIN_DEST {
405            (existing_tree_oid, dirty_target_bases.as_ref())
406        } else {
407            (None, None)
408        };
409
410        let tree_oid = build_tree(repo, meta, tombs, set_tombs, list_tombs, existing, dirty)?;
411
412        let parent_oid = repo
413            .find_reference(&ref_name)
414            .ok()
415            .and_then(|r| r.into_fully_peeled_id().ok())
416            .map(gix::Id::detach);
417
418        let parent_tree_oid = parent_oid.as_ref().and_then(|oid| {
419            oid.attach(repo)
420                .object()
421                .ok()?
422                .into_commit()
423                .tree_id()
424                .ok()
425                .map(gix::Id::detach)
426        });
427        if parent_tree_oid == Some(tree_oid) {
428            progress(SerializeProgress::RefUnchanged {
429                ref_name: ref_name.clone(),
430            });
431            continue;
432        }
433
434        let parents: Vec<gix::ObjectId> = parent_oid.into_iter().collect();
435        let commit_message = build_commit_message(&changes);
436        let commit = gix::objs::Commit {
437            message: commit_message.into(),
438            tree: tree_oid,
439            author: sig.clone(),
440            committer: sig.clone(),
441            encoding: None,
442            parents: parents.into(),
443            extra_headers: Default::default(),
444        };
445
446        let commit_oid = repo
447            .write_object(&commit)
448            .map_err(|e| Error::Other(format!("{e}")))?
449            .detach();
450        repo.reference(
451            ref_name.as_str(),
452            commit_oid,
453            PreviousValue::Any,
454            "git-meta: serialize",
455        )
456        .map_err(|e| Error::Other(format!("{e}")))?;
457
458        refs_written.push(ref_name.clone());
459        progress(SerializeProgress::RefWritten {
460            ref_name: ref_name.clone(),
461        });
462
463        // Auto-prune only for main destination
464        if dest == MAIN_DEST {
465            if let Some(ref prune_rules_val) = prune_rules {
466                if prune::should_prune(repo, tree_oid, prune_rules_val)? {
467                    let prune_tree_oid = auto_prune_tree(
468                        repo,
469                        &metadata_entries,
470                        &tombstone_entries,
471                        &set_tombstone_entries,
472                        &list_tombstone_entries,
473                        &filter_rules,
474                        prune_rules_val,
475                        now,
476                    )?;
477
478                    if prune_tree_oid != tree_oid {
479                        let prune_parent_oid = repo
480                            .find_reference(&ref_name)
481                            .map_err(|e| Error::Other(format!("{e}")))?
482                            .into_fully_peeled_id()
483                            .map_err(|e| Error::Other(format!("{e}")))?
484                            .detach();
485
486                        let (keys_dropped, keys_retained) =
487                            count_prune_stats(repo, tree_oid, prune_tree_oid)?;
488
489                        auto_pruned = keys_dropped;
490                        progress(SerializeProgress::AutoPruned {
491                            ref_name: ref_name.clone(),
492                            keys_dropped,
493                            keys_retained,
494                        });
495
496                        let min_size_str = prune_rules_val
497                            .min_size
498                            .map(|s| format!("\nmin-size: {s}"))
499                            .unwrap_or_default();
500
501                        let message = format!(
502                            "git-meta: prune --since={}\n\npruned: true\nsince: {}{}\nkeys-dropped: {}\nkeys-retained: {}",
503                            prune_rules_val.since, prune_rules_val.since, min_size_str, keys_dropped, keys_retained
504                        );
505
506                        let prune_commit = gix::objs::Commit {
507                            message: message.into(),
508                            tree: prune_tree_oid,
509                            author: sig.clone(),
510                            committer: sig.clone(),
511                            encoding: None,
512                            parents: vec![prune_parent_oid].into(),
513                            extra_headers: Default::default(),
514                        };
515
516                        let _prune_commit_oid = repo
517                            .write_object(&prune_commit)
518                            .map_err(|e| Error::Other(format!("{e}")))?
519                            .detach();
520                        repo.reference(
521                            ref_name.as_str(),
522                            _prune_commit_oid,
523                            PreviousValue::Any,
524                            "git-meta: auto-prune",
525                        )
526                        .map_err(|e| Error::Other(format!("{e}")))?;
527                    }
528                }
529            }
530        }
531    }
532
533    session.store.set_last_materialized(now)?;
534
535    Ok(SerializeOutput {
536        changes: if refs_written.is_empty() {
537            0
538        } else {
539            total_changes
540        },
541        refs_written,
542        pruned: auto_pruned,
543    })
544}
545
546fn metadata_add_change(entry: &SerializableEntry) -> (char, String, String) {
547    let target_label = if entry.target_type == TargetType::Project {
548        "project".to_string()
549    } else {
550        format!("{}:{}", entry.target_type, entry.target_value)
551    };
552    ('A', target_label, entry.key.clone())
553}
554
555fn ref_tree_oid(repo: &gix::Repository, ref_name: &str) -> Result<Option<gix::ObjectId>> {
556    repo.find_reference(ref_name)
557        .ok()
558        .and_then(|r| r.into_fully_peeled_id().ok())
559        .map(|id| {
560            id.object()
561                .map_err(|e| Error::Other(format!("{e}")))?
562                .into_commit()
563                .tree_id()
564                .map(gix::Id::detach)
565                .map_err(|e| Error::Other(format!("{e}")))
566        })
567        .transpose()
568}
569
570/// Build a commit message from a list of changes.
571///
572/// Each change is `(op_char, target_label, key)`.
573fn build_commit_message(changes: &[(char, String, String)]) -> String {
574    if changes.len() > MAX_COMMIT_CHANGES {
575        format!(
576            "git-meta: serialize ({} changes)\n\nchanges-omitted: true\ncount: {}",
577            changes.len(),
578            changes.len()
579        )
580    } else {
581        let mut msg = format!("git-meta: serialize ({} changes)\n", changes.len());
582        for (op, target, key) in changes {
583            msg.push('\n');
584            msg.push(*op);
585            msg.push('\t');
586            msg.push_str(target);
587            msg.push('\t');
588            msg.push_str(key);
589        }
590        msg
591    }
592}
593
/// Build a Git tree from already-filtered metadata, always as a full rebuild.
///
/// Used by `git-meta prune` to rebuild a tree from only the surviving entries.
/// No existing tree is reused: this is `build_tree` with incremental mode
/// switched off.
///
/// # Parameters
///
/// - `repo`: the Git repository that receives the written objects
/// - `metadata_entries`: metadata entries to include
/// - `tombstone_entries`: key tombstones
/// - `set_tombstone_entries`: set-member tombstones
/// - `list_tombstone_entries`: list-entry tombstones
///
/// # Returns
///
/// The OID of the root Git tree object.
///
/// # Errors
///
/// Returns an error if target parsing or Git object writes fail.
#[cfg(feature = "internal")]
pub fn build_filtered_tree(
    repo: &gix::Repository,
    metadata_entries: &[SerializableEntry],
    tombstone_entries: &[TombstoneRecord],
    set_tombstone_entries: &[SetTombstoneRecord],
    list_tombstone_entries: &[ListTombstoneRecord],
) -> Result<gix::ObjectId> {
    // No base tree and no dirty set: forces the non-incremental path.
    let no_existing_tree = None;
    let no_dirty_bases = None;
    build_tree(
        repo,
        metadata_entries,
        tombstone_entries,
        set_tombstone_entries,
        list_tombstone_entries,
        no_existing_tree,
        no_dirty_bases,
    )
}
631
632/// Build a complete Git tree from all metadata entries.
633///
634/// When `existing_tree_oid` and `dirty_target_bases` are provided, only entries
635/// belonging to dirty targets are processed; unchanged subtrees are reused
636/// from the existing tree by OID (incremental mode).
637fn build_tree(
638    repo: &gix::Repository,
639    metadata_entries: &[SerializableEntry],
640    tombstone_entries: &[TombstoneRecord],
641    set_tombstone_entries: &[SetTombstoneRecord],
642    list_tombstone_entries: &[ListTombstoneRecord],
643    existing_tree_oid: Option<gix::ObjectId>,
644    dirty_target_bases: Option<&BTreeSet<String>>,
645) -> Result<gix::ObjectId> {
646    let mut files: BTreeMap<String, Vec<u8>> = BTreeMap::new();
647
648    for e in metadata_entries {
649        let target = if e.target_type == TargetType::Project {
650            Target::parse("project")?
651        } else {
652            Target::parse(&format!("{}:{}", e.target_type, e.target_value))?
653        };
654
655        // Skip entries for clean targets -- their subtrees will be reused
656        if let Some(dirty) = dirty_target_bases {
657            if !dirty.contains(&tree_paths::tree_base_path(&target)) {
658                continue;
659            }
660        }
661
662        match e.value_type {
663            ValueType::String => {
664                let full_path = tree_paths::tree_path(&target, &e.key)?;
665                if e.is_git_ref {
666                    let oid = gix::ObjectId::from_hex(e.value.as_bytes())
667                        .map_err(|e| Error::Other(format!("{e}")))?;
668                    let blob = oid
669                        .attach(repo)
670                        .object()
671                        .map_err(|e| Error::Other(format!("{e}")))?
672                        .into_blob();
673                    files.insert(full_path, blob.data.clone());
674                } else {
675                    let raw_value: String = match serde_json::from_str(&e.value) {
676                        Ok(s) => s,
677                        Err(_) => e.value.clone(),
678                    };
679                    files.insert(full_path, raw_value.into_bytes());
680                }
681            }
682            ValueType::List => {
683                let list_entries =
684                    parse_entries(&e.value).map_err(|e| Error::InvalidValue(format!("{e}")))?;
685                let list_dir_path = tree_paths::list_dir_path(&target, &e.key)?;
686                for entry in list_entries {
687                    let entry_name = make_entry_name(&entry);
688                    let full_path = format!("{list_dir_path}/{entry_name}");
689                    files.insert(full_path, entry.value.into_bytes());
690                }
691            }
692            ValueType::Set => {
693                let members: Vec<String> = serde_json::from_str(&e.value)
694                    .map_err(|e| Error::InvalidValue(format!("failed to decode set value: {e}")))?;
695                let set_dir_path = tree_paths::set_dir_path(&target, &e.key)?;
696                for member in members {
697                    let member_id = crate::types::set_member_id(&member);
698                    let full_path = format!("{set_dir_path}/{member_id}");
699                    files.insert(full_path, member.into_bytes());
700                }
701            }
702        }
703    }
704
705    for record in tombstone_entries {
706        let target = if record.target_type == TargetType::Project {
707            Target::parse("project")?
708        } else {
709            Target::parse(&format!("{}:{}", record.target_type, record.target_value))?
710        };
711
712        if let Some(dirty) = dirty_target_bases {
713            if !dirty.contains(&tree_paths::tree_base_path(&target)) {
714                continue;
715            }
716        }
717
718        let full_path = tree_paths::tombstone_path(&target, &record.key)?;
719        let payload = serde_json::to_vec(&Tombstone {
720            timestamp: record.timestamp,
721            email: record.email.clone(),
722        })?;
723        files.insert(full_path, payload);
724    }
725
726    for record in set_tombstone_entries {
727        let target = if record.target_type == TargetType::Project {
728            Target::parse("project")?
729        } else {
730            Target::parse(&format!("{}:{}", record.target_type, record.target_value))?
731        };
732
733        if let Some(dirty) = dirty_target_bases {
734            if !dirty.contains(&tree_paths::tree_base_path(&target)) {
735                continue;
736            }
737        }
738
739        let full_path =
740            tree_paths::set_member_tombstone_path(&target, &record.key, &record.member_id)?;
741        files.insert(full_path, record.value.as_bytes().to_vec());
742    }
743
744    for record in list_tombstone_entries {
745        let target = if record.target_type == TargetType::Project {
746            Target::parse("project")?
747        } else {
748            Target::parse(&format!("{}:{}", record.target_type, record.target_value))?
749        };
750
751        if let Some(dirty) = dirty_target_bases {
752            if !dirty.contains(&tree_paths::tree_base_path(&target)) {
753                continue;
754            }
755        }
756
757        let full_path =
758            tree_paths::list_entry_tombstone_path(&target, &record.key, &record.entry_name)?;
759        let payload = serde_json::to_vec(&Tombstone {
760            timestamp: record.timestamp,
761            email: record.email.clone(),
762        })?;
763        files.insert(full_path, payload);
764    }
765
766    // Build nested tree, reusing unchanged subtrees from existing tree
767    if let (Some(existing_oid), Some(dirty_bases)) = (existing_tree_oid, dirty_target_bases) {
768        build_tree_incremental(repo, existing_oid, &files, dirty_bases)
769    } else {
770        build_tree_from_paths(repo, &files)
771    }
772}
773
774/// Incrementally build a tree by patching an existing tree.
775///
776/// Only dirty target subtrees are rebuilt from `files`; all other subtrees
777/// are reused from the existing tree by OID.
778fn build_tree_incremental(
779    repo: &gix::Repository,
780    existing_tree_oid: gix::ObjectId,
781    files: &BTreeMap<String, Vec<u8>>,
782    dirty_target_bases: &BTreeSet<String>,
783) -> Result<gix::ObjectId> {
784    // Step 1: Remove dirty target subtrees from existing tree
785    let cleaned_oid = remove_subtrees(repo, existing_tree_oid, dirty_target_bases)?;
786
787    // Step 2: Build TreeDir from dirty files only
788    let mut root = TreeDir::default();
789    for (path, content) in files {
790        let parts: Vec<&str> = path.split('/').collect();
791        insert_path(&mut root, &parts, content.clone());
792    }
793
794    // Step 3: Merge new content into cleaned tree
795    merge_dir_into_tree(repo, &root, cleaned_oid)
796}
797
798/// Remove subtrees at specific paths from an existing tree.
799fn remove_subtrees(
800    repo: &gix::Repository,
801    tree_oid: gix::ObjectId,
802    paths: &BTreeSet<String>,
803) -> Result<gix::ObjectId> {
804    let mut grouped: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
805    let mut direct_removes: BTreeSet<String> = BTreeSet::new();
806
807    for path in paths {
808        if let Some((first, rest)) = path.split_once('/') {
809            grouped
810                .entry(first.to_string())
811                .or_default()
812                .insert(rest.to_string());
813        } else {
814            direct_removes.insert(path.clone());
815        }
816    }
817
818    let mut editor = repo
819        .edit_tree(tree_oid)
820        .map_err(|e| Error::Other(format!("{e}")))?;
821
822    for name in &direct_removes {
823        let _ = editor.remove(name);
824    }
825
826    // For grouped paths, recurse into subtrees
827    let tree = tree_oid
828        .attach(repo)
829        .object()
830        .map_err(|e| Error::Other(format!("{e}")))?
831        .into_tree();
832    for (name, sub_paths) in &grouped {
833        let entry = tree.iter().find_map(|e| {
834            let e = e.ok()?;
835            if e.filename().to_str_lossy() == *name && e.mode().is_tree() {
836                Some(e.object_id())
837            } else {
838                None
839            }
840        });
841        if let Some(subtree_oid) = entry {
842            let new_oid = remove_subtrees(repo, subtree_oid, sub_paths)?;
843            let new_tree = new_oid
844                .attach(repo)
845                .object()
846                .map_err(|e| Error::Other(format!("{e}")))?
847                .into_tree();
848            if new_tree.iter().count() > 0 {
849                editor
850                    .upsert(name, gix::objs::tree::EntryKind::Tree, new_oid)
851                    .map_err(|e| Error::Other(format!("{e}")))?;
852            } else {
853                let _ = editor.remove(name);
854            }
855        }
856    }
857
858    Ok(editor
859        .write()
860        .map_err(|e| Error::Other(format!("{e}")))?
861        .detach())
862}
863
864/// Merge a [`TreeDir`] structure into an existing tree.
865///
866/// Existing entries not present in `dir` are preserved.
867/// Entries in `dir` overwrite existing entries with the same name.
868fn merge_dir_into_tree(
869    repo: &gix::Repository,
870    dir: &TreeDir,
871    existing_oid: gix::ObjectId,
872) -> Result<gix::ObjectId> {
873    let mut editor = repo
874        .edit_tree(existing_oid)
875        .map_err(|e| Error::Other(format!("{e}")))?;
876
877    for (name, content) in &dir.files {
878        let blob_oid: gix::ObjectId = repo
879            .write_blob(content)
880            .map_err(|e| Error::Other(format!("{e}")))?
881            .into();
882        editor
883            .upsert(name, gix::objs::tree::EntryKind::Blob, blob_oid)
884            .map_err(|e| Error::Other(format!("{e}")))?;
885    }
886
887    let existing_tree = existing_oid
888        .attach(repo)
889        .object()
890        .map_err(|e| Error::Other(format!("{e}")))?
891        .into_tree();
892    for (name, child_dir) in &dir.dirs {
893        let existing_child_oid = existing_tree.iter().find_map(|e| {
894            let e = e.ok()?;
895            if e.filename().to_str_lossy() == *name && e.mode().is_tree() {
896                Some(e.object_id())
897            } else {
898                None
899            }
900        });
901
902        let child_oid = if let Some(existing_child) = existing_child_oid {
903            merge_dir_into_tree(repo, child_dir, existing_child)?
904        } else {
905            build_dir(repo, child_dir)?
906        };
907        editor
908            .upsert(name, gix::objs::tree::EntryKind::Tree, child_oid)
909            .map_err(|e| Error::Other(format!("{e}")))?;
910    }
911
912    Ok(editor
913        .write()
914        .map_err(|e| Error::Other(format!("{e}")))?
915        .detach())
916}
917
918/// Prune a serialized tree by dropping entries older than the cutoff.
919///
920/// Returns the OID of the new (possibly smaller) tree. If the tree would
921/// be unchanged, the same OID is returned.
922///
923/// # Parameters
924///
925/// - `repo`: the Git repository
926/// - `tree_oid`: the root tree to prune
927/// - `rules`: the prune rules to apply
928/// - `db`: the metadata store (for potential future use by prune helpers)
929///
930/// # Errors
931///
932/// Returns an error if Git object reads/writes fail or cutoff parsing fails.
933pub fn prune_tree(
934    repo: &gix::Repository,
935    tree_oid: gix::ObjectId,
936    rules: &PruneRules,
937    db: &Store,
938    now_ms: i64,
939) -> Result<gix::ObjectId> {
940    let cutoff_ms = prune::parse_since_to_cutoff_ms(&rules.since, now_ms)?;
941    let min_size = rules.min_size.unwrap_or(0);
942
943    let tree = tree_oid
944        .attach(repo)
945        .object()
946        .map_err(|e| Error::Other(format!("{e}")))?
947        .into_tree();
948    let mut editor = repo
949        .empty_tree()
950        .edit()
951        .map_err(|e| Error::Other(format!("{e}")))?;
952
953    for entry_result in tree.iter() {
954        let entry = entry_result.map_err(|e| Error::Other(format!("{e}")))?;
955        let name = entry.filename().to_str_lossy().to_string();
956
957        if name == "project" {
958            editor
959                .upsert(&name, entry.mode().kind(), entry.object_id())
960                .map_err(|e| Error::Other(format!("{e}")))?;
961            continue;
962        }
963
964        if entry.mode().is_tree() {
965            let subtree_oid = entry.object_id();
966
967            // Check min-size
968            if min_size > 0 {
969                let size = prune::compute_tree_size_for(repo, subtree_oid)?;
970                if size < min_size {
971                    editor
972                        .upsert(&name, entry.mode().kind(), subtree_oid)
973                        .map_err(|e| Error::Other(format!("{e}")))?;
974                    continue;
975                }
976            }
977
978            let pruned_oid = prune_target_type_tree(repo, subtree_oid, cutoff_ms, min_size, db)?;
979            let pruned_tree = pruned_oid
980                .attach(repo)
981                .object()
982                .map_err(|e| Error::Other(format!("{e}")))?
983                .into_tree();
984            if pruned_tree.iter().count() > 0 {
985                editor
986                    .upsert(&name, gix::objs::tree::EntryKind::Tree, pruned_oid)
987                    .map_err(|e| Error::Other(format!("{e}")))?;
988            }
989        } else {
990            editor
991                .upsert(&name, entry.mode().kind(), entry.object_id())
992                .map_err(|e| Error::Other(format!("{e}")))?;
993        }
994    }
995
996    Ok(editor
997        .write()
998        .map_err(|e| Error::Other(format!("{e}")))?
999        .detach())
1000}
1001
1002fn auto_prune_tree(
1003    repo: &gix::Repository,
1004    metadata_entries: &[SerializableEntry],
1005    tombstone_entries: &[TombstoneRecord],
1006    set_tombstone_entries: &[SetTombstoneRecord],
1007    list_tombstone_entries: &[ListTombstoneRecord],
1008    filter_rules: &[FilterRule],
1009    rules: &PruneRules,
1010    now_ms: i64,
1011) -> Result<gix::ObjectId> {
1012    let cutoff_ms = prune::parse_since_to_cutoff_ms(&rules.since, now_ms)?;
1013    let is_main_dest = |key: &str| -> bool {
1014        classify_key(key, filter_rules).is_some_and(|dests| dests.iter().any(|d| d == MAIN_DEST))
1015    };
1016
1017    let metadata = metadata_entries
1018        .iter()
1019        .filter(|entry| is_main_dest(&entry.key))
1020        .filter_map(|entry| prune_metadata_entry(entry, cutoff_ms).transpose())
1021        .collect::<Result<Vec<_>>>()?;
1022    let tombstones = tombstone_entries
1023        .iter()
1024        .filter(|entry| is_main_dest(&entry.key))
1025        .filter(|entry| entry.target_type == TargetType::Project || entry.timestamp >= cutoff_ms)
1026        .cloned()
1027        .collect::<Vec<_>>();
1028    let set_tombstones = set_tombstone_entries
1029        .iter()
1030        .filter(|entry| is_main_dest(&entry.key))
1031        .filter(|entry| entry.target_type == TargetType::Project || entry.timestamp >= cutoff_ms)
1032        .cloned()
1033        .collect::<Vec<_>>();
1034    let list_tombstones = list_tombstone_entries
1035        .iter()
1036        .filter(|entry| is_main_dest(&entry.key))
1037        .filter(|entry| entry.target_type == TargetType::Project || entry.timestamp >= cutoff_ms)
1038        .cloned()
1039        .collect::<Vec<_>>();
1040
1041    build_tree(
1042        repo,
1043        &metadata,
1044        &tombstones,
1045        &set_tombstones,
1046        &list_tombstones,
1047        None,
1048        None,
1049    )
1050}
1051
1052fn prune_metadata_entry(
1053    entry: &SerializableEntry,
1054    cutoff_ms: i64,
1055) -> Result<Option<SerializableEntry>> {
1056    if entry.target_type != TargetType::Project && entry.last_timestamp < cutoff_ms {
1057        return Ok(None);
1058    }
1059
1060    if entry.target_type != TargetType::Project && entry.value_type == ValueType::List {
1061        let retained = parse_entries(&entry.value)?
1062            .into_iter()
1063            .filter(|item| item.timestamp >= cutoff_ms)
1064            .collect::<Vec<_>>();
1065        let mut pruned = entry.clone();
1066        pruned.value = encode_entries(&retained)?;
1067        return Ok(Some(pruned));
1068    }
1069
1070    Ok(Some(entry.clone()))
1071}
1072
1073fn prune_target_type_tree(
1074    repo: &gix::Repository,
1075    tree_oid: gix::ObjectId,
1076    cutoff_ms: i64,
1077    min_size: u64,
1078    db: &Store,
1079) -> Result<gix::ObjectId> {
1080    let tree = tree_oid
1081        .attach(repo)
1082        .object()
1083        .map_err(|e| Error::Other(format!("{e}")))?
1084        .into_tree();
1085    let mut editor = repo
1086        .empty_tree()
1087        .edit()
1088        .map_err(|e| Error::Other(format!("{e}")))?;
1089
1090    for entry_result in tree.iter() {
1091        let entry = entry_result.map_err(|e| Error::Other(format!("{e}")))?;
1092        let name = entry.filename().to_str_lossy().to_string();
1093
1094        if entry.mode().is_tree() {
1095            let subtree_oid = entry.object_id();
1096            let pruned_oid = prune_subtree_recursive(repo, subtree_oid, cutoff_ms, min_size, db)?;
1097            let pruned_tree = pruned_oid
1098                .attach(repo)
1099                .object()
1100                .map_err(|e| Error::Other(format!("{e}")))?
1101                .into_tree();
1102            if pruned_tree.iter().count() > 0 {
1103                editor
1104                    .upsert(&name, gix::objs::tree::EntryKind::Tree, pruned_oid)
1105                    .map_err(|e| Error::Other(format!("{e}")))?;
1106            }
1107        } else {
1108            editor
1109                .upsert(&name, entry.mode().kind(), entry.object_id())
1110                .map_err(|e| Error::Other(format!("{e}")))?;
1111        }
1112    }
1113
1114    Ok(editor
1115        .write()
1116        .map_err(|e| Error::Other(format!("{e}")))?
1117        .detach())
1118}
1119
1120fn prune_subtree_recursive(
1121    repo: &gix::Repository,
1122    tree_oid: gix::ObjectId,
1123    cutoff_ms: i64,
1124    _min_size: u64,
1125    _db: &Store,
1126) -> Result<gix::ObjectId> {
1127    let tree = tree_oid
1128        .attach(repo)
1129        .object()
1130        .map_err(|e| Error::Other(format!("{e}")))?
1131        .into_tree();
1132    let mut editor = repo
1133        .empty_tree()
1134        .edit()
1135        .map_err(|e| Error::Other(format!("{e}")))?;
1136
1137    for entry_result in tree.iter() {
1138        let entry = entry_result.map_err(|e| Error::Other(format!("{e}")))?;
1139        let name = entry.filename().to_str_lossy().to_string();
1140
1141        if entry.mode().is_tree() {
1142            if name == "__list" {
1143                let list_tree_oid = entry.object_id();
1144                let pruned_oid = prune_list_tree(repo, list_tree_oid, cutoff_ms)?;
1145                let pruned_tree = pruned_oid
1146                    .attach(repo)
1147                    .object()
1148                    .map_err(|e| Error::Other(format!("{e}")))?
1149                    .into_tree();
1150                if pruned_tree.iter().count() > 0 {
1151                    editor
1152                        .upsert(&name, gix::objs::tree::EntryKind::Tree, pruned_oid)
1153                        .map_err(|e| Error::Other(format!("{e}")))?;
1154                }
1155            } else if name == "__tombstones" {
1156                let tomb_tree_oid = entry.object_id();
1157                let pruned_oid = prune_tombstone_tree(repo, tomb_tree_oid, cutoff_ms)?;
1158                let pruned_tree = pruned_oid
1159                    .attach(repo)
1160                    .object()
1161                    .map_err(|e| Error::Other(format!("{e}")))?
1162                    .into_tree();
1163                if pruned_tree.iter().count() > 0 {
1164                    editor
1165                        .upsert(&name, gix::objs::tree::EntryKind::Tree, pruned_oid)
1166                        .map_err(|e| Error::Other(format!("{e}")))?;
1167                }
1168            } else {
1169                let subtree_oid = entry.object_id();
1170                let pruned_oid =
1171                    prune_subtree_recursive(repo, subtree_oid, cutoff_ms, _min_size, _db)?;
1172                let pruned_tree = pruned_oid
1173                    .attach(repo)
1174                    .object()
1175                    .map_err(|e| Error::Other(format!("{e}")))?
1176                    .into_tree();
1177                if pruned_tree.iter().count() > 0 {
1178                    editor
1179                        .upsert(&name, gix::objs::tree::EntryKind::Tree, pruned_oid)
1180                        .map_err(|e| Error::Other(format!("{e}")))?;
1181                }
1182            }
1183        } else {
1184            editor
1185                .upsert(&name, entry.mode().kind(), entry.object_id())
1186                .map_err(|e| Error::Other(format!("{e}")))?;
1187        }
1188    }
1189
1190    Ok(editor
1191        .write()
1192        .map_err(|e| Error::Other(format!("{e}")))?
1193        .detach())
1194}
1195
1196fn prune_list_tree(
1197    repo: &gix::Repository,
1198    tree_oid: gix::ObjectId,
1199    cutoff_ms: i64,
1200) -> Result<gix::ObjectId> {
1201    let tree = tree_oid
1202        .attach(repo)
1203        .object()
1204        .map_err(|e| Error::Other(format!("{e}")))?
1205        .into_tree();
1206    let mut editor = repo
1207        .empty_tree()
1208        .edit()
1209        .map_err(|e| Error::Other(format!("{e}")))?;
1210
1211    for entry_result in tree.iter() {
1212        let entry = entry_result.map_err(|e| Error::Other(format!("{e}")))?;
1213        let name = entry.filename().to_str_lossy().to_string();
1214        // Entry names are formatted as "{timestamp_ms}-{hash5}"
1215        if let Some((ts_str, _)) = name.split_once('-') {
1216            if let Ok(ts) = ts_str.parse::<i64>() {
1217                if ts < cutoff_ms {
1218                    continue; // Drop old entry
1219                }
1220            }
1221        }
1222        editor
1223            .upsert(&name, entry.mode().kind(), entry.object_id())
1224            .map_err(|e| Error::Other(format!("{e}")))?;
1225    }
1226
1227    Ok(editor
1228        .write()
1229        .map_err(|e| Error::Other(format!("{e}")))?
1230        .detach())
1231}
1232
1233fn prune_tombstone_tree(
1234    repo: &gix::Repository,
1235    tree_oid: gix::ObjectId,
1236    cutoff_ms: i64,
1237) -> Result<gix::ObjectId> {
1238    let tree = tree_oid
1239        .attach(repo)
1240        .object()
1241        .map_err(|e| Error::Other(format!("{e}")))?
1242        .into_tree();
1243    let mut editor = repo
1244        .empty_tree()
1245        .edit()
1246        .map_err(|e| Error::Other(format!("{e}")))?;
1247
1248    for entry_result in tree.iter() {
1249        let entry = entry_result.map_err(|e| Error::Other(format!("{e}")))?;
1250        let name = entry.filename().to_str_lossy().to_string();
1251
1252        if entry.mode().is_tree() {
1253            let subtree_oid = entry.object_id();
1254            let pruned_oid = prune_tombstone_tree(repo, subtree_oid, cutoff_ms)?;
1255            let pruned_tree = pruned_oid
1256                .attach(repo)
1257                .object()
1258                .map_err(|e| Error::Other(format!("{e}")))?
1259                .into_tree();
1260            if pruned_tree.iter().count() > 0 {
1261                editor
1262                    .upsert(&name, gix::objs::tree::EntryKind::Tree, pruned_oid)
1263                    .map_err(|e| Error::Other(format!("{e}")))?;
1264            }
1265        } else if entry.mode().is_blob() && name == "__deleted" {
1266            let blob = entry
1267                .object_id()
1268                .attach(repo)
1269                .object()
1270                .map_err(|e| Error::Other(format!("{e}")))?
1271                .into_blob();
1272            if let Ok(content) = std::str::from_utf8(&blob.data) {
1273                if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(content) {
1274                    if let Some(ts) = parsed.get("timestamp").and_then(serde_json::Value::as_i64) {
1275                        if ts < cutoff_ms {
1276                            continue; // Drop old tombstone
1277                        }
1278                    }
1279                }
1280            }
1281            editor
1282                .upsert(&name, entry.mode().kind(), entry.object_id())
1283                .map_err(|e| Error::Other(format!("{e}")))?;
1284        } else {
1285            editor
1286                .upsert(&name, entry.mode().kind(), entry.object_id())
1287                .map_err(|e| Error::Other(format!("{e}")))?;
1288        }
1289    }
1290
1291    Ok(editor
1292        .write()
1293        .map_err(|e| Error::Other(format!("{e}")))?
1294        .detach())
1295}
1296
1297/// Count keys in original and pruned trees to produce stats.
1298///
1299/// Returns `(keys_dropped, keys_retained)`.
1300///
1301/// # Errors
1302///
1303/// Returns an error if Git object reads fail.
1304pub fn count_prune_stats(
1305    repo: &gix::Repository,
1306    original_oid: gix::ObjectId,
1307    pruned_oid: gix::ObjectId,
1308) -> Result<(u64, u64)> {
1309    let mut original_count = 0u64;
1310    count_all_blobs(repo, original_oid, &mut original_count)?;
1311
1312    let mut pruned_count = 0u64;
1313    count_all_blobs(repo, pruned_oid, &mut pruned_count)?;
1314
1315    let dropped = original_count.saturating_sub(pruned_count);
1316    Ok((dropped, pruned_count))
1317}
1318
1319fn count_all_blobs(repo: &gix::Repository, tree_oid: gix::ObjectId, count: &mut u64) -> Result<()> {
1320    let tree = tree_oid
1321        .attach(repo)
1322        .object()
1323        .map_err(|e| Error::Other(format!("{e}")))?
1324        .into_tree();
1325    for entry_result in tree.iter() {
1326        let entry = entry_result.map_err(|e| Error::Other(format!("{e}")))?;
1327        if entry.mode().is_blob() {
1328            *count += 1;
1329        } else if entry.mode().is_tree() {
1330            count_all_blobs(repo, entry.object_id(), count)?;
1331        }
1332    }
1333    Ok(())
1334}