Skip to main content

git_meta_lib/
serialize.rs

1//! Serialize local metadata to Git tree(s) and commit(s).
2//!
3//! This module implements the full serialization workflow: reading metadata
4//! from the SQLite store, building Git trees (full or incremental), creating
5//! commits, updating refs, and optionally auto-pruning old entries.
6//!
7//! The public entry point is [`run()`], which takes a [`Session`](crate::Session)
8//! and returns a [`SerializeOutput`] describing what was written.
9
10use std::collections::{BTreeMap, BTreeSet};
11
12use gix::bstr::ByteSlice;
13use gix::prelude::ObjectIdExt;
14use gix::refs::transaction::PreviousValue;
15
16use crate::db::types::{
17    ListTombstoneRecord, Operation, SerializableEntry, SetTombstoneRecord, TombstoneRecord,
18};
19use crate::db::Store;
20use crate::error::{Error, Result};
21use crate::list_value::{encode_entries, make_entry_name, parse_entries};
22use crate::prune::{self, PruneRules};
23use crate::session::Session;
24use crate::tree::filter::{classify_key, parse_filter_rules, FilterRule, MAIN_DEST};
25use crate::tree::format::{build_dir, build_tree_from_paths, insert_path, TreeDir};
26use crate::tree::model::Tombstone;
27use crate::tree_paths;
28use crate::types::{Target, TargetType, ValueType};
29
/// Upper bound on how many individual change lines a serialize commit message may list.
///
/// Runs with more changes than this get a summary-only commit message.
const MAX_COMMIT_CHANGES: usize = 1_000;
32
/// Summary of one serialize run.
///
/// Carries everything a CLI or other consumer needs to report the outcome;
/// producing it performs no I/O of its own.
#[must_use]
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct SerializeOutput {
    /// Count of serialized metadata changes, summed over every destination.
    pub changes: usize,
    /// Names of the refs that were updated, e.g. `["refs/meta/local/main"]`.
    pub refs_written: Vec<String>,
    /// How many entries auto-prune dropped (`0` when auto-prune did not run).
    pub pruned: u64,
}
47
/// Which serialization strategy a run uses; reported through progress events.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SerializeMode {
    /// Only metadata modified since the last materialization marker drives the run.
    Incremental,
    /// Every hydrated SQLite row is serialized from scratch.
    Full,
}
56
57/// Progress event emitted while serializing metadata.
58#[derive(Debug, Clone, PartialEq, Eq)]
59pub enum SerializeProgress {
60    /// SQLite metadata is being read.
61    Reading {
62        /// Whether the run is incremental or full.
63        mode: SerializeMode,
64    },
65    /// SQLite metadata has been read.
66    Read {
67        /// Number of hydrated metadata entries read.
68        metadata: usize,
69        /// Number of metadata tombstones read.
70        tombstones: usize,
71        /// Number of set-member tombstones read.
72        set_tombstones: usize,
73        /// Number of list-entry tombstones read.
74        list_tombstones: usize,
75        /// Number of change records that will be described in the serialize commit.
76        changes: usize,
77    },
78    /// Old metadata was skipped by `meta:prune:since`.
79    Pruned {
80        /// Number of metadata entries skipped before tree construction.
81        entries: u64,
82    },
83    /// Metadata has been routed to destination refs.
84    Routed {
85        /// Number of destination refs that may be written.
86        destinations: usize,
87        /// Number of metadata/tombstone records routed across all destinations.
88        records: usize,
89    },
90    /// A destination ref tree is being built.
91    BuildingRef {
92        /// Ref name being built.
93        ref_name: String,
94        /// Number of metadata/tombstone records included in this destination.
95        records: usize,
96    },
97    /// A destination ref was unchanged after rebuilding its tree.
98    RefUnchanged {
99        /// Ref name that did not need an update.
100        ref_name: String,
101    },
102    /// A destination ref was written.
103    RefWritten {
104        /// Ref name that was updated.
105        ref_name: String,
106    },
107    /// Auto-prune wrote a follow-up pruned commit.
108    AutoPruned {
109        /// Ref name that was auto-pruned.
110        ref_name: String,
111        /// Number of keys dropped from the serialized tree.
112        keys_dropped: u64,
113        /// Number of keys retained in the serialized tree.
114        keys_retained: u64,
115    },
116}
117
118/// Serialize local metadata to Git tree(s) and commit(s).
119///
120/// Determines incremental vs full mode automatically based on
121/// `last_materialized`, unless `force_full` is true. Applies filter routing
122/// and pruning rules. Updates local refs and the materialization timestamp.
123///
124/// # Parameters
125///
126/// - `session`: the gmeta session providing the repository, store, and config.
127/// - `now`: the current timestamp in milliseconds since the Unix epoch,
128///   used for the commit signature and the `last_materialized` marker.
129/// - `force_full`: when true, ignore incremental dirty-target detection and
130///   rebuild serialized trees from the complete SQLite state.
131///
132/// # Returns
133///
134/// A [`SerializeOutput`] with counts and written refs. If there is nothing
135/// to serialize, `changes` will be `0` and `refs_written` will be empty.
136///
137/// # Errors
138///
139/// Returns an error if database reads, Git object writes, or ref updates fail.
140pub fn run(session: &Session, now: i64, force_full: bool) -> Result<SerializeOutput> {
141    run_with_progress(session, now, force_full, |_| {})
142}
143
144/// Serialize local metadata and report progress through a callback.
145///
146/// # Parameters
147///
148/// - `session`: the gmeta session providing the repository, store, and config.
149/// - `now`: the current timestamp in milliseconds since the Unix epoch,
150///   used for the commit signature and the `last_materialized` marker.
151/// - `force_full`: when true, ignore incremental dirty-target detection and
152///   rebuild serialized trees from the complete SQLite state.
153/// - `progress`: callback invoked at major serialization steps.
154///
155/// # Returns
156///
157/// A [`SerializeOutput`] with counts and written refs. If there is nothing
158/// to serialize, `changes` will be `0` and `refs_written` will be empty.
159///
160/// # Errors
161///
162/// Returns an error if database reads, Git object writes, or ref updates fail.
163pub fn run_with_progress(
164    session: &Session,
165    now: i64,
166    force_full: bool,
167    mut progress: impl FnMut(SerializeProgress),
168) -> Result<SerializeOutput> {
169    let repo = &session.repo;
170    let local_ref_name = session.local_ref();
171    let last_materialized = session.store.get_last_materialized()?;
172
173    // Determine existing tree for incremental mode
174    let existing_tree_oid = ref_tree_oid(repo, &local_ref_name)?;
175
176    // Determine incremental vs full mode and collect entries + changes
177    let (
178        metadata_entries,
179        tombstone_entries,
180        set_tombstone_entries,
181        list_tombstone_entries,
182        dirty_target_bases,
183        changes,
184    ) = if let (false, Some(since)) = (force_full, last_materialized) {
185        progress(SerializeProgress::Reading {
186            mode: SerializeMode::Incremental,
187        });
188        let modified = session.store.get_modified_since(since)?;
189        let metadata = session.store.get_all_metadata()?;
190        let changes: Vec<(char, String, String)> = if modified.is_empty() {
191            metadata.iter().map(metadata_add_change).collect()
192        } else {
193            modified
194                .iter()
195                .map(|entry| {
196                    let op_char = match entry.operation {
197                        Operation::Remove => 'D',
198                        Operation::Set => {
199                            if existing_tree_oid.is_some() {
200                                'M'
201                            } else {
202                                'A'
203                            }
204                        }
205                        _ => 'M',
206                    };
207                    let target_label = if entry.target_type == TargetType::Project {
208                        "project".to_string()
209                    } else {
210                        format!("{}:{}", entry.target_type, entry.target_value)
211                    };
212                    (op_char, target_label, entry.key.clone())
213                })
214                .collect()
215        };
216
217        // Compute dirty target base paths from modified entries
218        let mut dirty_bases: BTreeSet<String> = BTreeSet::new();
219        for entry in &modified {
220            let target = if entry.target_type == TargetType::Project {
221                Target::parse("project")?
222            } else {
223                Target::parse(&format!("{}:{}", entry.target_type, entry.target_value))?
224            };
225            dirty_bases.insert(tree_paths::tree_base_path(&target));
226        }
227
228        let tombstones = session.store.get_all_tombstones()?;
229        let set_tombstones = session.store.get_all_set_tombstones()?;
230        let list_tombstones = session.store.get_all_list_tombstones()?;
231        progress(SerializeProgress::Read {
232            metadata: metadata.len(),
233            tombstones: tombstones.len(),
234            set_tombstones: set_tombstones.len(),
235            list_tombstones: list_tombstones.len(),
236            changes: changes.len(),
237        });
238
239        (
240            metadata,
241            tombstones,
242            set_tombstones,
243            list_tombstones,
244            if existing_tree_oid.is_some() && !modified.is_empty() {
245                Some(dirty_bases)
246            } else {
247                None
248            },
249            changes,
250        )
251    } else {
252        progress(SerializeProgress::Reading {
253            mode: SerializeMode::Full,
254        });
255        let metadata = session.store.get_all_metadata()?;
256
257        let changes: Vec<(char, String, String)> =
258            metadata.iter().map(metadata_add_change).collect();
259        let tombstones = session.store.get_all_tombstones()?;
260        let set_tombstones = session.store.get_all_set_tombstones()?;
261        let list_tombstones = session.store.get_all_list_tombstones()?;
262        progress(SerializeProgress::Read {
263            metadata: metadata.len(),
264            tombstones: tombstones.len(),
265            set_tombstones: set_tombstones.len(),
266            list_tombstones: list_tombstones.len(),
267            changes: changes.len(),
268        });
269
270        (
271            metadata,
272            tombstones,
273            set_tombstones,
274            list_tombstones,
275            None,
276            changes,
277        )
278    };
279
280    if metadata_entries.is_empty() && tombstone_entries.is_empty() {
281        return Ok(SerializeOutput {
282            changes: 0,
283            refs_written: Vec::new(),
284            pruned: 0,
285        });
286    }
287
288    let prune_rules = if force_full {
289        None
290    } else {
291        prune::read_prune_rules(&session.store)?
292    };
293
294    // Route entries through filter rules to destinations
295    let filter_rules = parse_filter_rules(&session.store)?;
296
297    let mut dest_metadata: BTreeMap<String, Vec<SerializableEntry>> = BTreeMap::new();
298    let mut dest_tombstones: BTreeMap<String, Vec<TombstoneRecord>> = BTreeMap::new();
299    let mut dest_set_tombstones: BTreeMap<String, Vec<SetTombstoneRecord>> = BTreeMap::new();
300    let mut dest_list_tombstones: BTreeMap<String, Vec<ListTombstoneRecord>> = BTreeMap::new();
301
302    for entry in &metadata_entries {
303        let key = &entry.key;
304        if let Some(dests) = classify_key(key, &filter_rules) {
305            for dest in dests {
306                dest_metadata.entry(dest).or_default().push(entry.clone());
307            }
308        }
309    }
310
311    for entry in &tombstone_entries {
312        if let Some(dests) = classify_key(&entry.key, &filter_rules) {
313            for dest in dests {
314                dest_tombstones.entry(dest).or_default().push(entry.clone());
315            }
316        }
317    }
318
319    for entry in &set_tombstone_entries {
320        if let Some(dests) = classify_key(&entry.key, &filter_rules) {
321            for dest in dests {
322                dest_set_tombstones
323                    .entry(dest)
324                    .or_default()
325                    .push(entry.clone());
326            }
327        }
328    }
329
330    for entry in &list_tombstone_entries {
331        if let Some(dests) = classify_key(&entry.key, &filter_rules) {
332            for dest in dests {
333                dest_list_tombstones
334                    .entry(dest)
335                    .or_default()
336                    .push(entry.clone());
337            }
338        }
339    }
340
341    // Ensure "main" is always present
342    dest_metadata.entry(MAIN_DEST.to_string()).or_default();
343
344    let mut all_dests: BTreeSet<String> = BTreeSet::new();
345    all_dests.extend(dest_metadata.keys().cloned());
346    all_dests.extend(dest_tombstones.keys().cloned());
347    all_dests.extend(dest_set_tombstones.keys().cloned());
348    all_dests.extend(dest_list_tombstones.keys().cloned());
349
350    let total_changes: usize = dest_metadata
351        .values()
352        .map(std::vec::Vec::len)
353        .sum::<usize>()
354        + dest_tombstones
355            .values()
356            .map(std::vec::Vec::len)
357            .sum::<usize>()
358        + dest_set_tombstones
359            .values()
360            .map(std::vec::Vec::len)
361            .sum::<usize>()
362        + dest_list_tombstones
363            .values()
364            .map(std::vec::Vec::len)
365            .sum::<usize>();
366    progress(SerializeProgress::Routed {
367        destinations: all_dests.len(),
368        records: total_changes,
369    });
370
371    let name = session.name();
372    let email = session.email();
373    let sig = gix::actor::Signature {
374        name: name.into(),
375        email: email.into(),
376        time: gix::date::Time::new(now / 1000, 0),
377    };
378
379    let mut refs_written = Vec::new();
380    let mut auto_pruned = 0u64;
381
382    for dest in &all_dests {
383        let ref_name = session.destination_ref(dest);
384        let empty_meta: Vec<SerializableEntry> = Vec::new();
385        let empty_tomb: Vec<TombstoneRecord> = Vec::new();
386        let empty_set_tomb: Vec<SetTombstoneRecord> = Vec::new();
387        let empty_list_tomb: Vec<ListTombstoneRecord> = Vec::new();
388
389        let meta = dest_metadata.get(dest).unwrap_or(&empty_meta);
390        let tombs = dest_tombstones.get(dest).unwrap_or(&empty_tomb);
391        let set_tombs = dest_set_tombstones.get(dest).unwrap_or(&empty_set_tomb);
392        let list_tombs = dest_list_tombstones.get(dest).unwrap_or(&empty_list_tomb);
393
394        if meta.is_empty() && tombs.is_empty() && set_tombs.is_empty() && list_tombs.is_empty() {
395            continue;
396        }
397        let dest_records = meta.len() + tombs.len() + set_tombs.len() + list_tombs.len();
398        progress(SerializeProgress::BuildingRef {
399            ref_name: ref_name.clone(),
400            records: dest_records,
401        });
402
403        // Use incremental mode only for the main destination
404        let (existing, dirty) = if dest == MAIN_DEST {
405            (existing_tree_oid, dirty_target_bases.as_ref())
406        } else {
407            (None, None)
408        };
409
410        let tree_oid = build_tree(repo, meta, tombs, set_tombs, list_tombs, existing, dirty)?;
411
412        let parent_oid = repo
413            .find_reference(&ref_name)
414            .ok()
415            .and_then(|r| r.into_fully_peeled_id().ok())
416            .map(gix::Id::detach);
417
418        let parent_tree_oid = parent_oid.as_ref().and_then(|oid| {
419            oid.attach(repo)
420                .object()
421                .ok()?
422                .into_commit()
423                .tree_id()
424                .ok()
425                .map(gix::Id::detach)
426        });
427        if parent_tree_oid == Some(tree_oid) {
428            progress(SerializeProgress::RefUnchanged {
429                ref_name: ref_name.clone(),
430            });
431            continue;
432        }
433
434        let parents: Vec<gix::ObjectId> = parent_oid.into_iter().collect();
435        let commit_message = build_commit_message(&changes);
436        let commit = gix::objs::Commit {
437            message: commit_message.into(),
438            tree: tree_oid,
439            author: sig.clone(),
440            committer: sig.clone(),
441            encoding: None,
442            parents: parents.into(),
443            extra_headers: Default::default(),
444        };
445
446        let commit_oid = repo
447            .write_object(&commit)
448            .map_err(|e| Error::Other(format!("{e}")))?
449            .detach();
450        repo.reference(
451            ref_name.as_str(),
452            commit_oid,
453            PreviousValue::Any,
454            "git-meta: serialize",
455        )
456        .map_err(|e| Error::Other(format!("{e}")))?;
457
458        refs_written.push(ref_name.clone());
459        progress(SerializeProgress::RefWritten {
460            ref_name: ref_name.clone(),
461        });
462
463        // Auto-prune only for main destination
464        if dest == MAIN_DEST {
465            if let Some(ref prune_rules_val) = prune_rules {
466                if prune::should_prune(repo, tree_oid, prune_rules_val)? {
467                    let prune_tree_oid = auto_prune_tree(
468                        repo,
469                        AutoPruneInputs {
470                            metadata_entries: &metadata_entries,
471                            tombstone_entries: &tombstone_entries,
472                            set_tombstone_entries: &set_tombstone_entries,
473                            list_tombstone_entries: &list_tombstone_entries,
474                            filter_rules: &filter_rules,
475                            rules: prune_rules_val,
476                            now_ms: now,
477                        },
478                    )?;
479
480                    if prune_tree_oid != tree_oid {
481                        let prune_parent_oid = repo
482                            .find_reference(&ref_name)
483                            .map_err(|e| Error::Other(format!("{e}")))?
484                            .into_fully_peeled_id()
485                            .map_err(|e| Error::Other(format!("{e}")))?
486                            .detach();
487
488                        let (keys_dropped, keys_retained) =
489                            count_prune_stats(repo, tree_oid, prune_tree_oid)?;
490
491                        auto_pruned = keys_dropped;
492                        progress(SerializeProgress::AutoPruned {
493                            ref_name: ref_name.clone(),
494                            keys_dropped,
495                            keys_retained,
496                        });
497
498                        let min_size_str = prune_rules_val
499                            .min_size
500                            .map(|s| format!("\nmin-size: {s}"))
501                            .unwrap_or_default();
502
503                        let message = format!(
504                            "git-meta: prune --since={}\n\npruned: true\nsince: {}{}\nkeys-dropped: {}\nkeys-retained: {}",
505                            prune_rules_val.since, prune_rules_val.since, min_size_str, keys_dropped, keys_retained
506                        );
507
508                        let prune_commit = gix::objs::Commit {
509                            message: message.into(),
510                            tree: prune_tree_oid,
511                            author: sig.clone(),
512                            committer: sig.clone(),
513                            encoding: None,
514                            parents: vec![prune_parent_oid].into(),
515                            extra_headers: Default::default(),
516                        };
517
518                        let _prune_commit_oid = repo
519                            .write_object(&prune_commit)
520                            .map_err(|e| Error::Other(format!("{e}")))?
521                            .detach();
522                        repo.reference(
523                            ref_name.as_str(),
524                            _prune_commit_oid,
525                            PreviousValue::Any,
526                            "git-meta: auto-prune",
527                        )
528                        .map_err(|e| Error::Other(format!("{e}")))?;
529                    }
530                }
531            }
532        }
533    }
534
535    session.store.set_last_materialized(now)?;
536
537    Ok(SerializeOutput {
538        changes: if refs_written.is_empty() {
539            0
540        } else {
541            total_changes
542        },
543        refs_written,
544        pruned: auto_pruned,
545    })
546}
547
548fn metadata_add_change(entry: &SerializableEntry) -> (char, String, String) {
549    let target_label = if entry.target_type == TargetType::Project {
550        "project".to_string()
551    } else {
552        format!("{}:{}", entry.target_type, entry.target_value)
553    };
554    ('A', target_label, entry.key.clone())
555}
556
557fn ref_tree_oid(repo: &gix::Repository, ref_name: &str) -> Result<Option<gix::ObjectId>> {
558    repo.find_reference(ref_name)
559        .ok()
560        .and_then(|r| r.into_fully_peeled_id().ok())
561        .map(|id| {
562            id.object()
563                .map_err(|e| Error::Other(format!("{e}")))?
564                .into_commit()
565                .tree_id()
566                .map(gix::Id::detach)
567                .map_err(|e| Error::Other(format!("{e}")))
568        })
569        .transpose()
570}
571
572/// Build a commit message from a list of changes.
573///
574/// Each change is `(op_char, target_label, key)`.
575fn build_commit_message(changes: &[(char, String, String)]) -> String {
576    if changes.len() > MAX_COMMIT_CHANGES {
577        format!(
578            "git-meta: serialize ({} changes)\n\nchanges-omitted: true\ncount: {}",
579            changes.len(),
580            changes.len()
581        )
582    } else {
583        let mut msg = format!("git-meta: serialize ({} changes)\n", changes.len());
584        for (op, target, key) in changes {
585            msg.push('\n');
586            msg.push(*op);
587            msg.push('\t');
588            msg.push_str(target);
589            msg.push('\t');
590            msg.push_str(key);
591        }
592        msg
593    }
594}
595
/// Build a Git tree from already-filtered metadata, always from scratch.
///
/// `git-meta prune` uses this to rebuild a tree containing only the surviving
/// entries. No existing tree is reused and no dirty-target filter is applied.
///
/// # Parameters
///
/// - `repo`: Git repository that receives the written objects
/// - `metadata_entries`: metadata entries to include
/// - `tombstone_entries`: key tombstones
/// - `set_tombstone_entries`: set-member tombstones
/// - `list_tombstone_entries`: list-entry tombstones
///
/// # Returns
///
/// The OID of the root Git tree object.
///
/// # Errors
///
/// Fails if target parsing or a Git object write fails.
#[cfg(feature = "internal")]
pub fn build_filtered_tree(
    repo: &gix::Repository,
    metadata_entries: &[SerializableEntry],
    tombstone_entries: &[TombstoneRecord],
    set_tombstone_entries: &[SetTombstoneRecord],
    list_tombstone_entries: &[ListTombstoneRecord],
) -> Result<gix::ObjectId> {
    // Passing neither an existing tree nor a dirty set selects the full rebuild.
    let no_existing_tree: Option<gix::ObjectId> = None;
    let no_dirty_filter: Option<&BTreeSet<String>> = None;
    build_tree(
        repo,
        metadata_entries,
        tombstone_entries,
        set_tombstone_entries,
        list_tombstone_entries,
        no_existing_tree,
        no_dirty_filter,
    )
}
633
634/// Build a complete Git tree from all metadata entries.
635///
636/// When `existing_tree_oid` and `dirty_target_bases` are provided, only entries
637/// belonging to dirty targets are processed; unchanged subtrees are reused
638/// from the existing tree by OID (incremental mode).
639fn build_tree(
640    repo: &gix::Repository,
641    metadata_entries: &[SerializableEntry],
642    tombstone_entries: &[TombstoneRecord],
643    set_tombstone_entries: &[SetTombstoneRecord],
644    list_tombstone_entries: &[ListTombstoneRecord],
645    existing_tree_oid: Option<gix::ObjectId>,
646    dirty_target_bases: Option<&BTreeSet<String>>,
647) -> Result<gix::ObjectId> {
648    let mut files: BTreeMap<String, Vec<u8>> = BTreeMap::new();
649
650    for e in metadata_entries {
651        let target = if e.target_type == TargetType::Project {
652            Target::parse("project")?
653        } else {
654            Target::parse(&format!("{}:{}", e.target_type, e.target_value))?
655        };
656
657        // Skip entries for clean targets -- their subtrees will be reused
658        if let Some(dirty) = dirty_target_bases {
659            if !dirty.contains(&tree_paths::tree_base_path(&target)) {
660                continue;
661            }
662        }
663
664        match e.value_type {
665            ValueType::String => {
666                let full_path = tree_paths::tree_path(&target, &e.key)?;
667                if e.is_git_ref {
668                    let oid = gix::ObjectId::from_hex(e.value.as_bytes())
669                        .map_err(|e| Error::Other(format!("{e}")))?;
670                    let blob = oid
671                        .attach(repo)
672                        .object()
673                        .map_err(|e| Error::Other(format!("{e}")))?
674                        .into_blob();
675                    files.insert(full_path, blob.data.clone());
676                } else {
677                    let raw_value: String = match serde_json::from_str(&e.value) {
678                        Ok(s) => s,
679                        Err(_) => e.value.clone(),
680                    };
681                    files.insert(full_path, raw_value.into_bytes());
682                }
683            }
684            ValueType::List => {
685                let list_entries =
686                    parse_entries(&e.value).map_err(|e| Error::InvalidValue(format!("{e}")))?;
687                let list_dir_path = tree_paths::list_dir_path(&target, &e.key)?;
688                for entry in list_entries {
689                    let entry_name = make_entry_name(&entry);
690                    let full_path = format!("{list_dir_path}/{entry_name}");
691                    files.insert(full_path, entry.value.into_bytes());
692                }
693            }
694            ValueType::Set => {
695                let members: Vec<String> = serde_json::from_str(&e.value)
696                    .map_err(|e| Error::InvalidValue(format!("failed to decode set value: {e}")))?;
697                let set_dir_path = tree_paths::set_dir_path(&target, &e.key)?;
698                for member in members {
699                    let member_id = crate::types::set_member_id(&member);
700                    let full_path = format!("{set_dir_path}/{member_id}");
701                    files.insert(full_path, member.into_bytes());
702                }
703            }
704        }
705    }
706
707    for record in tombstone_entries {
708        let target = if record.target_type == TargetType::Project {
709            Target::parse("project")?
710        } else {
711            Target::parse(&format!("{}:{}", record.target_type, record.target_value))?
712        };
713
714        if let Some(dirty) = dirty_target_bases {
715            if !dirty.contains(&tree_paths::tree_base_path(&target)) {
716                continue;
717            }
718        }
719
720        let full_path = tree_paths::tombstone_path(&target, &record.key)?;
721        let payload = serde_json::to_vec(&Tombstone {
722            timestamp: record.timestamp,
723            email: record.email.clone(),
724        })?;
725        files.insert(full_path, payload);
726    }
727
728    for record in set_tombstone_entries {
729        let target = if record.target_type == TargetType::Project {
730            Target::parse("project")?
731        } else {
732            Target::parse(&format!("{}:{}", record.target_type, record.target_value))?
733        };
734
735        if let Some(dirty) = dirty_target_bases {
736            if !dirty.contains(&tree_paths::tree_base_path(&target)) {
737                continue;
738            }
739        }
740
741        let full_path =
742            tree_paths::set_member_tombstone_path(&target, &record.key, &record.member_id)?;
743        files.insert(full_path, record.value.as_bytes().to_vec());
744    }
745
746    for record in list_tombstone_entries {
747        let target = if record.target_type == TargetType::Project {
748            Target::parse("project")?
749        } else {
750            Target::parse(&format!("{}:{}", record.target_type, record.target_value))?
751        };
752
753        if let Some(dirty) = dirty_target_bases {
754            if !dirty.contains(&tree_paths::tree_base_path(&target)) {
755                continue;
756            }
757        }
758
759        let full_path =
760            tree_paths::list_entry_tombstone_path(&target, &record.key, &record.entry_name)?;
761        let payload = serde_json::to_vec(&Tombstone {
762            timestamp: record.timestamp,
763            email: record.email.clone(),
764        })?;
765        files.insert(full_path, payload);
766    }
767
768    // Build nested tree, reusing unchanged subtrees from existing tree
769    if let (Some(existing_oid), Some(dirty_bases)) = (existing_tree_oid, dirty_target_bases) {
770        build_tree_incremental(repo, existing_oid, &files, dirty_bases)
771    } else {
772        build_tree_from_paths(repo, &files)
773    }
774}
775
776/// Incrementally build a tree by patching an existing tree.
777///
778/// Only dirty target subtrees are rebuilt from `files`; all other subtrees
779/// are reused from the existing tree by OID.
780fn build_tree_incremental(
781    repo: &gix::Repository,
782    existing_tree_oid: gix::ObjectId,
783    files: &BTreeMap<String, Vec<u8>>,
784    dirty_target_bases: &BTreeSet<String>,
785) -> Result<gix::ObjectId> {
786    // Step 1: Remove dirty target subtrees from existing tree
787    let cleaned_oid = remove_subtrees(repo, existing_tree_oid, dirty_target_bases)?;
788
789    // Step 2: Build TreeDir from dirty files only
790    let mut root = TreeDir::default();
791    for (path, content) in files {
792        let parts: Vec<&str> = path.split('/').collect();
793        insert_path(&mut root, &parts, content.clone());
794    }
795
796    // Step 3: Merge new content into cleaned tree
797    merge_dir_into_tree(repo, &root, cleaned_oid)
798}
799
800/// Remove subtrees at specific paths from an existing tree.
801fn remove_subtrees(
802    repo: &gix::Repository,
803    tree_oid: gix::ObjectId,
804    paths: &BTreeSet<String>,
805) -> Result<gix::ObjectId> {
806    let mut grouped: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
807    let mut direct_removes: BTreeSet<String> = BTreeSet::new();
808
809    for path in paths {
810        if let Some((first, rest)) = path.split_once('/') {
811            grouped
812                .entry(first.to_string())
813                .or_default()
814                .insert(rest.to_string());
815        } else {
816            direct_removes.insert(path.clone());
817        }
818    }
819
820    let mut editor = repo
821        .edit_tree(tree_oid)
822        .map_err(|e| Error::Other(format!("{e}")))?;
823
824    for name in &direct_removes {
825        let _ = editor.remove(name);
826    }
827
828    // For grouped paths, recurse into subtrees
829    let tree = tree_oid
830        .attach(repo)
831        .object()
832        .map_err(|e| Error::Other(format!("{e}")))?
833        .into_tree();
834    for (name, sub_paths) in &grouped {
835        let entry = tree.iter().find_map(|e| {
836            let e = e.ok()?;
837            if e.filename().to_str_lossy() == *name && e.mode().is_tree() {
838                Some(e.object_id())
839            } else {
840                None
841            }
842        });
843        if let Some(subtree_oid) = entry {
844            let new_oid = remove_subtrees(repo, subtree_oid, sub_paths)?;
845            let new_tree = new_oid
846                .attach(repo)
847                .object()
848                .map_err(|e| Error::Other(format!("{e}")))?
849                .into_tree();
850            if new_tree.iter().count() > 0 {
851                editor
852                    .upsert(name, gix::objs::tree::EntryKind::Tree, new_oid)
853                    .map_err(|e| Error::Other(format!("{e}")))?;
854            } else {
855                let _ = editor.remove(name);
856            }
857        }
858    }
859
860    Ok(editor
861        .write()
862        .map_err(|e| Error::Other(format!("{e}")))?
863        .detach())
864}
865
866/// Merge a [`TreeDir`] structure into an existing tree.
867///
868/// Existing entries not present in `dir` are preserved.
869/// Entries in `dir` overwrite existing entries with the same name.
870fn merge_dir_into_tree(
871    repo: &gix::Repository,
872    dir: &TreeDir,
873    existing_oid: gix::ObjectId,
874) -> Result<gix::ObjectId> {
875    let mut editor = repo
876        .edit_tree(existing_oid)
877        .map_err(|e| Error::Other(format!("{e}")))?;
878
879    for (name, content) in &dir.files {
880        let blob_oid: gix::ObjectId = repo
881            .write_blob(content)
882            .map_err(|e| Error::Other(format!("{e}")))?
883            .into();
884        editor
885            .upsert(name, gix::objs::tree::EntryKind::Blob, blob_oid)
886            .map_err(|e| Error::Other(format!("{e}")))?;
887    }
888
889    let existing_tree = existing_oid
890        .attach(repo)
891        .object()
892        .map_err(|e| Error::Other(format!("{e}")))?
893        .into_tree();
894    for (name, child_dir) in &dir.dirs {
895        let existing_child_oid = existing_tree.iter().find_map(|e| {
896            let e = e.ok()?;
897            if e.filename().to_str_lossy() == *name && e.mode().is_tree() {
898                Some(e.object_id())
899            } else {
900                None
901            }
902        });
903
904        let child_oid = if let Some(existing_child) = existing_child_oid {
905            merge_dir_into_tree(repo, child_dir, existing_child)?
906        } else {
907            build_dir(repo, child_dir)?
908        };
909        editor
910            .upsert(name, gix::objs::tree::EntryKind::Tree, child_oid)
911            .map_err(|e| Error::Other(format!("{e}")))?;
912    }
913
914    Ok(editor
915        .write()
916        .map_err(|e| Error::Other(format!("{e}")))?
917        .detach())
918}
919
920/// Prune a serialized tree by dropping entries older than the cutoff.
921///
922/// Returns the OID of the new (possibly smaller) tree. If the tree would
923/// be unchanged, the same OID is returned.
924///
925/// # Parameters
926///
927/// - `repo`: the Git repository
928/// - `tree_oid`: the root tree to prune
929/// - `rules`: the prune rules to apply
930/// - `db`: the metadata store (for potential future use by prune helpers)
931///
932/// # Errors
933///
934/// Returns an error if Git object reads/writes fail or cutoff parsing fails.
935pub fn prune_tree(
936    repo: &gix::Repository,
937    tree_oid: gix::ObjectId,
938    rules: &PruneRules,
939    db: &Store,
940    now_ms: i64,
941) -> Result<gix::ObjectId> {
942    let cutoff_ms = prune::parse_since_to_cutoff_ms(&rules.since, now_ms)?;
943    let min_size = rules.min_size.unwrap_or(0);
944
945    let tree = tree_oid
946        .attach(repo)
947        .object()
948        .map_err(|e| Error::Other(format!("{e}")))?
949        .into_tree();
950    let mut editor = repo
951        .empty_tree()
952        .edit()
953        .map_err(|e| Error::Other(format!("{e}")))?;
954
955    for entry_result in tree.iter() {
956        let entry = entry_result.map_err(|e| Error::Other(format!("{e}")))?;
957        let name = entry.filename().to_str_lossy().to_string();
958
959        if name == "project" {
960            editor
961                .upsert(&name, entry.mode().kind(), entry.object_id())
962                .map_err(|e| Error::Other(format!("{e}")))?;
963            continue;
964        }
965
966        if entry.mode().is_tree() {
967            let subtree_oid = entry.object_id();
968
969            // Check min-size
970            if min_size > 0 {
971                let size = prune::compute_tree_size_for(repo, subtree_oid)?;
972                if size < min_size {
973                    editor
974                        .upsert(&name, entry.mode().kind(), subtree_oid)
975                        .map_err(|e| Error::Other(format!("{e}")))?;
976                    continue;
977                }
978            }
979
980            let pruned_oid = prune_target_type_tree(repo, subtree_oid, cutoff_ms, min_size, db)?;
981            let pruned_tree = pruned_oid
982                .attach(repo)
983                .object()
984                .map_err(|e| Error::Other(format!("{e}")))?
985                .into_tree();
986            if pruned_tree.iter().count() > 0 {
987                editor
988                    .upsert(&name, gix::objs::tree::EntryKind::Tree, pruned_oid)
989                    .map_err(|e| Error::Other(format!("{e}")))?;
990            }
991        } else {
992            editor
993                .upsert(&name, entry.mode().kind(), entry.object_id())
994                .map_err(|e| Error::Other(format!("{e}")))?;
995        }
996    }
997
998    Ok(editor
999        .write()
1000        .map_err(|e| Error::Other(format!("{e}")))?
1001        .detach())
1002}
1003
1004struct AutoPruneInputs<'a> {
1005    metadata_entries: &'a [SerializableEntry],
1006    tombstone_entries: &'a [TombstoneRecord],
1007    set_tombstone_entries: &'a [SetTombstoneRecord],
1008    list_tombstone_entries: &'a [ListTombstoneRecord],
1009    filter_rules: &'a [FilterRule],
1010    rules: &'a PruneRules,
1011    now_ms: i64,
1012}
1013
1014fn auto_prune_tree(repo: &gix::Repository, inputs: AutoPruneInputs<'_>) -> Result<gix::ObjectId> {
1015    let cutoff_ms = prune::parse_since_to_cutoff_ms(&inputs.rules.since, inputs.now_ms)?;
1016    let is_main_dest = |key: &str| -> bool {
1017        classify_key(key, inputs.filter_rules)
1018            .is_some_and(|dests| dests.iter().any(|d| d == MAIN_DEST))
1019    };
1020
1021    let metadata = inputs
1022        .metadata_entries
1023        .iter()
1024        .filter(|entry| is_main_dest(&entry.key))
1025        .filter_map(|entry| prune_metadata_entry(entry, cutoff_ms).transpose())
1026        .collect::<Result<Vec<_>>>()?;
1027    let tombstones = inputs
1028        .tombstone_entries
1029        .iter()
1030        .filter(|entry| is_main_dest(&entry.key))
1031        .filter(|entry| entry.target_type == TargetType::Project || entry.timestamp >= cutoff_ms)
1032        .cloned()
1033        .collect::<Vec<_>>();
1034    let set_tombstones = inputs
1035        .set_tombstone_entries
1036        .iter()
1037        .filter(|entry| is_main_dest(&entry.key))
1038        .filter(|entry| entry.target_type == TargetType::Project || entry.timestamp >= cutoff_ms)
1039        .cloned()
1040        .collect::<Vec<_>>();
1041    let list_tombstones = inputs
1042        .list_tombstone_entries
1043        .iter()
1044        .filter(|entry| is_main_dest(&entry.key))
1045        .filter(|entry| entry.target_type == TargetType::Project || entry.timestamp >= cutoff_ms)
1046        .cloned()
1047        .collect::<Vec<_>>();
1048
1049    build_tree(
1050        repo,
1051        &metadata,
1052        &tombstones,
1053        &set_tombstones,
1054        &list_tombstones,
1055        None,
1056        None,
1057    )
1058}
1059
1060fn prune_metadata_entry(
1061    entry: &SerializableEntry,
1062    cutoff_ms: i64,
1063) -> Result<Option<SerializableEntry>> {
1064    if entry.target_type != TargetType::Project && entry.last_timestamp < cutoff_ms {
1065        return Ok(None);
1066    }
1067
1068    if entry.target_type != TargetType::Project && entry.value_type == ValueType::List {
1069        let retained = parse_entries(&entry.value)?
1070            .into_iter()
1071            .filter(|item| item.timestamp >= cutoff_ms)
1072            .collect::<Vec<_>>();
1073        let mut pruned = entry.clone();
1074        pruned.value = encode_entries(&retained)?;
1075        return Ok(Some(pruned));
1076    }
1077
1078    Ok(Some(entry.clone()))
1079}
1080
1081fn prune_target_type_tree(
1082    repo: &gix::Repository,
1083    tree_oid: gix::ObjectId,
1084    cutoff_ms: i64,
1085    min_size: u64,
1086    db: &Store,
1087) -> Result<gix::ObjectId> {
1088    let tree = tree_oid
1089        .attach(repo)
1090        .object()
1091        .map_err(|e| Error::Other(format!("{e}")))?
1092        .into_tree();
1093    let mut editor = repo
1094        .empty_tree()
1095        .edit()
1096        .map_err(|e| Error::Other(format!("{e}")))?;
1097
1098    for entry_result in tree.iter() {
1099        let entry = entry_result.map_err(|e| Error::Other(format!("{e}")))?;
1100        let name = entry.filename().to_str_lossy().to_string();
1101
1102        if entry.mode().is_tree() {
1103            let subtree_oid = entry.object_id();
1104            let pruned_oid = prune_subtree_recursive(repo, subtree_oid, cutoff_ms, min_size, db)?;
1105            let pruned_tree = pruned_oid
1106                .attach(repo)
1107                .object()
1108                .map_err(|e| Error::Other(format!("{e}")))?
1109                .into_tree();
1110            if pruned_tree.iter().count() > 0 {
1111                editor
1112                    .upsert(&name, gix::objs::tree::EntryKind::Tree, pruned_oid)
1113                    .map_err(|e| Error::Other(format!("{e}")))?;
1114            }
1115        } else {
1116            editor
1117                .upsert(&name, entry.mode().kind(), entry.object_id())
1118                .map_err(|e| Error::Other(format!("{e}")))?;
1119        }
1120    }
1121
1122    Ok(editor
1123        .write()
1124        .map_err(|e| Error::Other(format!("{e}")))?
1125        .detach())
1126}
1127
1128fn prune_subtree_recursive(
1129    repo: &gix::Repository,
1130    tree_oid: gix::ObjectId,
1131    cutoff_ms: i64,
1132    _min_size: u64,
1133    _db: &Store,
1134) -> Result<gix::ObjectId> {
1135    let tree = tree_oid
1136        .attach(repo)
1137        .object()
1138        .map_err(|e| Error::Other(format!("{e}")))?
1139        .into_tree();
1140    let mut editor = repo
1141        .empty_tree()
1142        .edit()
1143        .map_err(|e| Error::Other(format!("{e}")))?;
1144
1145    for entry_result in tree.iter() {
1146        let entry = entry_result.map_err(|e| Error::Other(format!("{e}")))?;
1147        let name = entry.filename().to_str_lossy().to_string();
1148
1149        if entry.mode().is_tree() {
1150            if name == "__list" {
1151                let list_tree_oid = entry.object_id();
1152                let pruned_oid = prune_list_tree(repo, list_tree_oid, cutoff_ms)?;
1153                let pruned_tree = pruned_oid
1154                    .attach(repo)
1155                    .object()
1156                    .map_err(|e| Error::Other(format!("{e}")))?
1157                    .into_tree();
1158                if pruned_tree.iter().count() > 0 {
1159                    editor
1160                        .upsert(&name, gix::objs::tree::EntryKind::Tree, pruned_oid)
1161                        .map_err(|e| Error::Other(format!("{e}")))?;
1162                }
1163            } else if name == "__tombstones" {
1164                let tomb_tree_oid = entry.object_id();
1165                let pruned_oid = prune_tombstone_tree(repo, tomb_tree_oid, cutoff_ms)?;
1166                let pruned_tree = pruned_oid
1167                    .attach(repo)
1168                    .object()
1169                    .map_err(|e| Error::Other(format!("{e}")))?
1170                    .into_tree();
1171                if pruned_tree.iter().count() > 0 {
1172                    editor
1173                        .upsert(&name, gix::objs::tree::EntryKind::Tree, pruned_oid)
1174                        .map_err(|e| Error::Other(format!("{e}")))?;
1175                }
1176            } else {
1177                let subtree_oid = entry.object_id();
1178                let pruned_oid =
1179                    prune_subtree_recursive(repo, subtree_oid, cutoff_ms, _min_size, _db)?;
1180                let pruned_tree = pruned_oid
1181                    .attach(repo)
1182                    .object()
1183                    .map_err(|e| Error::Other(format!("{e}")))?
1184                    .into_tree();
1185                if pruned_tree.iter().count() > 0 {
1186                    editor
1187                        .upsert(&name, gix::objs::tree::EntryKind::Tree, pruned_oid)
1188                        .map_err(|e| Error::Other(format!("{e}")))?;
1189                }
1190            }
1191        } else {
1192            editor
1193                .upsert(&name, entry.mode().kind(), entry.object_id())
1194                .map_err(|e| Error::Other(format!("{e}")))?;
1195        }
1196    }
1197
1198    Ok(editor
1199        .write()
1200        .map_err(|e| Error::Other(format!("{e}")))?
1201        .detach())
1202}
1203
1204fn prune_list_tree(
1205    repo: &gix::Repository,
1206    tree_oid: gix::ObjectId,
1207    cutoff_ms: i64,
1208) -> Result<gix::ObjectId> {
1209    let tree = tree_oid
1210        .attach(repo)
1211        .object()
1212        .map_err(|e| Error::Other(format!("{e}")))?
1213        .into_tree();
1214    let mut editor = repo
1215        .empty_tree()
1216        .edit()
1217        .map_err(|e| Error::Other(format!("{e}")))?;
1218
1219    for entry_result in tree.iter() {
1220        let entry = entry_result.map_err(|e| Error::Other(format!("{e}")))?;
1221        let name = entry.filename().to_str_lossy().to_string();
1222        // Entry names are formatted as "{timestamp_ms}-{hash5}"
1223        if let Some((ts_str, _)) = name.split_once('-') {
1224            if let Ok(ts) = ts_str.parse::<i64>() {
1225                if ts < cutoff_ms {
1226                    continue; // Drop old entry
1227                }
1228            }
1229        }
1230        editor
1231            .upsert(&name, entry.mode().kind(), entry.object_id())
1232            .map_err(|e| Error::Other(format!("{e}")))?;
1233    }
1234
1235    Ok(editor
1236        .write()
1237        .map_err(|e| Error::Other(format!("{e}")))?
1238        .detach())
1239}
1240
1241fn prune_tombstone_tree(
1242    repo: &gix::Repository,
1243    tree_oid: gix::ObjectId,
1244    cutoff_ms: i64,
1245) -> Result<gix::ObjectId> {
1246    let tree = tree_oid
1247        .attach(repo)
1248        .object()
1249        .map_err(|e| Error::Other(format!("{e}")))?
1250        .into_tree();
1251    let mut editor = repo
1252        .empty_tree()
1253        .edit()
1254        .map_err(|e| Error::Other(format!("{e}")))?;
1255
1256    for entry_result in tree.iter() {
1257        let entry = entry_result.map_err(|e| Error::Other(format!("{e}")))?;
1258        let name = entry.filename().to_str_lossy().to_string();
1259
1260        if entry.mode().is_tree() {
1261            let subtree_oid = entry.object_id();
1262            let pruned_oid = prune_tombstone_tree(repo, subtree_oid, cutoff_ms)?;
1263            let pruned_tree = pruned_oid
1264                .attach(repo)
1265                .object()
1266                .map_err(|e| Error::Other(format!("{e}")))?
1267                .into_tree();
1268            if pruned_tree.iter().count() > 0 {
1269                editor
1270                    .upsert(&name, gix::objs::tree::EntryKind::Tree, pruned_oid)
1271                    .map_err(|e| Error::Other(format!("{e}")))?;
1272            }
1273        } else if entry.mode().is_blob() && name == "__deleted" {
1274            let blob = entry
1275                .object_id()
1276                .attach(repo)
1277                .object()
1278                .map_err(|e| Error::Other(format!("{e}")))?
1279                .into_blob();
1280            if let Ok(content) = std::str::from_utf8(&blob.data) {
1281                if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(content) {
1282                    if let Some(ts) = parsed.get("timestamp").and_then(serde_json::Value::as_i64) {
1283                        if ts < cutoff_ms {
1284                            continue; // Drop old tombstone
1285                        }
1286                    }
1287                }
1288            }
1289            editor
1290                .upsert(&name, entry.mode().kind(), entry.object_id())
1291                .map_err(|e| Error::Other(format!("{e}")))?;
1292        } else {
1293            editor
1294                .upsert(&name, entry.mode().kind(), entry.object_id())
1295                .map_err(|e| Error::Other(format!("{e}")))?;
1296        }
1297    }
1298
1299    Ok(editor
1300        .write()
1301        .map_err(|e| Error::Other(format!("{e}")))?
1302        .detach())
1303}
1304
1305/// Count keys in original and pruned trees to produce stats.
1306///
1307/// Returns `(keys_dropped, keys_retained)`.
1308///
1309/// # Errors
1310///
1311/// Returns an error if Git object reads fail.
1312pub fn count_prune_stats(
1313    repo: &gix::Repository,
1314    original_oid: gix::ObjectId,
1315    pruned_oid: gix::ObjectId,
1316) -> Result<(u64, u64)> {
1317    let mut original_count = 0u64;
1318    count_all_blobs(repo, original_oid, &mut original_count)?;
1319
1320    let mut pruned_count = 0u64;
1321    count_all_blobs(repo, pruned_oid, &mut pruned_count)?;
1322
1323    let dropped = original_count.saturating_sub(pruned_count);
1324    Ok((dropped, pruned_count))
1325}
1326
1327fn count_all_blobs(repo: &gix::Repository, tree_oid: gix::ObjectId, count: &mut u64) -> Result<()> {
1328    let tree = tree_oid
1329        .attach(repo)
1330        .object()
1331        .map_err(|e| Error::Other(format!("{e}")))?
1332        .into_tree();
1333    for entry_result in tree.iter() {
1334        let entry = entry_result.map_err(|e| Error::Other(format!("{e}")))?;
1335        if entry.mode().is_blob() {
1336            *count += 1;
1337        } else if entry.mode().is_tree() {
1338            count_all_blobs(repo, entry.object_id(), count)?;
1339        }
1340    }
1341    Ok(())
1342}