Skip to main content

grit_lib/
textconv_cache.rs

//! Git-compatible `diff.<driver>.cachetextconv` storage under `refs/notes/textconv/<driver>`.
//!
//! Matches Git's `notes-cache.c`: the notes ref's **commit subject** stores the current
//! `diff.<driver>.textconv` command string; when it changes, the cache is treated as empty.
5
6use std::collections::BTreeMap;
7use std::path::Path;
8
9use crate::config::ConfigSet;
10use crate::objects::{
11    parse_commit, parse_tree, serialize_commit, serialize_tree, tree_entry_cmp, CommitData,
12    ObjectId, ObjectKind, TreeEntry,
13};
14use crate::odb::Odb;
15use crate::refs::{resolve_ref, write_ref};
16
17#[derive(Clone)]
18struct NotesEntry {
19    path: Vec<u8>,
20    blob_oid: ObjectId,
21}
22
/// Recover the annotated object's hex name from a notes-tree path.
///
/// `/` separators (fanout directories) are skipped. Returns the lowercased name when the
/// remaining bytes are exactly 40 ASCII hex digits, and `None` for any other path.
fn note_object_hex(path: &[u8]) -> Option<String> {
    let mut hex = String::with_capacity(40);
    for &byte in path {
        if byte == b'/' {
            continue;
        }
        if !byte.is_ascii_hexdigit() {
            return None;
        }
        hex.push(char::from(byte.to_ascii_lowercase()));
    }
    (hex.len() == 40).then_some(hex)
}
32
33fn collect_tree_entries(
34    odb: &Odb,
35    tree_oid: &ObjectId,
36    prefix: &[u8],
37    out: &mut Vec<NotesEntry>,
38) -> crate::error::Result<()> {
39    let tree_obj = odb.read(tree_oid)?;
40    if tree_obj.kind != ObjectKind::Tree {
41        return Err(crate::error::Error::CorruptObject(
42            "notes tree is not a tree object".to_owned(),
43        ));
44    }
45    for entry in parse_tree(&tree_obj.data)? {
46        let mut path = prefix.to_vec();
47        if !path.is_empty() {
48            path.push(b'/');
49        }
50        path.extend_from_slice(&entry.name);
51        if entry.mode == 0o040000 {
52            collect_tree_entries(odb, &entry.oid, &path, out)?;
53        } else {
54            out.push(NotesEntry {
55                path,
56                blob_oid: entry.oid,
57            });
58        }
59    }
60    Ok(())
61}
62
63fn read_notes_entries(
64    odb: &Odb,
65    git_dir: &Path,
66    notes_ref: &str,
67) -> crate::error::Result<Vec<NotesEntry>> {
68    let Ok(commit_oid) = resolve_ref(git_dir, notes_ref) else {
69        return Ok(Vec::new());
70    };
71    let commit_obj = odb.read(&commit_oid)?;
72    if commit_obj.kind != ObjectKind::Commit {
73        return Err(crate::error::Error::CorruptObject(
74            "notes ref does not point to a commit".to_owned(),
75        ));
76    }
77    let commit = parse_commit(&commit_obj.data)?;
78    let mut out = Vec::new();
79    collect_tree_entries(odb, &commit.tree, b"", &mut out)?;
80    Ok(out)
81}
82
83fn notes_fanout(entries: &[NotesEntry]) -> usize {
84    let mut note_count = entries
85        .iter()
86        .filter(|e| note_object_hex(&e.path).is_some())
87        .count();
88    let mut fanout = 0usize;
89    while note_count > 0xff {
90        note_count >>= 8;
91        fanout += 1;
92    }
93    fanout
94}
95
/// Lay out a hex object name with `fanout` leading two-character directory levels.
///
/// `fanout == 0` returns `hex` unchanged; `fanout == 2` turns `abcdef…` into `ab/cd/ef…`.
///
/// The number of levels is clamped so that at least one byte is always left for the final
/// path component. Without the clamp an oversized `fanout` yields a path ending in `/`,
/// which the notes tree writer would turn into a tree entry with an empty name.
fn path_with_fanout(hex: &str, fanout: usize) -> Vec<u8> {
    let bytes = hex.as_bytes();
    // Largest level count that still leaves a non-empty leaf name.
    let levels = fanout.min(bytes.len().saturating_sub(1) / 2);
    let (dirs, leaf) = bytes.split_at(levels * 2);
    let mut path = Vec::with_capacity(bytes.len() + levels);
    for pair in dirs.chunks_exact(2) {
        path.extend_from_slice(pair);
        path.push(b'/');
    }
    path.extend_from_slice(leaf);
    path
}
108
109enum NotesChild {
110    Blob(ObjectId),
111    Tree(Vec<NotesEntry>),
112}
113
114fn write_notes_subtree(odb: &Odb, entries: &[NotesEntry]) -> crate::error::Result<ObjectId> {
115    let mut children: BTreeMap<Vec<u8>, NotesChild> = BTreeMap::new();
116    for entry in entries {
117        if let Some(slash_pos) = entry.path.iter().position(|b| *b == b'/') {
118            let child_name = entry.path[..slash_pos].to_vec();
119            let child_entry = NotesEntry {
120                path: entry.path[slash_pos + 1..].to_vec(),
121                blob_oid: entry.blob_oid,
122            };
123            children
124                .entry(child_name.clone())
125                .or_insert_with(|| NotesChild::Tree(Vec::new()));
126            if let Some(NotesChild::Tree(tree_entries)) = children.get_mut(&child_name) {
127                tree_entries.push(child_entry);
128            }
129        } else {
130            children.insert(entry.path.clone(), NotesChild::Blob(entry.blob_oid));
131        }
132    }
133    let mut tree_entries = Vec::with_capacity(children.len());
134    for (name, child) in children {
135        match child {
136            NotesChild::Blob(oid) => tree_entries.push(TreeEntry {
137                mode: 0o100644,
138                name,
139                oid,
140            }),
141            NotesChild::Tree(child_entries) => {
142                let oid = write_notes_subtree(odb, &child_entries)?;
143                tree_entries.push(TreeEntry {
144                    mode: 0o040000,
145                    name,
146                    oid,
147                });
148            }
149        }
150    }
151    tree_entries
152        .sort_by(|a, b| tree_entry_cmp(&a.name, a.mode == 0o040000, &b.name, b.mode == 0o040000));
153    let data = serialize_tree(&tree_entries);
154    odb.write(ObjectKind::Tree, &data)
155}
156
157fn write_notes_ref(
158    odb: &Odb,
159    git_dir: &Path,
160    notes_ref: &str,
161    entries: &[NotesEntry],
162    message: &str,
163) -> crate::error::Result<()> {
164    let fanout = notes_fanout(entries);
165    let rewritten: Vec<NotesEntry> = entries
166        .iter()
167        .map(|e| NotesEntry {
168            path: note_object_hex(&e.path)
169                .map(|h| path_with_fanout(&h, fanout))
170                .unwrap_or_else(|| e.path.clone()),
171            blob_oid: e.blob_oid,
172        })
173        .collect();
174    let tree_oid = write_notes_subtree(odb, &rewritten)?;
175    let parent = resolve_ref(git_dir, notes_ref).ok();
176    let config = ConfigSet::load(Some(git_dir), true).unwrap_or_default();
177    let now = time::OffsetDateTime::now_utc();
178    let ident = grit_ident(&config, now);
179    let commit = CommitData {
180        tree: tree_oid,
181        parents: parent.into_iter().collect(),
182        author: ident.clone(),
183        committer: ident,
184        author_raw: Vec::new(),
185        committer_raw: Vec::new(),
186        encoding: None,
187        message: if message.ends_with('\n') {
188            message.to_owned()
189        } else {
190            format!("{message}\n")
191        },
192        raw_message: None,
193    };
194    let bytes = serialize_commit(&commit);
195    let commit_oid = odb.write(ObjectKind::Commit, &bytes)?;
196    write_ref(git_dir, notes_ref, &commit_oid)?;
197    Ok(())
198}
199
200fn grit_ident(config: &ConfigSet, now: time::OffsetDateTime) -> String {
201    let name = std::env::var("GIT_COMMITTER_NAME")
202        .ok()
203        .or_else(|| config.get("user.name"))
204        .unwrap_or_else(|| "grit".to_owned());
205    let email = std::env::var("GIT_COMMITTER_EMAIL")
206        .ok()
207        .or_else(|| config.get("user.email"))
208        .unwrap_or_default();
209    let epoch = now.unix_timestamp();
210    let offset = now.offset();
211    let hours = offset.whole_hours();
212    let minutes = offset.minutes_past_hour().unsigned_abs();
213    format!("{name} <{email}> {epoch} {hours:+03}{minutes:02}")
214}
215
216fn cache_commit_message_trimmed(odb: &Odb, git_dir: &Path, notes_ref: &str) -> Option<String> {
217    let commit_oid = resolve_ref(git_dir, notes_ref).ok()?;
218    let obj = odb.read(&commit_oid).ok()?;
219    if obj.kind != ObjectKind::Commit {
220        return None;
221    }
222    let c = parse_commit(&obj.data).ok()?;
223    let mut msg = c.message;
224    while msg.ends_with('\n') {
225        msg.pop();
226    }
227    Some(msg)
228}
229
230fn cache_validity_matches(odb: &Odb, git_dir: &Path, notes_ref: &str, validity: &str) -> bool {
231    let Some(stored) = cache_commit_message_trimmed(odb, git_dir, notes_ref) else {
232        return false;
233    };
234    stored == validity
235}
236
237fn find_cached_blob_oid(entries: &[NotesEntry], blob_oid: &ObjectId) -> Option<ObjectId> {
238    let hex = blob_oid.to_hex();
239    for e in entries {
240        if note_object_hex(&e.path).as_deref() == Some(hex.as_str()) {
241            return Some(e.blob_oid);
242        }
243    }
244    None
245}
246
247/// Read cached textconv bytes for `blob_oid`, or `None` on miss / invalid cache.
248pub fn read_textconv_cache(
249    odb: &Odb,
250    git_dir: &Path,
251    driver: &str,
252    validity: &str,
253    blob_oid: &ObjectId,
254) -> Option<Vec<u8>> {
255    let notes_ref = format!("refs/notes/textconv/{driver}");
256    if !cache_validity_matches(odb, git_dir, &notes_ref, validity) {
257        return None;
258    }
259    let entries = read_notes_entries(odb, git_dir, &notes_ref).ok()?;
260    let note_blob = find_cached_blob_oid(&entries, blob_oid)?;
261    let obj = odb.read(&note_blob).ok()?;
262    if obj.kind != ObjectKind::Blob {
263        return None;
264    }
265    Some(obj.data)
266}
267
268/// Store `data` as the note for `blob_oid`. Errors are ignored (read-only repos).
269pub fn write_textconv_cache(
270    odb: &Odb,
271    git_dir: &Path,
272    driver: &str,
273    validity: &str,
274    blob_oid: &ObjectId,
275    data: &[u8],
276) {
277    let notes_ref = format!("refs/notes/textconv/{driver}");
278    let mut entries = if cache_validity_matches(odb, git_dir, &notes_ref, validity) {
279        read_notes_entries(odb, git_dir, &notes_ref).unwrap_or_default()
280    } else {
281        Vec::new()
282    };
283    let hex = blob_oid.to_hex();
284    entries.retain(|e| note_object_hex(&e.path).as_deref() != Some(hex.as_str()));
285    let value_oid = match odb.write(ObjectKind::Blob, data) {
286        Ok(oid) => oid,
287        Err(_) => return,
288    };
289    entries.push(NotesEntry {
290        path: hex.into_bytes(),
291        blob_oid: value_oid,
292    });
293    let _ = write_notes_ref(odb, git_dir, &notes_ref, &entries, validity);
294}