Trait git_pack::Find

source ·
pub trait Find {
    type Error: Error + Send + Sync + 'static;

    fn contains(&self, id: impl AsRef<oid>) -> bool;
    fn try_find_cached<'a>(
        &self,
        id: impl AsRef<oid>,
        buffer: &'a mut Vec<u8>,
        pack_cache: &mut impl DecodeEntry
    ) -> Result<Option<(Data<'a>, Option<Location>)>, Self::Error>; fn location_by_oid(
        &self,
        id: impl AsRef<oid>,
        buf: &mut Vec<u8>
    ) -> Option<Location>; fn pack_offsets_and_oid(
        &self,
        pack_id: u32
    ) -> Option<Vec<(Offset, ObjectId)>>; fn entry_by_location(&self, location: &Location) -> Option<Entry>; fn try_find<'a>(
        &self,
        id: impl AsRef<oid>,
        buffer: &'a mut Vec<u8>
    ) -> Result<Option<(Data<'a>, Option<Location>)>, Self::Error> { ... } }
Expand description

Describes how an object can be located in an object store, with built-in facilities to support packs specifically.

Notes

Find effectively needs generic associated types to allow a trait for the returned object type. Until then, we will have to make do with explicit types and give them the potentially added features we want.

Furthermore, despite this trait being in git-pack, it leaks knowledge about objects potentially not being packed. This is a necessary trade-off to allow this trait to live in git-pack where it is used in functions to create a pack.

Required Associated Types§

The error returned by try_find()

Required Methods§

Returns true if the object exists in the database.

Like Find::try_find(), but with support for controlling the pack cache. A pack_cache can be used to speed up subsequent lookups, set it to crate::cache::Never if the workload isn’t suitable for caching.

Returns Some((<object data>, <pack location if packed>)) if it was present in the database, or the error that occurred during lookup or object retrieval.

Find the pack's location where an object with id can be found in the database, or None if there is no pack holding the object.

Note that this is always None if the object isn't packed, even though it may exist as a loose object.

Obtain a vector of all offsets, in index order, along with their object id.

Return the find::Entry for location if it is backed by a pack.

Note that this is only in the interest of avoiding duplicate work during pack generation. Pack locations can be obtained from Find::try_find().

Notes

Custom implementations might be interested in providing their own meta-data with the object, which currently isn't possible as the Locate trait requires GATs to work like that.

Provided Methods§

Find an object matching id in the database while placing its raw, decoded data into buffer. A pack_cache can be used to speed up subsequent lookups, set it to crate::cache::Never if the workload isn’t suitable for caching.

Returns Some((<object data>, <pack location if packed>)) if it was present in the database, or the error that occurred during lookup or object retrieval.

Examples found in repository?
src/find_traits.rs (line 141)
134
135
136
137
138
139
140
141
142
143
144
145
146
        fn find<'a>(
            &self,
            id: impl AsRef<git_hash::oid>,
            buffer: &'a mut Vec<u8>,
        ) -> Result<(git_object::Data<'a>, Option<crate::data::entry::Location>), find::existing::Error<Self::Error>>
        {
            let id = id.as_ref();
            self.try_find(id, buffer)
                .map_err(find::existing::Error::Find)?
                .ok_or_else(|| find::existing::Error::NotFound {
                    oid: id.as_ref().to_owned(),
                })
        }
More examples
Hide additional examples
src/data/output/entry/iter_from_counts.rs (line 205)
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
/// Turn a set of object `counts` into chunks of ready-to-write pack entries,
/// yielded as `(SequenceId, Vec<output::Entry>)` items with per-chunk statistics.
///
/// Objects whose pack location can be resolved are copied from their source pack
/// where possible; all others are decoded via `db` and recompressed.
/// Work proceeds in three phases: resolve pack locations, sort/group counts by
/// pack, then produce entries in parallel.
pub fn iter_from_counts<Find>(
    mut counts: Vec<output::Count>,
    db: Find,
    mut progress: impl Progress + 'static,
    Options {
        version,
        mode,
        allow_thin_pack,
        thread_limit,
        chunk_size,
    }: Options,
) -> impl Iterator<Item = Result<(SequenceId, Vec<output::Entry>), Error<Find::Error>>>
       + parallel::reduce::Finalize<Reduce = reduce::Statistics<Error<Find::Error>>>
where
    Find: crate::Find + Send + Clone + 'static,
    <Find as crate::Find>::Error: Send,
{
    // Only V2 pack output is implemented.
    assert!(
        matches!(version, crate::data::Version::V2),
        "currently we can only write version 2"
    );
    let (chunk_size, thread_limit, _) =
        parallel::optimize_chunk_size_and_thread_limit(chunk_size, Some(counts.len()), thread_limit, None);
    // Phase 1: resolve the pack location of every count, in parallel if there are
    // enough counts to make that worthwhile.
    {
        let progress = Arc::new(parking_lot::Mutex::new(
            progress.add_child_with_id("resolving", *b"ECRC"),
        )); /* Entries from Counts Resolving Counts  */
        progress.lock().init(None, git_features::progress::count("counts"));
        let enough_counts_present = counts.len() > 4_000;
        let start = std::time::Instant::now();
        parallel::in_parallel_if(
            || enough_counts_present,
            counts.chunks_mut(chunk_size),
            thread_limit,
            |_n| Vec::<u8>::new(),
            {
                let progress = Arc::clone(&progress);
                let db = db.clone();
                move |chunk, buf| {
                    let chunk_size = chunk.len();
                    for count in chunk {
                        use crate::data::output::count::PackLocation::*;
                        // Skip counts whose location was already resolved.
                        match count.entry_pack_location {
                            LookedUp(_) => continue,
                            NotLookedUp => count.entry_pack_location = LookedUp(db.location_by_oid(count.id, buf)),
                        }
                    }
                    progress.lock().inc_by(chunk_size);
                    Ok::<_, ()>(())
                }
            },
            parallel::reduce::IdentityWithResult::<(), ()>::default(),
        )
        .expect("infallible - we ignore none-existing objects");
        progress.lock().show_throughput(start);
    }
    // Phase 2: sort counts so that objects without a pack location come first,
    // followed by packed objects grouped by pack-id and ordered by pack-offset,
    // then build an index of the counts range belonging to each pack.
    let counts_range_by_pack_id = match mode {
        Mode::PackCopyAndBaseObjects => {
            let mut progress = progress.add_child_with_id("sorting", *b"ECSE"); /* Entries from Counts Sorting Entries */
            progress.init(Some(counts.len()), git_features::progress::count("counts"));
            let start = std::time::Instant::now();

            use crate::data::output::count::PackLocation::*;
            counts.sort_by(|lhs, rhs| match (&lhs.entry_pack_location, &rhs.entry_pack_location) {
                (LookedUp(None), LookedUp(None)) => Ordering::Equal,
                (LookedUp(Some(_)), LookedUp(None)) => Ordering::Greater,
                (LookedUp(None), LookedUp(Some(_))) => Ordering::Less,
                (LookedUp(Some(lhs)), LookedUp(Some(rhs))) => lhs
                    .pack_id
                    .cmp(&rhs.pack_id)
                    .then(lhs.pack_offset.cmp(&rhs.pack_offset)),
                (_, _) => unreachable!("counts were resolved beforehand"),
            });

            // One (pack_id, range-into-counts) tuple per distinct pack.
            let mut index: Vec<(u32, std::ops::Range<usize>)> = Vec::new();
            // Skip the leading run of counts that have no pack location.
            let mut chunks_pack_start = counts.partition_point(|e| e.entry_pack_location.is_none());
            let mut slice = &counts[chunks_pack_start..];
            while !slice.is_empty() {
                let current_pack_id = slice[0].entry_pack_location.as_ref().expect("packed object").pack_id;
                let pack_end = slice.partition_point(|e| {
                    e.entry_pack_location.as_ref().expect("packed object").pack_id == current_pack_id
                });
                index.push((current_pack_id, chunks_pack_start..chunks_pack_start + pack_end));
                slice = &slice[pack_end..];
                chunks_pack_start += pack_end;
            }

            progress.set(counts.len());
            progress.show_throughput(start);

            index
        }
    };

    // Phase 3: turn each chunk of counts into output entries on worker threads,
    // yielding (chunk-id, entries, statistics) per chunk.
    let counts = Arc::new(counts);
    let progress = Arc::new(parking_lot::Mutex::new(progress));
    let chunks = util::ChunkRanges::new(chunk_size, counts.len());

    parallel::reduce::Stepwise::new(
        chunks.enumerate(),
        thread_limit,
        {
            let progress = Arc::clone(&progress);
            move |n| {
                (
                    Vec::new(), // object data buffer
                    progress
                        .lock()
                        .add_child_with_id(format!("thread {}", n), git_features::progress::UNKNOWN),
                )
            }
        },
        {
            let counts = Arc::clone(&counts);
            move |(chunk_id, chunk_range): (SequenceId, std::ops::Range<usize>), (buf, progress)| {
                let mut out = Vec::new();
                let chunk = &counts[chunk_range];
                let mut stats = Outcome::default();
                // Per-pack cache of sorted (offset, oid) pairs, used to resolve
                // ref-delta bases when producing a thin pack.
                let mut pack_offsets_to_id = None;
                progress.init(Some(chunk.len()), git_features::progress::count("objects"));

                for count in chunk.iter() {
                    out.push(match count
                        .entry_pack_location
                        .as_ref()
                        .and_then(|l| db.entry_by_location(l).map(|pe| (l, pe)))
                    {
                        Some((location, pack_entry)) => {
                            // Invalidate the offset cache whenever the pack changes.
                            if let Some((cached_pack_id, _)) = &pack_offsets_to_id {
                                if *cached_pack_id != location.pack_id {
                                    pack_offsets_to_id = None;
                                }
                            }
                            let pack_range = counts_range_by_pack_id[counts_range_by_pack_id
                                .binary_search_by_key(&location.pack_id, |e| e.0)
                                .expect("pack-id always present")]
                            .1
                            .clone();
                            let base_index_offset = pack_range.start;
                            let counts_in_pack = &counts[pack_range];
                            match output::Entry::from_pack_entry(
                                pack_entry,
                                count,
                                counts_in_pack,
                                base_index_offset,
                                allow_thin_pack.then(|| {
                                    // Resolve a ref-delta base offset to its object id via the
                                    // lazily-built, sorted per-pack offset table.
                                    |pack_id, base_offset| {
                                        let (cached_pack_id, cache) = pack_offsets_to_id.get_or_insert_with(|| {
                                            db.pack_offsets_and_oid(pack_id)
                                                .map(|mut v| {
                                                    v.sort_by_key(|e| e.0);
                                                    (pack_id, v)
                                                })
                                                .expect("pack used for counts is still available")
                                        });
                                        debug_assert_eq!(*cached_pack_id, pack_id);
                                        stats.ref_delta_objects += 1;
                                        cache
                                            .binary_search_by_key(&base_offset, |e| e.0)
                                            .ok()
                                            .map(|idx| cache[idx].1)
                                    }
                                }),
                                version,
                            ) {
                                Some(entry) => {
                                    stats.objects_copied_from_pack += 1;
                                    entry
                                }
                                // Couldn't copy from the pack - fall back to decoding the object.
                                None => match db.try_find(count.id, buf).map_err(Error::FindExisting)? {
                                    Some((obj, _location)) => {
                                        stats.decoded_and_recompressed_objects += 1;
                                        output::Entry::from_data(count, &obj)
                                    }
                                    None => {
                                        stats.missing_objects += 1;
                                        Ok(output::Entry::invalid())
                                    }
                                },
                            }
                        }
                        // No usable pack entry: decode the object and recompress it.
                        None => match db.try_find(count.id, buf).map_err(Error::FindExisting)? {
                            Some((obj, _location)) => {
                                stats.decoded_and_recompressed_objects += 1;
                                output::Entry::from_data(count, &obj)
                            }
                            None => {
                                stats.missing_objects += 1;
                                Ok(output::Entry::invalid())
                            }
                        },
                    }?);
                    progress.inc();
                }
                Ok((chunk_id, out, stats))
            }
        },
        reduce::Statistics::default(),
    )
}

Implementations on Foreign Types§

Implementors§