Skip to main content

xet_client/cas_types/
mod.rs

1use core::fmt;
2use std::cmp::min;
3use std::collections::{HashMap, HashSet};
4use std::marker::PhantomData;
5use std::str::FromStr;
6
7use serde::{Deserialize, Serialize};
8use serde_repr::{Deserialize_repr, Serialize_repr};
9use thiserror::Error;
10use xet_core_structures::merklehash::MerkleHash;
11
12mod key;
13pub use key::*;
14
15/// Indicates a "session id" that clients can use to group together related requests
16/// (e.g. all requests made to CAS to support a user-triggered upload (xorbs + shards)).
17pub const SESSION_ID_HEADER: &str = "X-Xet-Session-Id";
18/// Request id generated by CAS for a request.
19pub const REQUEST_ID_HEADER: &str = "X-Request-Id";
20
21#[derive(Debug, Serialize, Deserialize, Clone)]
22pub struct UploadXorbResponse {
23    pub was_inserted: bool,
24}
25
/// Marker type tagging chunk ranges (`ChunkRange`).
///
/// The three marker types below differentiate the `Range<_, _>` type aliases,
/// so that ranges of different kinds do not silently cast to each other
/// without range adjustments.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct _C;
/// Marker type tagging file ranges (`FileRange`).
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct _F;
/// Marker type tagging HTTP ranges (`HttpRange`).
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct _H;
34
35/// Start and exclusive-end range for chunk content
36pub type ChunkRange = Range<u32, _C>;
37/// Start and exclusive-end range for file content
38pub type FileRange = Range<u64, _F>;
39/// Start and inclusive-end range for HTTP range content
40pub type HttpRange = Range<u64, _H>;
41
42impl FileRange {
43    pub fn full() -> Self {
44        Self::new(0, u64::MAX)
45    }
46
47    // consumes self and split the range into a segment of size `segment_size`
48    // and a remainder.
49    pub fn take_segment(self, segment_size: u64) -> (Self, Option<Self>) {
50        let segment = FileRange {
51            start: self.start,
52            end: min(self.end, self.start + segment_size),
53            _marker: PhantomData,
54        };
55
56        let remainder = if segment.end == self.end {
57            None
58        } else {
59            Some(FileRange {
60                start: segment.end,
61                end: self.end,
62                _marker: PhantomData,
63            })
64        };
65
66        (segment, remainder)
67    }
68
69    pub fn length(&self) -> u64 {
70        self.end - self.start
71    }
72}
73
74impl From<HttpRange> for FileRange {
75    fn from(value: HttpRange) -> Self {
76        // right inclusive to right exclusive
77        FileRange::new(value.start, value.end + 1)
78    }
79}
80
81impl HttpRange {
82    pub fn range_header(&self) -> String {
83        format!("bytes={self}")
84    }
85
86    pub fn length(&self) -> u64 {
87        self.end - self.start + 1
88    }
89}
90
91impl From<FileRange> for HttpRange {
92    fn from(value: FileRange) -> Self {
93        // right exclusive to right inclusive
94        HttpRange::new(value.start, value.end - 1)
95    }
96}
97
98// note that the standard PartialOrd/Ord impls will first check `start` then `end`
99#[derive(Serialize, Deserialize, Clone, Eq, PartialEq, PartialOrd, Ord, Default, Hash)]
100pub struct Range<Idx, Kind> {
101    pub start: Idx,
102    pub end: Idx,
103    #[serde(skip)]
104    pub _marker: PhantomData<Kind>,
105}
106
107impl<Idx, _C> fmt::Debug for Range<Idx, _C>
108where
109    Idx: fmt::Debug,
110{
111    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
112        f.debug_struct("Range")
113            .field("start", &self.start)
114            .field("end", &self.end)
115            .finish()
116    }
117}
118
119impl<Idx, Kind> Range<Idx, Kind> {
120    pub fn new(start: Idx, end: Idx) -> Self {
121        Self {
122            start,
123            end,
124            _marker: PhantomData,
125        }
126    }
127}
128
129impl<T: Copy, Kind: Copy> Copy for Range<T, Kind> {}
130
131impl<Idx: fmt::Display, Kind> fmt::Display for Range<Idx, Kind> {
132    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
133        write!(f, "{}-{}", self.start, self.end)
134    }
135}
136
137#[derive(Error, Debug)]
138pub enum RangeParseError<Idx: std::str::FromStr> {
139    #[error("Invalid format, expect [start]-[end]")]
140    InvalidFormat,
141    #[error("Incorrect number: {0}")]
142    ParseError(Idx::Err),
143}
144
145impl<Idx: FromStr, Kind> TryFrom<&str> for Range<Idx, Kind> {
146    type Error = RangeParseError<Idx>;
147
148    fn try_from(value: &str) -> Result<Self, Self::Error> {
149        let parts: Vec<&str> = value.splitn(2, '-').collect();
150
151        if parts.len() != 2 {
152            return Err(RangeParseError::InvalidFormat);
153        }
154
155        let start = parts[0].parse::<Idx>().map_err(RangeParseError::ParseError)?;
156        let end = parts[1].parse::<Idx>().map_err(RangeParseError::ParseError)?;
157
158        Ok(Range {
159            start,
160            end,
161            _marker: PhantomData,
162        })
163    }
164}
165
166impl<Idx: FromStr, Kind> FromStr for Range<Idx, Kind> {
167    type Err = RangeParseError<Idx>;
168
169    fn from_str(value: &str) -> Result<Self, Self::Err> {
170        Self::try_from(value)
171    }
172}
173
174/// Describes a portion of a reconstructed file, namely the xorb and
175/// a range of chunks within that xorb that are needed.
176///
177/// unpacked_length is used for validation, the result data of this term
178/// should have that field's value as its length
179#[derive(Debug, Serialize, Deserialize, Clone)]
180pub struct XorbReconstructionTerm {
181    pub hash: HexMerkleHash,
182    // the resulting data from deserializing the range in this term
183    // should have a length equal to `unpacked_length`
184    pub unpacked_length: u32,
185    // chunk index start and end in a xorb
186    pub range: ChunkRange,
187}
188
189/// To use a XorbReconstructionFetchInfo fetch info all that's needed
190/// is an http get request on the url with the Range header directly
191/// formed from the url_range values.
192///
193/// the `range` key describes the chunk range within the xorb that the
194/// url is used to fetch
195#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq, Hash)]
196pub struct XorbReconstructionFetchInfo {
197    // chunk index start and end in a xorb
198    pub range: ChunkRange,
199    pub url: String,
200    // byte index start and end in a xorb, used exclusively for Range header
201    pub url_range: HttpRange,
202}
203
204#[derive(Debug, Serialize, Deserialize, Clone)]
205pub struct QueryReconstructionResponse {
206    // For range query [a, b) into a file content, the location
207    // of "a" into the first range.
208    pub offset_into_first_range: u64,
209    // Series of terms describing a xorb hash and chunk range to be retreived
210    // to reconstruct the file
211    pub terms: Vec<XorbReconstructionTerm>,
212    // information to fetch xorb ranges to reconstruct the file
213    // each key is a hash that is present in the `terms` field reconstruction
214    // terms, the values are information we will need to fetch ranges from
215    // each xorb needed to reconstruct the file
216    pub fetch_info: HashMap<HexMerkleHash, Vec<XorbReconstructionFetchInfo>>,
217}
218
219/// V2 reconstruction response - optimized for multi-range fetching.
220/// May provide fewer signed URLs per xorb by combining multiple byte ranges
221/// into a single URL where possible.
222#[derive(Debug, Serialize, Deserialize, Clone)]
223pub struct QueryReconstructionResponseV2 {
224    pub offset_into_first_range: u64,
225    pub terms: Vec<XorbReconstructionTerm>,
226    /// Map from xorb hash -> list of multi-range fetch entries.
227    /// Typically 1 entry per xorb. Multiple entries when the URL length limit
228    /// (~8 KiB, roughly ~500 ranges) forces a split.
229    pub xorbs: HashMap<HexMerkleHash, Vec<XorbMultiRangeFetch>>,
230}
231
232/// A signed multi-range fetch: one URL covering a subset of ranges for a xorb.
233#[derive(Debug, Serialize, Deserialize, Clone)]
234pub struct XorbMultiRangeFetch {
235    /// Signed URL with all byte ranges encoded. Client must send exactly the
236    /// signed range value as the Range header.
237    pub url: String,
238    /// Byte ranges covered by this URL, sorted by chunk start.
239    pub ranges: Vec<XorbRangeDescriptor>,
240}
241
242/// A single byte range within a xorb, mapping chunk indices to physical bytes.
243#[derive(Debug, Serialize, Deserialize, Clone)]
244pub struct XorbRangeDescriptor {
245    /// Chunk index range [start, end) within the xorb.
246    pub chunks: ChunkRange,
247    /// Physical byte range [start, end] (inclusive end) for the HTTP Range header.
248    pub bytes: HttpRange,
249}
250
251impl From<QueryReconstructionResponse> for QueryReconstructionResponseV2 {
252    fn from(v1: QueryReconstructionResponse) -> Self {
253        let xorbs = v1
254            .fetch_info
255            .into_iter()
256            .map(|(hash, fetch_infos)| {
257                let fetch = fetch_infos
258                    .into_iter()
259                    .map(|info| XorbMultiRangeFetch {
260                        url: info.url,
261                        ranges: vec![XorbRangeDescriptor {
262                            chunks: info.range,
263                            bytes: info.url_range,
264                        }],
265                    })
266                    .collect();
267                (hash, fetch)
268            })
269            .collect();
270
271        QueryReconstructionResponseV2 {
272            offset_into_first_range: v1.offset_into_first_range,
273            terms: v1.terms,
274            xorbs,
275        }
276    }
277}
278
279// Request json body type representation for the POST /reconstructions endpoint
280// to get the reconstruction for multiple files at a time.
281// listing of non-duplicate (enforced by HashSet) keys (file ids) to get reconstructions for
282pub type BatchQueryReconstructionRequest = HashSet<HexKey>;
283
284// Response type for querying reconstruction for a batch of files
285#[derive(Debug, Serialize, Deserialize, Clone)]
286pub struct BatchQueryReconstructionResponse {
287    // Map of FileID to series of terms describing a xorb hash and chunk range to be retreived
288    // to reconstruct the file
289    pub files: HashMap<HexMerkleHash, Vec<XorbReconstructionTerm>>,
290    // information to fetch xorb ranges to reconstruct the file
291    // each key is a hash that is present in the `terms` field reconstruction
292    // terms, the values are information we will need to fetch ranges from
293    // each xorb needed to reconstruct the file
294    pub fetch_info: HashMap<HexMerkleHash, Vec<XorbReconstructionFetchInfo>>,
295}
296
297#[derive(Debug, Serialize_repr, Deserialize_repr, Clone, Copy)]
298#[repr(u8)]
299pub enum UploadShardResponseType {
300    Exists = 0,
301    SyncPerformed = 1,
302}
303
304#[derive(Debug, Serialize, Deserialize, Clone)]
305pub struct UploadShardResponse {
306    pub result: UploadShardResponseType,
307}
308
309#[derive(Debug, Serialize, Deserialize, Clone)]
310pub struct QueryChunkResponse {
311    pub shard: MerkleHash,
312}
313
#[cfg(test)]
mod tests {
    use super::*;

    /// Taking a segment off the full range leaves the rest of the range,
    /// up to `u64::MAX`, as the remainder.
    #[test]
    fn test_file_range_segment() {
        let segment_size = 824820;

        let (head, tail) = FileRange::full().take_segment(segment_size);

        assert_eq!(head, FileRange::new(0, segment_size));
        assert_eq!(tail, Some(FileRange::new(segment_size, u64::MAX)));
    }

    /// Repeatedly taking segments ends with a short final segment and no
    /// remainder.
    #[test]
    fn test_file_range_segment_no_remainder() {
        let segment_size = 40;

        let (first, rest) = FileRange::new(50, 100).take_segment(segment_size);
        assert_eq!(first, FileRange::new(50, 90));
        assert_eq!(rest, Some(FileRange::new(90, 100)));

        let (second, rest) = rest.unwrap().take_segment(segment_size);
        assert_eq!(second, FileRange::new(90, 100));
        assert_eq!(rest, None);
    }

    /// Conversions between the range kinds adjust the end bound by one.
    #[test]
    fn test_http_range_type_casting() {
        let as_http: HttpRange = FileRange::new(0, 10).into();
        assert_eq!(as_http, HttpRange::new(0, 9));

        let as_file: FileRange = HttpRange::new(0, 10).into();
        assert_eq!(as_file, FileRange::new(0, 11));
    }
}