1use crate::Result;
2use memmap2::{Mmap, MmapMut};
3use std::{borrow::Cow, ops::Range, ptr::NonNull, sync::Arc};
4
5#[cfg(unix)]
6pub(crate) use std::os::fd::AsRawFd as Mmappable;
7#[cfg(windows)]
8pub(crate) use std::os::windows::io::AsRawHandle as Mmappable;
9
/// A window over a larger stream of bytes, stored in a backing buffer `Buf`.
///
/// The segment remembers which span of positions it covers so callers can
/// translate stream positions into offsets within `data`.
pub struct SegmentRaw<Buf> {
    /// Half-open span of stream positions (`start..end`) covered by this segment.
    range: Range<u64>,
    /// Backing storage that holds the segment's bytes.
    data: Buf,
}
14
/// Writable segment whose buffer is a mutable memory map ([`MmapMut`]).
pub type SegmentMut = SegmentRaw<MmapMut>;
/// Read-only segment whose buffer is an immutable memory map ([`Mmap`]).
pub type Segment = SegmentRaw<Mmap>;
17
18impl<Buf> SegmentRaw<Buf>
19where
20 Buf: AsRef<[u8]>,
21{
22 pub const MAX_SIZE: u64 = 1 << 20;
23
24 #[inline]
25 pub fn start(&self) -> u64 {
26 self.range.start
27 }
28
29 #[inline]
30 pub fn translate_inner_data_index(&self, start: u64) -> u64 {
31 debug_assert!(self.range.start <= start);
32 debug_assert!(start <= self.range.end);
36 start - self.range.start
37 }
38
39 #[inline]
40 pub fn translate_inner_data_range(&self, start: u64, end: u64) -> Range<u64> {
41 self.translate_inner_data_index(start)..self.translate_inner_data_index(end)
42 }
43
44 #[inline]
45 pub fn id_of_data(start: u64) -> usize {
46 (start / Self::MAX_SIZE) as usize
47 }
48
49 #[inline]
50 pub fn data_range_of_id(id: usize) -> Range<u64> {
51 let start = id as u64 * Self::MAX_SIZE;
52 start..start + Self::MAX_SIZE
53 }
54}
55
56impl<Buf> std::ops::Deref for SegmentRaw<Buf>
57where
58 Buf: std::ops::Deref<Target = [u8]>,
59{
60 type Target = [u8];
61
62 fn deref(&self) -> &Self::Target {
63 &self.data
64 }
65}
66
67impl<Buf> std::ops::DerefMut for SegmentRaw<Buf>
68where
69 Buf: std::ops::DerefMut<Target = [u8]>,
70{
71 fn deref_mut(&mut self) -> &mut Self::Target {
72 &mut self.data
73 }
74}
75
76impl SegmentMut {
77 pub(crate) fn new(start: u64) -> Result<Self> {
78 let data = memmap2::MmapOptions::new()
79 .len(Self::MAX_SIZE as usize)
80 .map_anon()?;
81 #[cfg(unix)]
82 data.advise(memmap2::Advice::Sequential)?;
83 Ok(Self {
84 data,
85 range: start..start + Self::MAX_SIZE,
86 })
87 }
88
89 pub fn into_read_only(self) -> Result<Segment> {
90 Ok(Segment {
91 data: self.data.make_read_only()?,
92 range: self.range,
93 })
94 }
95}
96
97impl Segment {
98 pub(crate) fn map_file<F: Mmappable>(range: Range<u64>, file: &F) -> Result<Self> {
99 let size = range.end - range.start;
100 debug_assert!(size <= Self::MAX_SIZE);
101 let data = unsafe {
102 memmap2::MmapOptions::new()
103 .offset(range.start)
104 .len(size as usize)
105 .map(file)?
106 };
107 #[cfg(unix)]
108 data.advise(memmap2::Advice::WillNeed)?;
109 Ok(Self { data, range })
110 }
111
112 #[inline]
113 pub fn get_line(self: &Arc<Self>, range: Range<u64>) -> SegStr {
114 SegStr::from_bytes(self.get_bytes(range))
115 }
116
117 #[inline]
118 pub fn get_bytes(self: &Arc<Self>, range: Range<u64>) -> SegBytes {
119 SegBytes::new_borrow(self.clone(), range)
120 }
121}
122
/// Byte buffer that either points into a shared [`Segment`] or owns its bytes.
///
/// The borrowed form holds an [`Arc`] to the segment it points into.
pub struct SegBytes(SegBytesRepr);
130
/// Internal representation behind [`SegBytes`].
enum SegBytesRepr {
    /// Bytes located inside a segment's buffer.
    Borrowed {
        /// Shared handle to the segment that `ptr` points into.
        _ref: Arc<Segment>,
        /// Address of the first borrowed byte.
        ptr: NonNull<u8>,
        /// Number of bytes readable starting at `ptr`.
        len: usize,
    },
    /// Bytes held in a heap-allocated vector.
    Owned(Vec<u8>),
}
144
145impl SegBytes {
146 fn new_borrow(origin: Arc<Segment>, range: Range<u64>) -> Self {
154 let data = &origin.data[range.start as usize..range.end as usize];
160 Self(SegBytesRepr::Borrowed {
161 ptr: unsafe { NonNull::new(data.as_ptr().cast_mut()).unwrap_unchecked() },
162 len: data.len(),
163 _ref: origin,
164 })
165 }
166
167 #[inline]
169 pub fn new_owned(s: Vec<u8>) -> Self {
170 Self(SegBytesRepr::Owned(s))
171 }
172
173 #[inline]
175 pub fn as_bytes(&self) -> &[u8] {
176 match &self.0 {
178 SegBytesRepr::Borrowed { ptr, len, .. } => unsafe {
179 std::slice::from_raw_parts(ptr.as_ptr(), *len)
180 },
181 SegBytesRepr::Owned(s) => s.as_slice(),
182 }
183 }
184}
185
186impl std::borrow::Borrow<[u8]> for SegBytes {
187 #[inline]
188 fn borrow(&self) -> &[u8] {
189 self
190 }
191}
192
193impl std::ops::Deref for SegBytes {
194 type Target = [u8];
195
196 #[inline]
197 fn deref(&self) -> &Self::Target {
198 self.as_bytes()
199 }
200}
201
202impl std::convert::AsRef<[u8]> for SegBytes {
203 #[inline]
204 fn as_ref(&self) -> &[u8] {
205 self.as_bytes()
206 }
207}
208
/// String that either points at UTF-8 text inside a shared [`Segment`] or owns
/// a [`String`].
///
/// Cloning the borrowed form clones the [`Arc`] handle, not the text.
#[derive(Clone)]
pub struct SegStr(SegStrRepr);
217
/// Internal representation behind [`SegStr`].
#[derive(Clone)]
enum SegStrRepr {
    /// UTF-8 text located inside a segment's buffer.
    Borrowed {
        /// Shared handle to the segment that `ptr` points into.
        _ref: Arc<Segment>,
        /// Address of the first byte of the text.
        ptr: NonNull<u8>,
        /// Length of the text in bytes.
        len: usize,
    },
    /// Text held in a heap-allocated string.
    Owned(String),
}
232
233impl SegStr {
234 pub fn from_bytes(bytes: SegBytes) -> Self {
237 match bytes.0 {
238 SegBytesRepr::Borrowed { _ref, ptr, len } => {
239 let data = unsafe { std::slice::from_raw_parts(ptr.as_ptr(), len) };
241 match String::from_utf8_lossy(data) {
242 Cow::Owned(s) => Self(SegStrRepr::Owned(s)),
243 Cow::Borrowed(_) => Self(SegStrRepr::Borrowed { ptr, len, _ref }),
244 }
245 }
246 SegBytesRepr::Owned(b) => match String::from_utf8_lossy(&b) {
247 Cow::Owned(s) => Self(SegStrRepr::Owned(s)),
248 Cow::Borrowed(_) => {
249 Self(SegStrRepr::Owned(unsafe { String::from_utf8_unchecked(b) }))
252 }
253 },
254 }
255 }
256
257 #[inline]
259 pub fn as_bytes(&self) -> &[u8] {
260 match &self.0 {
262 SegStrRepr::Borrowed { ptr, len, .. } => unsafe {
263 std::slice::from_raw_parts(ptr.as_ptr(), *len)
264 },
265 SegStrRepr::Owned(s) => s.as_bytes(),
266 }
267 }
268
269 #[inline]
271 pub fn as_str(&self) -> &str {
272 unsafe { std::str::from_utf8_unchecked(self.as_bytes()) }
274 }
275}
276
277impl std::borrow::Borrow<str> for SegStr {
278 #[inline]
279 fn borrow(&self) -> &str {
280 self
281 }
282}
283
284impl std::ops::Deref for SegStr {
285 type Target = str;
286
287 #[inline]
288 fn deref(&self) -> &Self::Target {
289 self.as_str()
290 }
291}
292
293impl std::convert::AsRef<str> for SegStr {
294 #[inline]
295 fn as_ref(&self) -> &str {
296 self.as_str()
297 }
298}
299
300impl std::fmt::Debug for SegStr {
301 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
302 std::fmt::Debug::fmt(self.as_str(), f)
303 }
304}