1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The Lance Authors
//! Utilities for byte arrays
use std::{ops::Deref, ptr::NonNull, sync::Arc};
use arrow_buffer::Buffer;
use snafu::{location, Location};
use lance_core::{Error, Result};
/// A copy-on-write byte buffer
///
/// It can be created from read-only buffers (e.g. bytes::Bytes or arrow_buffer::Buffer), i.e. "borrowed",
/// or from writeable buffers (e.g. Vec<u8>), i.e. "owned".
///
/// The buffer can switch to borrowed mode without a copy of the data.
///
/// LanceBuffer does not implement Clone because doing so could silently trigger a copy of the data
/// and we want to make sure that the user is aware of this operation.
///
/// If you need to clone a LanceBuffer you can use borrow_and_clone() which will make sure that the buffer
/// is in borrowed mode before cloning. This is a zero copy operation (but requires &mut self).
#[derive(Debug)]
pub enum LanceBuffer {
/// Data held in an Arrow `Buffer` ("borrowed" mode).
Borrowed(Buffer),
/// Data held in a plain byte vector ("owned" mode).
Owned(Vec<u8>),
}
impl LanceBuffer {
    /// Convert into a mutable buffer. If this is a borrowed buffer, the data will be copied.
    pub fn into_owned(self) -> Vec<u8> {
        match self {
            Self::Borrowed(buffer) => buffer.to_vec(),
            Self::Owned(buffer) => buffer,
        }
    }

    /// Convert into an Arrow buffer. Never copies data.
    pub fn into_buffer(self) -> Buffer {
        match self {
            Self::Borrowed(buffer) => buffer,
            Self::Owned(buffer) => Buffer::from_vec(buffer),
        }
    }

    /// Create a LanceBuffer from a bytes::Bytes object
    ///
    /// The alignment must be specified (as `bytes_per_value`) since we want to make
    /// sure we can safely reinterpret the buffer.
    ///
    /// If `bytes_per_value` is a power of two and the data is not aligned to it, a copy
    /// is made and an owned buffer is returned. Otherwise the conversion is zero-copy
    /// and a borrowed buffer is returned.
    ///
    /// Widths that are not a power of two (including zero) have no natural alignment
    /// to satisfy, so they never force a copy.
    ///
    /// # Panics
    ///
    /// Panics if `bytes` reports a null data pointer.
    pub fn from_bytes(bytes: bytes::Bytes, bytes_per_value: u64) -> Self {
        // `align_offset` panics unless its argument is a power of two, so it is only
        // consulted for widths that fit in `usize` and are a power of two.
        let needs_copy = match usize::try_from(bytes_per_value) {
            Ok(align) if align.is_power_of_two() => bytes.as_ptr().align_offset(align) != 0,
            _ => false,
        };

        if needs_copy {
            // The original buffer is not aligned, cannot zero-copy
            Self::Owned(bytes.to_vec())
        } else {
            // The original buffer is aligned (or has no alignment requirement), can zero-copy
            let ptr = NonNull::new(bytes.as_ptr() as *mut u8).expect("should be a valid pointer");
            let len = bytes.len();
            // SAFETY: `ptr` and `len` describe exactly the bytes held by `bytes`, and
            // `bytes` is moved into the `Arc` handed over as the allocation's owner.
            let buffer = unsafe { Buffer::from_custom_allocation(ptr, len, Arc::new(bytes)) };
            Self::Borrowed(buffer)
        }
    }

    /// Convert into a borrowed buffer, this is a zero-copy operation
    ///
    /// This is often called before cloning the buffer
    pub fn into_borrowed(self) -> Self {
        match self {
            Self::Borrowed(_) => self,
            Self::Owned(buffer) => Self::Borrowed(Buffer::from_vec(buffer)),
        }
    }

    /// Creates an owned copy of the buffer, will always involve a full copy of the bytes
    pub fn to_owned(&self) -> Self {
        match self {
            Self::Borrowed(buffer) => Self::Owned(buffer.to_vec()),
            Self::Owned(buffer) => Self::Owned(buffer.clone()),
        }
    }

    /// Creates a clone of the buffer but also puts the buffer into borrowed mode
    ///
    /// This is a zero-copy operation
    pub fn borrow_and_clone(&mut self) -> Self {
        match self {
            Self::Borrowed(buffer) => Self::Borrowed(buffer.clone()),
            Self::Owned(buffer) => {
                // Move the vector out (leaving an empty one behind) so it can be
                // handed to Arrow without copying, then store the new buffer in `self`.
                let buf_data = std::mem::take(buffer);
                let buffer = Buffer::from_vec(buf_data);
                *self = Self::Borrowed(buffer.clone());
                Self::Borrowed(buffer)
            }
        }
    }

    /// Clones the buffer but fails if the buffer is in owned mode
    ///
    /// # Errors
    ///
    /// Returns `Error::Internal` when called on an owned buffer.
    pub fn try_clone(&self) -> Result<Self> {
        match self {
            Self::Borrowed(buffer) => Ok(Self::Borrowed(buffer.clone())),
            Self::Owned(_) => Err(Error::Internal {
                message: "try_clone called on an owned buffer".to_string(),
                location: location!(),
            }),
        }
    }
}
impl AsRef<[u8]> for LanceBuffer {
    /// Views the contents as a byte slice, whichever variant holds them.
    fn as_ref(&self) -> &[u8] {
        let bytes: &[u8] = match self {
            Self::Owned(vec) => vec.as_slice(),
            Self::Borrowed(arrow_buf) => arrow_buf.as_slice(),
        };
        bytes
    }
}
impl Deref for LanceBuffer {
    type Target = [u8];

    /// Dereferences to the same byte slice that `AsRef<[u8]>` exposes.
    fn deref(&self) -> &[u8] {
        <Self as AsRef<[u8]>>::as_ref(self)
    }
}
impl From<Vec<u8>> for LanceBuffer {
fn from(buffer: Vec<u8>) -> Self {
Self::Owned(buffer)
}
}
impl From<Buffer> for LanceBuffer {
fn from(buffer: Buffer) -> Self {
Self::Borrowed(buffer)
}
}