re_types_core/datatypes/
utf8.rs1#![allow(unused_imports)]
5#![allow(unused_parens)]
6#![allow(clippy::clone_on_copy)]
7#![allow(clippy::cloned_instead_of_copied)]
8#![allow(clippy::map_flatten)]
9#![allow(clippy::needless_question_mark)]
10#![allow(clippy::new_without_default)]
11#![allow(clippy::redundant_closure)]
12#![allow(clippy::too_many_arguments)]
13#![allow(clippy::too_many_lines)]
14
15use crate::try_serialize_field;
16use crate::SerializationResult;
17use crate::{ComponentBatch, SerializedComponentBatch};
18use crate::{ComponentDescriptor, ComponentName};
19use crate::{DeserializationError, DeserializationResult};
20
21#[derive(Clone, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
23#[repr(transparent)]
24pub struct Utf8(pub crate::ArrowString);
25
26crate::macros::impl_into_cow!(Utf8);
27
28impl crate::Loggable for Utf8 {
29 #[inline]
30 fn arrow_datatype() -> arrow::datatypes::DataType {
31 #![allow(clippy::wildcard_imports)]
32 use arrow::datatypes::*;
33 DataType::Utf8
34 }
35
36 fn to_arrow_opt<'a>(
37 data: impl IntoIterator<Item = Option<impl Into<::std::borrow::Cow<'a, Self>>>>,
38 ) -> SerializationResult<arrow::array::ArrayRef>
39 where
40 Self: Clone + 'a,
41 {
42 #![allow(clippy::wildcard_imports)]
43 #![allow(clippy::manual_is_variant_and)]
44 use crate::{arrow_helpers::as_array_ref, Loggable as _, ResultExt as _};
45 use arrow::{array::*, buffer::*, datatypes::*};
46 Ok({
47 let (somes, data0): (Vec<_>, Vec<_>) = data
48 .into_iter()
49 .map(|datum| {
50 let datum: Option<::std::borrow::Cow<'a, Self>> = datum.map(Into::into);
51 let datum = datum.map(|datum| datum.into_owned().0);
52 (datum.is_some(), datum)
53 })
54 .unzip();
55 let data0_validity: Option<arrow::buffer::NullBuffer> = {
56 let any_nones = somes.iter().any(|some| !*some);
57 any_nones.then(|| somes.into())
58 };
59 {
60 let offsets = arrow::buffer::OffsetBuffer::<i32>::from_lengths(
61 data0
62 .iter()
63 .map(|opt| opt.as_ref().map(|datum| datum.len()).unwrap_or_default()),
64 );
65
66 #[allow(clippy::unwrap_used)]
67 let capacity = offsets.last().copied().unwrap() as usize;
68 let mut buffer_builder = arrow::array::builder::BufferBuilder::<u8>::new(capacity);
69 for data in data0.iter().flatten() {
70 buffer_builder.append_slice(data.as_bytes());
71 }
72 let inner_data: arrow::buffer::Buffer = buffer_builder.finish();
73
74 #[allow(unsafe_code, clippy::undocumented_unsafe_blocks)]
75 as_array_ref(unsafe {
76 StringArray::new_unchecked(offsets, inner_data, data0_validity)
77 })
78 }
79 })
80 }
81
82 fn from_arrow_opt(
83 arrow_data: &dyn arrow::array::Array,
84 ) -> DeserializationResult<Vec<Option<Self>>>
85 where
86 Self: Sized,
87 {
88 #![allow(clippy::wildcard_imports)]
89 use crate::{arrow_zip_validity::ZipValidity, Loggable as _, ResultExt as _};
90 use arrow::{array::*, buffer::*, datatypes::*};
91 Ok({
92 let arrow_data = arrow_data
93 .as_any()
94 .downcast_ref::<StringArray>()
95 .ok_or_else(|| {
96 let expected = Self::arrow_datatype();
97 let actual = arrow_data.data_type().clone();
98 DeserializationError::datatype_mismatch(expected, actual)
99 })
100 .with_context("rerun.datatypes.Utf8#value")?;
101 let arrow_data_buf = arrow_data.values();
102 let offsets = arrow_data.offsets();
103 ZipValidity::new_with_validity(offsets.windows(2), arrow_data.nulls())
104 .map(|elem| {
105 elem.map(|window| {
106 let start = window[0] as usize;
107 let end = window[1] as usize;
108 let len = end - start;
109 if arrow_data_buf.len() < end {
110 return Err(DeserializationError::offset_slice_oob(
111 (start, end),
112 arrow_data_buf.len(),
113 ));
114 }
115
116 #[allow(unsafe_code, clippy::undocumented_unsafe_blocks)]
117 let data = arrow_data_buf.slice_with_length(start, len);
118 Ok(data)
119 })
120 .transpose()
121 })
122 .map(|res_or_opt| {
123 res_or_opt.map(|res_or_opt| res_or_opt.map(|v| crate::ArrowString::from(v)))
124 })
125 .collect::<DeserializationResult<Vec<Option<_>>>>()
126 .with_context("rerun.datatypes.Utf8#value")?
127 .into_iter()
128 }
129 .map(|v| v.ok_or_else(DeserializationError::missing_data))
130 .map(|res| res.map(|v| Some(Self(v))))
131 .collect::<DeserializationResult<Vec<Option<_>>>>()
132 .with_context("rerun.datatypes.Utf8#value")
133 .with_context("rerun.datatypes.Utf8")?)
134 }
135}
136
137impl From<crate::ArrowString> for Utf8 {
138 #[inline]
139 fn from(value: crate::ArrowString) -> Self {
140 Self(value)
141 }
142}
143
144impl From<Utf8> for crate::ArrowString {
145 #[inline]
146 fn from(value: Utf8) -> Self {
147 value.0
148 }
149}
150
151impl ::re_byte_size::SizeBytes for Utf8 {
152 #[inline]
153 fn heap_size_bytes(&self) -> u64 {
154 self.0.heap_size_bytes()
155 }
156
157 #[inline]
158 fn is_pod() -> bool {
159 <crate::ArrowString>::is_pod()
160 }
161}