re_types_core/datatypes/
utf8.rs1#![allow(unused_braces)]
5#![allow(unused_imports)]
6#![allow(unused_parens)]
7#![allow(clippy::clone_on_copy)]
8#![allow(clippy::cloned_instead_of_copied)]
9#![allow(clippy::map_flatten)]
10#![allow(clippy::needless_question_mark)]
11#![allow(clippy::new_without_default)]
12#![allow(clippy::redundant_closure)]
13#![allow(clippy::too_many_arguments)]
14#![allow(clippy::too_many_lines)]
15
16use crate::try_serialize_field;
17use crate::SerializationResult;
18use crate::{ComponentBatch as _, SerializedComponentBatch};
19use crate::{ComponentDescriptor, ComponentType};
20use crate::{DeserializationError, DeserializationResult};
21
22#[derive(Clone, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
24#[repr(transparent)]
25pub struct Utf8(pub crate::ArrowString);
26
27crate::macros::impl_into_cow!(Utf8);
28
29impl crate::Loggable for Utf8 {
30 #[inline]
31 fn arrow_datatype() -> arrow::datatypes::DataType {
32 #![allow(clippy::wildcard_imports)]
33 use arrow::datatypes::*;
34 DataType::Utf8
35 }
36
37 fn to_arrow_opt<'a>(
38 data: impl IntoIterator<Item = Option<impl Into<::std::borrow::Cow<'a, Self>>>>,
39 ) -> SerializationResult<arrow::array::ArrayRef>
40 where
41 Self: Clone + 'a,
42 {
43 #![allow(clippy::wildcard_imports)]
44 #![allow(clippy::manual_is_variant_and)]
45 use crate::{arrow_helpers::as_array_ref, Loggable as _, ResultExt as _};
46 use arrow::{array::*, buffer::*, datatypes::*};
47 Ok({
48 let (somes, data0): (Vec<_>, Vec<_>) = data
49 .into_iter()
50 .map(|datum| {
51 let datum: Option<::std::borrow::Cow<'a, Self>> = datum.map(Into::into);
52 let datum = datum.map(|datum| datum.into_owned().0);
53 (datum.is_some(), datum)
54 })
55 .unzip();
56 let data0_validity: Option<arrow::buffer::NullBuffer> = {
57 let any_nones = somes.iter().any(|some| !*some);
58 any_nones.then(|| somes.into())
59 };
60 {
61 let offsets = arrow::buffer::OffsetBuffer::<i32>::from_lengths(
62 data0
63 .iter()
64 .map(|opt| opt.as_ref().map(|datum| datum.len()).unwrap_or_default()),
65 );
66
67 #[allow(clippy::unwrap_used)]
68 let capacity = offsets.last().copied().unwrap() as usize;
69 let mut buffer_builder = arrow::array::builder::BufferBuilder::<u8>::new(capacity);
70 for data in data0.iter().flatten() {
71 buffer_builder.append_slice(data.as_bytes());
72 }
73 let inner_data: arrow::buffer::Buffer = buffer_builder.finish();
74
75 #[allow(unsafe_code, clippy::undocumented_unsafe_blocks)]
76 as_array_ref(unsafe {
77 StringArray::new_unchecked(offsets, inner_data, data0_validity)
78 })
79 }
80 })
81 }
82
83 fn from_arrow_opt(
84 arrow_data: &dyn arrow::array::Array,
85 ) -> DeserializationResult<Vec<Option<Self>>>
86 where
87 Self: Sized,
88 {
89 #![allow(clippy::wildcard_imports)]
90 use crate::{arrow_zip_validity::ZipValidity, Loggable as _, ResultExt as _};
91 use arrow::{array::*, buffer::*, datatypes::*};
92 Ok({
93 let arrow_data = arrow_data
94 .as_any()
95 .downcast_ref::<StringArray>()
96 .ok_or_else(|| {
97 let expected = Self::arrow_datatype();
98 let actual = arrow_data.data_type().clone();
99 DeserializationError::datatype_mismatch(expected, actual)
100 })
101 .with_context("rerun.datatypes.Utf8#value")?;
102 let arrow_data_buf = arrow_data.values();
103 let offsets = arrow_data.offsets();
104 ZipValidity::new_with_validity(offsets.windows(2), arrow_data.nulls())
105 .map(|elem| {
106 elem.map(|window| {
107 let start = window[0] as usize;
108 let end = window[1] as usize;
109 let len = end - start;
110 if arrow_data_buf.len() < end {
111 return Err(DeserializationError::offset_slice_oob(
112 (start, end),
113 arrow_data_buf.len(),
114 ));
115 }
116
117 #[allow(unsafe_code, clippy::undocumented_unsafe_blocks)]
118 let data = arrow_data_buf.slice_with_length(start, len);
119 Ok(data)
120 })
121 .transpose()
122 })
123 .map(|res_or_opt| {
124 res_or_opt.map(|res_or_opt| res_or_opt.map(|v| crate::ArrowString::from(v)))
125 })
126 .collect::<DeserializationResult<Vec<Option<_>>>>()
127 .with_context("rerun.datatypes.Utf8#value")?
128 .into_iter()
129 }
130 .map(|v| v.ok_or_else(DeserializationError::missing_data))
131 .map(|res| res.map(|v| Some(Self(v))))
132 .collect::<DeserializationResult<Vec<Option<_>>>>()
133 .with_context("rerun.datatypes.Utf8#value")
134 .with_context("rerun.datatypes.Utf8")?)
135 }
136}
137
138impl From<crate::ArrowString> for Utf8 {
139 #[inline]
140 fn from(value: crate::ArrowString) -> Self {
141 Self(value)
142 }
143}
144
145impl From<Utf8> for crate::ArrowString {
146 #[inline]
147 fn from(value: Utf8) -> Self {
148 value.0
149 }
150}
151
152impl ::re_byte_size::SizeBytes for Utf8 {
153 #[inline]
154 fn heap_size_bytes(&self) -> u64 {
155 self.0.heap_size_bytes()
156 }
157
158 #[inline]
159 fn is_pod() -> bool {
160 <crate::ArrowString>::is_pod()
161 }
162}