re_types_core/datatypes/
utf8.rs1#![allow(unused_braces)]
5#![allow(unused_imports)]
6#![allow(unused_parens)]
7#![allow(clippy::allow_attributes)]
8#![allow(clippy::clone_on_copy)]
9#![allow(clippy::cloned_instead_of_copied)]
10#![allow(clippy::map_flatten)]
11#![allow(clippy::needless_question_mark)]
12#![allow(clippy::new_without_default)]
13#![allow(clippy::redundant_closure)]
14#![allow(clippy::too_many_arguments)]
15#![allow(clippy::too_many_lines)]
16#![allow(clippy::wildcard_imports)]
17
18use crate::SerializationResult;
19use crate::try_serialize_field;
20use crate::{ComponentBatch as _, SerializedComponentBatch};
21use crate::{ComponentDescriptor, ComponentType};
22use crate::{DeserializationError, DeserializationResult};
23
24#[derive(Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
26#[repr(transparent)]
27pub struct Utf8(pub crate::ArrowString);
28
29crate::macros::impl_into_cow!(Utf8);
30
31impl crate::Loggable for Utf8 {
32 #[inline]
33 fn arrow_datatype() -> arrow::datatypes::DataType {
34 use arrow::datatypes::*;
35 DataType::Utf8
36 }
37
38 fn to_arrow_opt<'a>(
39 data: impl IntoIterator<Item = Option<impl Into<::std::borrow::Cow<'a, Self>>>>,
40 ) -> SerializationResult<arrow::array::ArrayRef>
41 where
42 Self: Clone + 'a,
43 {
44 #![allow(clippy::manual_is_variant_and)]
45 use crate::{Loggable as _, ResultExt as _, arrow_helpers::as_array_ref};
46 use arrow::{array::*, buffer::*, datatypes::*};
47 Ok({
48 let (somes, data0): (Vec<_>, Vec<_>) = data
49 .into_iter()
50 .map(|datum| {
51 let datum: Option<::std::borrow::Cow<'a, Self>> = datum.map(Into::into);
52 let datum = datum.map(|datum| datum.into_owned().0);
53 (datum.is_some(), datum)
54 })
55 .unzip();
56 let data0_validity: Option<arrow::buffer::NullBuffer> = {
57 let any_nones = somes.iter().any(|some| !*some);
58 any_nones.then(|| somes.into())
59 };
60 {
61 let offsets = arrow::buffer::OffsetBuffer::from_lengths(
62 data0
63 .iter()
64 .map(|opt| opt.as_ref().map(|datum| datum.len()).unwrap_or_default()),
65 );
66
67 #[expect(clippy::unwrap_used)]
68 let capacity = offsets.last().copied().unwrap() as usize;
69 let mut buffer_builder = arrow::array::builder::BufferBuilder::<u8>::new(capacity);
70 for data in data0.iter().flatten() {
71 buffer_builder.append_slice(data.as_bytes());
72 }
73 let inner_data: arrow::buffer::Buffer = buffer_builder.finish();
74
75 #[expect(unsafe_code, clippy::undocumented_unsafe_blocks)]
76 as_array_ref(unsafe {
77 StringArray::new_unchecked(offsets, inner_data, data0_validity)
78 })
79 }
80 })
81 }
82
83 fn from_arrow_opt(
84 arrow_data: &dyn arrow::array::Array,
85 ) -> DeserializationResult<Vec<Option<Self>>>
86 where
87 Self: Sized,
88 {
89 use crate::{Loggable as _, ResultExt as _, arrow_zip_validity::ZipValidity};
90 use arrow::{array::*, buffer::*, datatypes::*};
91 Ok({
92 let arrow_data = arrow_data
93 .as_any()
94 .downcast_ref::<StringArray>()
95 .ok_or_else(|| {
96 let expected = Self::arrow_datatype();
97 let actual = arrow_data.data_type().clone();
98 DeserializationError::datatype_mismatch(expected, actual)
99 })
100 .with_context("rerun.datatypes.Utf8#value")?;
101 let arrow_data_buf = arrow_data.values();
102 let offsets = arrow_data.offsets();
103 ZipValidity::new_with_validity(offsets.windows(2), arrow_data.nulls())
104 .map(|elem| {
105 elem.map(|window| {
106 let start = window[0] as usize;
107 let end = window[1] as usize;
108 let len = end - start;
109 if arrow_data_buf.len() < end {
110 return Err(DeserializationError::offset_slice_oob(
111 (start, end),
112 arrow_data_buf.len(),
113 ));
114 }
115 let data = arrow_data_buf.slice_with_length(start, len);
116 Ok(data)
117 })
118 .transpose()
119 })
120 .map(|res_or_opt| {
121 res_or_opt.map(|res_or_opt| res_or_opt.map(|v| crate::ArrowString::from(v)))
122 })
123 .collect::<DeserializationResult<Vec<Option<_>>>>()
124 .with_context("rerun.datatypes.Utf8#value")?
125 .into_iter()
126 }
127 .map(|v| v.ok_or_else(DeserializationError::missing_data))
128 .map(|res| res.map(|v| Some(Self(v))))
129 .collect::<DeserializationResult<Vec<Option<_>>>>()
130 .with_context("rerun.datatypes.Utf8#value")
131 .with_context("rerun.datatypes.Utf8")?)
132 }
133}
134
135impl From<crate::ArrowString> for Utf8 {
136 #[inline]
137 fn from(value: crate::ArrowString) -> Self {
138 Self(value)
139 }
140}
141
142impl From<Utf8> for crate::ArrowString {
143 #[inline]
144 fn from(value: Utf8) -> Self {
145 value.0
146 }
147}
148
149impl ::re_byte_size::SizeBytes for Utf8 {
150 #[inline]
151 fn heap_size_bytes(&self) -> u64 {
152 self.0.heap_size_bytes()
153 }
154
155 #[inline]
156 fn is_pod() -> bool {
157 <crate::ArrowString>::is_pod()
158 }
159}