typst_library/foundations/
bytes.rs1use std::any::Any;
2use std::fmt::{self, Debug, Formatter};
3use std::hash::{Hash, Hasher};
4use std::ops::{Add, AddAssign, Deref};
5use std::str::Utf8Error;
6use std::sync::Arc;
7
8use ecow::{EcoString, eco_format};
9use serde::{Serialize, Serializer};
10use typst_syntax::Lines;
11use typst_utils::LazyHash;
12
13use crate::diag::{StrResult, bail};
14use crate::foundations::{Array, Reflect, Repr, Str, Value, cast, func, scope, ty};
15
16#[ty(scope, cast)]
45#[derive(Clone, Hash)]
46#[allow(clippy::derived_hash_with_manual_eq)]
47pub struct Bytes(Arc<LazyHash<dyn Bytelike>>);
48
49impl Bytes {
50 pub fn new<T>(data: T) -> Self
62 where
63 T: AsRef<[u8]> + Send + Sync + 'static,
64 {
65 Self(Arc::new(LazyHash::new(data)))
66 }
67
68 pub fn from_string<T>(data: T) -> Self
74 where
75 T: AsRef<str> + Send + Sync + 'static,
76 {
77 Self(Arc::new(LazyHash::new(StrWrapper(data))))
78 }
79
80 pub fn is_empty(&self) -> bool {
82 self.as_slice().is_empty()
83 }
84
85 pub fn as_slice(&self) -> &[u8] {
87 self
88 }
89
90 pub fn as_str(&self) -> Result<&str, Utf8Error> {
95 self.inner().as_str()
96 }
97
98 pub fn to_vec(&self) -> Vec<u8> {
100 self.as_slice().to_vec()
101 }
102
103 pub fn to_str(&self) -> Result<Str, Utf8Error> {
110 match self.inner().as_any().downcast_ref::<Str>() {
111 Some(string) => Ok(string.clone()),
112 None => self.as_str().map(Into::into),
113 }
114 }
115
116 fn locate(&self, index: i64) -> StrResult<usize> {
118 self.locate_opt(index).ok_or_else(|| out_of_bounds(index, self.len()))
119 }
120
121 fn locate_opt(&self, index: i64) -> Option<usize> {
125 let len = self.as_slice().len();
126 let wrapped =
127 if index >= 0 { Some(index) } else { (len as i64).checked_add(index) };
128 wrapped.and_then(|v| usize::try_from(v).ok()).filter(|&v| v <= len)
129 }
130
131 fn inner(&self) -> &dyn Bytelike {
133 &**self.0
134 }
135}
136
137#[scope]
138impl Bytes {
139 #[func(constructor)]
152 pub fn construct(
153 value: ToBytes,
155 ) -> Bytes {
156 value.0
157 }
158
159 #[func(title = "Length")]
161 pub fn len(&self) -> usize {
162 self.as_slice().len()
163 }
164
165 #[func]
169 pub fn at(
170 &self,
171 index: i64,
173 #[named]
175 default: Option<Value>,
176 ) -> StrResult<Value> {
177 self.locate_opt(index)
178 .and_then(|i| self.as_slice().get(i).map(|&b| Value::Int(b.into())))
179 .or(default)
180 .ok_or_else(|| out_of_bounds_no_default(index, self.len()))
181 }
182
183 #[func]
186 pub fn slice(
187 &self,
188 start: i64,
190 #[default]
193 end: Option<i64>,
194 #[named]
198 count: Option<i64>,
199 ) -> StrResult<Bytes> {
200 let start = self.locate(start)?;
201 let end = end.or(count.map(|c| start as i64 + c));
202 let end = self.locate(end.unwrap_or(self.len() as i64))?.max(start);
203 let slice = &self.as_slice()[start..end];
204
205 Ok(Bytes::new(slice.to_vec()))
211 }
212}
213
214impl Debug for Bytes {
215 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
216 write!(f, "Bytes({})", self.len())
217 }
218}
219
220impl Repr for Bytes {
221 fn repr(&self) -> EcoString {
222 eco_format!("bytes({})", self.len())
223 }
224}
225
226impl Deref for Bytes {
227 type Target = [u8];
228
229 fn deref(&self) -> &Self::Target {
230 self.inner().as_bytes()
231 }
232}
233
234impl Eq for Bytes {}
235
236impl PartialEq for Bytes {
237 fn eq(&self, other: &Self) -> bool {
238 self.0.eq(&other.0)
239 }
240}
241
242impl AsRef<[u8]> for Bytes {
243 fn as_ref(&self) -> &[u8] {
244 self
245 }
246}
247
248impl Add for Bytes {
249 type Output = Self;
250
251 fn add(mut self, rhs: Self) -> Self::Output {
252 self += rhs;
253 self
254 }
255}
256
257impl AddAssign for Bytes {
258 fn add_assign(&mut self, rhs: Self) {
259 if rhs.is_empty() {
260 } else if self.is_empty() {
262 *self = rhs;
263 } else if let Some(vec) = Arc::get_mut(&mut self.0)
264 .and_then(|unique| unique.as_any_mut().downcast_mut::<Vec<u8>>())
265 {
266 vec.extend_from_slice(&rhs);
267 } else {
268 *self = Self::new([self.as_slice(), rhs.as_slice()].concat());
269 }
270 }
271}
272
273impl Serialize for Bytes {
274 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
275 where
276 S: Serializer,
277 {
278 if serializer.is_human_readable() {
279 serializer.serialize_str(&self.repr())
280 } else {
281 serializer.serialize_bytes(self)
282 }
283 }
284}
285
286impl TryFrom<&Bytes> for Lines<String> {
287 type Error = Utf8Error;
288
289 #[comemo::memoize]
290 fn try_from(value: &Bytes) -> Result<Lines<String>, Utf8Error> {
291 let text = value.as_str()?;
292 Ok(Lines::new(text.to_string()))
293 }
294}
295
/// Type-erased backing storage for `Bytes`.
///
/// Anything that can be viewed as a byte slice and shared across threads can
/// act as storage; the `Any` accessors allow recovering the concrete type.
trait Bytelike: Send + Sync {
    /// Borrow the stored data as raw bytes.
    fn as_bytes(&self) -> &[u8];
    /// Borrow the stored data as a string, or report the UTF-8 error.
    fn as_str(&self) -> Result<&str, Utf8Error>;
    /// Expose the concrete value for downcasting.
    fn as_any(&self) -> &dyn Any;
    /// Expose the concrete value for mutable downcasting.
    fn as_any_mut(&mut self) -> &mut dyn Any;
}

/// Every owned, thread-safe byte container is usable as storage.
impl<T> Bytelike for T
where
    T: AsRef<[u8]> + Send + Sync + 'static,
{
    fn as_bytes(&self) -> &[u8] {
        AsRef::<[u8]>::as_ref(self)
    }

    fn as_str(&self) -> Result<&str, Utf8Error> {
        // Arbitrary bytes must be validated before being viewed as a string.
        let raw: &[u8] = self.as_ref();
        std::str::from_utf8(raw)
    }

    fn as_any(&self) -> &dyn Any {
        self as &dyn Any
    }

    fn as_any_mut(&mut self) -> &mut dyn Any {
        self as &mut dyn Any
    }
}

/// Hashes the byte view, so the hash does not depend on the concrete type.
impl Hash for dyn Bytelike {
    fn hash<H: Hasher>(&self, state: &mut H) {
        let raw = self.as_bytes();
        Hash::hash(raw, state);
    }
}
330
331struct StrWrapper<T>(T);
333
334impl<T> Bytelike for StrWrapper<T>
335where
336 T: AsRef<str> + Send + Sync + 'static,
337{
338 fn as_bytes(&self) -> &[u8] {
339 self.0.as_ref().as_bytes()
340 }
341
342 fn as_str(&self) -> Result<&str, Utf8Error> {
343 Ok(self.0.as_ref())
344 }
345
346 fn as_any(&self) -> &dyn Any {
347 self
348 }
349
350 fn as_any_mut(&mut self) -> &mut dyn Any {
351 self
352 }
353}
354
355pub struct ToBytes(Bytes);
357
358cast! {
359 ToBytes,
360 v: Str => Self(Bytes::from_string(v)),
361 v: Array => Self(v.iter()
362 .map(|item| match item {
363 Value::Int(byte @ 0..=255) => Ok(*byte as u8),
364 Value::Int(_) => bail!("number must be between 0 and 255"),
365 value => Err(<u8 as Reflect>::error(value)),
366 })
367 .collect::<Result<Vec<u8>, _>>()
368 .map(Bytes::new)?
369 ),
370 v: Bytes => Self(v),
371}
372
373#[cold]
375fn out_of_bounds(index: i64, len: usize) -> EcoString {
376 eco_format!("byte index out of bounds (index: {index}, len: {len})")
377}
378
379#[cold]
381fn out_of_bounds_no_default(index: i64, len: usize) -> EcoString {
382 eco_format!(
383 "byte index out of bounds (index: {index}, len: {len}) \
384 and no default value was specified",
385 )
386}