typst_library/foundations/
bytes.rs1use std::any::Any;
2use std::fmt::{self, Debug, Formatter};
3use std::hash::{Hash, Hasher};
4use std::ops::{Add, AddAssign, Deref};
5use std::str::Utf8Error;
6use std::sync::Arc;
7
8use ecow::{EcoString, eco_format};
9use serde::{Serialize, Serializer};
10use typst_syntax::{Lines, Source};
11use typst_utils::LazyHash;
12
13use crate::diag::{StrResult, bail};
14use crate::foundations::{Array, Reflect, Repr, Str, Value, cast, func, scope, ty};
15
16#[ty(scope, cast)]
45#[derive(Clone, Hash)]
46pub struct Bytes(Arc<LazyHash<dyn Bytelike>>);
47
48impl Bytes {
49 pub fn new<T>(data: T) -> Self
61 where
62 T: AsRef<[u8]> + Send + Sync + 'static,
63 {
64 Self(Arc::new(LazyHash::new(data)))
65 }
66
67 pub fn from_string<T>(data: T) -> Self
73 where
74 T: AsRef<str> + Send + Sync + 'static,
75 {
76 Self(Arc::new(LazyHash::new(StrWrapper(data))))
77 }
78
79 pub fn is_empty(&self) -> bool {
81 self.as_slice().is_empty()
82 }
83
84 pub fn as_slice(&self) -> &[u8] {
86 self
87 }
88
89 pub fn as_str(&self) -> Result<&str, Utf8Error> {
94 self.inner().as_str()
95 }
96
97 pub fn into_vec(mut self) -> Vec<u8> {
104 match self.to_underlying_mut::<Vec<u8>>() {
105 Some(vec) => std::mem::take(vec),
106 None => self.as_slice().to_vec(),
107 }
108 }
109
110 pub fn into_string(mut self) -> Result<String, IntoStringError> {
119 if let Some(string) = self.to_underlying_string_mut::<String>() {
120 return Ok(std::mem::take(string));
121 }
122
123 let result = if let Some(vec) = self.to_underlying_mut::<Vec<u8>>() {
124 match String::from_utf8(std::mem::take(vec)) {
125 Ok(string) => return Ok(string),
126 Err(err) => {
127 let error = err.utf8_error();
128 *vec = err.into_bytes();
129 Err(error)
130 }
131 }
132 } else {
133 self.as_str().map(ToOwned::to_owned)
134 };
135
136 result.map_err(|error| IntoStringError { bytes: self, error })
137 }
138
139 pub fn to_str(&self) -> Result<Str, Utf8Error> {
146 match (self.inner() as &dyn Any).downcast_ref::<Str>() {
147 Some(string) => Ok(string.clone()),
148 None => self.as_str().map(Into::into),
149 }
150 }
151
152 pub fn lines(&self) -> Result<Lines<String>, Utf8Error> {
159 #[comemo::memoize]
160 fn compute(bytes: &Bytes) -> Result<Lines<String>, Utf8Error> {
161 let text = bytes.as_str()?;
162 Ok(Lines::new(text.to_string()))
163 }
164
165 match self.to_underlying_string::<Source>() {
168 Some(source) => Ok(source.lines().clone()),
169 None => compute(self),
170 }
171 }
172}
173
174impl Bytes {
175 fn locate(&self, index: i64) -> StrResult<usize> {
177 self.locate_opt(index).ok_or_else(|| out_of_bounds(index, self.len()))
178 }
179
180 fn locate_opt(&self, index: i64) -> Option<usize> {
184 let len = self.as_slice().len();
185 let wrapped =
186 if index >= 0 { Some(index) } else { (len as i64).checked_add(index) };
187 wrapped.and_then(|v| usize::try_from(v).ok()).filter(|&v| v <= len)
188 }
189
190 fn to_underlying_mut<T>(&mut self) -> Option<&mut T>
192 where
193 T: AsRef<[u8]> + Send + Sync + 'static,
194 {
195 Arc::get_mut(&mut self.0).and_then(|unique| {
196 let inner: &mut dyn Bytelike = &mut **unique;
197 (inner as &mut dyn Any).downcast_mut::<T>()
198 })
199 }
200
201 fn to_underlying_string<T>(&self) -> Option<&T>
203 where
204 T: AsRef<str> + Send + Sync + 'static,
205 {
206 (self.inner() as &dyn Any)
207 .downcast_ref::<StrWrapper<T>>()
208 .map(|wrapper| &wrapper.0)
209 }
210
211 fn to_underlying_string_mut<T>(&mut self) -> Option<&mut T>
213 where
214 T: AsRef<str> + Send + Sync + 'static,
215 {
216 Arc::get_mut(&mut self.0).and_then(|unique| {
217 let inner: &mut dyn Bytelike = &mut **unique;
218 (inner as &mut dyn Any)
219 .downcast_mut::<StrWrapper<T>>()
220 .map(|wrapper| &mut wrapper.0)
221 })
222 }
223
224 fn inner(&self) -> &dyn Bytelike {
226 &**self.0
227 }
228}
229
230#[scope]
231impl Bytes {
232 #[func(constructor)]
245 pub fn construct(
246 value: ToBytes,
248 ) -> Bytes {
249 value.0
250 }
251
252 #[func(title = "Length")]
254 pub fn len(&self) -> usize {
255 self.as_slice().len()
256 }
257
258 #[func]
262 pub fn at(
263 &self,
264 index: i64,
266 #[named]
268 default: Option<Value>,
269 ) -> StrResult<Value> {
270 self.locate_opt(index)
271 .and_then(|i| self.as_slice().get(i).map(|&b| Value::Int(b.into())))
272 .or(default)
273 .ok_or_else(|| out_of_bounds_no_default(index, self.len()))
274 }
275
276 #[func]
279 pub fn slice(
280 &self,
281 start: i64,
283 #[default]
286 end: Option<i64>,
287 #[named]
291 count: Option<i64>,
292 ) -> StrResult<Bytes> {
293 let start = self.locate(start)?;
294 let end = end.or(count.map(|c| start as i64 + c));
295 let end = self.locate(end.unwrap_or(self.len() as i64))?.max(start);
296 let slice = &self.as_slice()[start..end];
297
298 Ok(Bytes::new(slice.to_vec()))
304 }
305}
306
307impl Debug for Bytes {
308 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
309 write!(f, "Bytes({})", self.len())
310 }
311}
312
313impl Repr for Bytes {
314 fn repr(&self) -> EcoString {
315 eco_format!("bytes({})", self.len())
316 }
317}
318
319impl Deref for Bytes {
320 type Target = [u8];
321
322 fn deref(&self) -> &Self::Target {
323 self.inner().as_bytes()
324 }
325}
326
327impl Eq for Bytes {}
328
329impl PartialEq for Bytes {
330 fn eq(&self, other: &Self) -> bool {
331 self.0.eq(&other.0)
332 }
333}
334
335impl AsRef<[u8]> for Bytes {
336 fn as_ref(&self) -> &[u8] {
337 self
338 }
339}
340
341impl Add for Bytes {
342 type Output = Self;
343
344 fn add(mut self, rhs: Self) -> Self::Output {
345 self += rhs;
346 self
347 }
348}
349
350impl AddAssign for Bytes {
351 fn add_assign(&mut self, rhs: Self) {
352 if rhs.is_empty() {
353 } else if self.is_empty() {
355 *self = rhs;
356 } else if let Some(vec) = self.to_underlying_mut::<Vec<u8>>() {
357 vec.extend_from_slice(&rhs);
358 } else {
359 *self = Self::new([self.as_slice(), rhs.as_slice()].concat());
360 }
361 }
362}
363
364impl Serialize for Bytes {
365 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
366 where
367 S: Serializer,
368 {
369 if serializer.is_human_readable() {
370 serializer.serialize_str(&self.repr())
371 } else {
372 serializer.serialize_bytes(self)
373 }
374 }
375}
376
377#[derive(Debug)]
379pub struct IntoStringError {
380 pub bytes: Bytes,
381 pub error: Utf8Error,
382}
383
/// The type-erased payload stored inside `Bytes`.
///
/// The `Any` supertrait is what allows the payload to be downcast back to
/// its concrete type.
trait Bytelike: Any + Send + Sync {
    /// Views the payload as raw bytes.
    fn as_bytes(&self) -> &[u8];

    /// Views the payload as a string slice, or reports why it is not valid
    /// UTF-8.
    fn as_str(&self) -> Result<&str, Utf8Error>;
}
389
390impl<T> Bytelike for T
391where
392 T: AsRef<[u8]> + Send + Sync + 'static,
393{
394 fn as_bytes(&self) -> &[u8] {
395 self.as_ref()
396 }
397
398 fn as_str(&self) -> Result<&str, Utf8Error> {
399 std::str::from_utf8(self.as_ref())
400 }
401}
402
403impl Hash for dyn Bytelike {
404 fn hash<H: Hasher>(&self, state: &mut H) {
405 self.as_bytes().hash(state);
406 }
407}
408
409struct StrWrapper<T>(T);
411
412impl<T> Bytelike for StrWrapper<T>
413where
414 T: AsRef<str> + Send + Sync + 'static,
415{
416 fn as_bytes(&self) -> &[u8] {
417 self.0.as_ref().as_bytes()
418 }
419
420 fn as_str(&self) -> Result<&str, Utf8Error> {
421 Ok(self.0.as_ref())
422 }
423}
424
425pub struct ToBytes(Bytes);
427
428cast! {
429 ToBytes,
430 v: Str => Self(Bytes::from_string(v)),
431 v: Array => Self(v.iter()
432 .map(|item| match item {
433 Value::Int(byte @ 0..=255) => Ok(*byte as u8),
434 Value::Int(_) => bail!("number must be between 0 and 255"),
435 value => Err(<u8 as Reflect>::error(value)),
436 })
437 .collect::<Result<Vec<u8>, _>>()
438 .map(Bytes::new)?
439 ),
440 v: Bytes => Self(v),
441}
442
443#[cold]
445fn out_of_bounds(index: i64, len: usize) -> EcoString {
446 eco_format!("byte index out of bounds (index: {index}, len: {len})")
447}
448
449#[cold]
451fn out_of_bounds_no_default(index: i64, len: usize) -> EcoString {
452 eco_format!(
453 "byte index out of bounds (index: {index}, len: {len}) \
454 and no default value was specified",
455 )
456}
457
#[cfg(test)]
mod tests {
    use super::*;

    /// A uniquely owned string payload is handed back without reallocating.
    #[test]
    fn test_bytes_into_string_lone() {
        let original = String::from("hello world");
        let original_ptr = original.as_ptr();
        let recovered = Bytes::from_string(original).into_string().unwrap();
        assert_eq!(original_ptr, recovered.as_ptr());
    }

    /// While the payload is shared, converting must copy; the last remaining
    /// reference gets the original allocation back.
    #[test]
    fn test_bytes_into_string_shared() {
        let original = String::from("hello world");
        let original_ptr = original.as_ptr();
        let first = Bytes::from_string(original);
        let second = first.clone();

        let copied = first.into_string().unwrap();
        let copied_ptr = copied.as_ptr();
        let moved = second.into_string().unwrap();
        let moved_ptr = moved.as_ptr();

        // `second` was still alive, so the first conversion had to copy.
        assert_ne!(original_ptr, copied_ptr);
        // `second` was the only reference left, so nothing was copied.
        assert_eq!(original_ptr, moved_ptr);
    }

    /// A uniquely owned `Vec<u8>` payload is reused for the string.
    #[test]
    fn test_bytes_into_string_from_vec() {
        let original = String::from("hello world").into_bytes();
        let original_ptr = original.as_ptr();
        let recovered = Bytes::new(original).into_string().unwrap().into_bytes();
        assert_eq!(original_ptr, recovered.as_ptr());
    }

    /// A failed conversion from a `Vec<u8>` payload must return the bytes
    /// unchanged inside the error.
    #[test]
    fn test_bytes_into_string_from_vec_error() {
        let invalid = b"hello world\xFF";
        let err = Bytes::new(invalid.to_vec()).into_string().unwrap_err();
        assert_eq!(err.bytes.as_slice(), invalid);
    }
}