1use crate::{
2 anystr::{self, AnyStr, AnyStrContainer, AnyStrWrapper},
3 builtins::{
4 pystr, PyBaseExceptionRef, PyByteArray, PyBytes, PyBytesRef, PyInt, PyIntRef, PyStr,
5 PyStrRef, PyTypeRef,
6 },
7 byte::bytes_from_object,
8 cformat::cformat_bytes,
9 common::hash,
10 function::{ArgIterable, Either, OptionalArg, OptionalOption, PyComparisonValue},
11 identifier,
12 literal::escape::Escape,
13 protocol::PyBuffer,
14 sequence::{SequenceExt, SequenceMutExt},
15 types::PyComparisonOp,
16 AsObject, PyObject, PyObjectRef, PyPayload, PyResult, TryFromBorrowedObject, VirtualMachine,
17};
18use bstr::ByteSlice;
19use itertools::Itertools;
20use malachite_bigint::BigInt;
21use num_traits::ToPrimitive;
22
/// Byte-buffer payload: a thin wrapper around an owned `Vec<u8>`.
///
/// The methods implemented on this type operate on raw bytes.
#[derive(Debug, Default, Clone)]
pub struct PyBytesInner {
    /// The raw bytes held by this value.
    pub(super) elements: Vec<u8>,
}
27
28impl From<Vec<u8>> for PyBytesInner {
29 fn from(elements: Vec<u8>) -> PyBytesInner {
30 Self { elements }
31 }
32}
33
34impl<'a> TryFromBorrowedObject<'a> for PyBytesInner {
35 fn try_from_borrowed_object(vm: &VirtualMachine, obj: &'a PyObject) -> PyResult<Self> {
36 bytes_from_object(vm, obj).map(Self::from)
37 }
38}
39
/// Constructor arguments: an optional `source` object plus optional
/// `encoding` and `errors` strings.
///
/// Which combinations are accepted is decided in `get_bytearray_inner`.
#[derive(FromArgs)]
pub struct ByteInnerNewOptions {
    /// Object to build the bytes from (an int, a str, or some other object).
    #[pyarg(any, optional)]
    pub source: OptionalArg<PyObjectRef>,
    /// Text encoding; only accepted together with a `str` source.
    #[pyarg(any, optional)]
    pub encoding: OptionalArg<PyStrRef>,
    /// Encoding error handler name, forwarded to the string encoder.
    #[pyarg(any, optional)]
    pub errors: OptionalArg<PyStrRef>,
}
49
50impl ByteInnerNewOptions {
51 fn get_value_from_string(
52 s: PyStrRef,
53 encoding: PyStrRef,
54 errors: OptionalArg<PyStrRef>,
55 vm: &VirtualMachine,
56 ) -> PyResult<PyBytesInner> {
57 let bytes = pystr::encode_string(s, Some(encoding), errors.into_option(), vm)?;
58 Ok(bytes.as_bytes().to_vec().into())
59 }
60
61 fn get_value_from_source(source: PyObjectRef, vm: &VirtualMachine) -> PyResult<PyBytesInner> {
62 bytes_from_object(vm, &source).map(|x| x.into())
63 }
64
65 fn get_value_from_size(size: PyIntRef, vm: &VirtualMachine) -> PyResult<PyBytesInner> {
66 let size = size.as_bigint().to_isize().ok_or_else(|| {
67 vm.new_overflow_error("cannot fit 'int' into an index-sized integer".to_owned())
68 })?;
69 let size = if size < 0 {
70 return Err(vm.new_value_error("negative count".to_owned()));
71 } else {
72 size as usize
73 };
74 Ok(vec![0; size].into())
75 }
76
    /// Builds a `PyBytesRef` of type `cls` from these constructor arguments.
    ///
    /// When only `source` is given, two shortcuts are tried before falling
    /// back to `get_bytearray_inner`:
    /// 1. if `cls` is exactly `bytes` and `source` is an exact `bytes`
    ///    object, that object is returned as-is;
    /// 2. if `source` has a `__bytes__` method, it is called and its result
    ///    is used (returned directly when `cls` is `bytes` and the result
    ///    downcasts to `PyBytes`).
    pub fn get_bytes(self, cls: PyTypeRef, vm: &VirtualMachine) -> PyResult<PyBytesRef> {
        let inner = match (&self.source, &self.encoding, &self.errors) {
            (OptionalArg::Present(obj), OptionalArg::Missing, OptionalArg::Missing) => {
                let obj = obj.clone();
                // Shortcut 1: reuse an exact `bytes` instance instead of copying it.
                let obj = if cls.is(PyBytes::class(&vm.ctx)) {
                    match obj.downcast_exact::<PyBytes>(vm) {
                        Ok(b) => return Ok(b.into_pyref()),
                        Err(obj) => obj,
                    }
                } else {
                    obj
                };

                // Shortcut 2: honour a user-defined `__bytes__` method.
                if let Some(bytes_method) = vm.get_method(obj, identifier!(vm, __bytes__)) {
                    let bytes = bytes_method?.call((), vm)?;
                    // Return the `__bytes__` result directly when it already is a `PyBytes`
                    // and the requested class is `bytes` itself.
                    let bytes = if cls.is(PyBytes::class(&vm.ctx)) {
                        match bytes.downcast::<PyBytes>() {
                            Ok(b) => return Ok(b),
                            Err(bytes) => bytes,
                        }
                    } else {
                        bytes
                    };
                    Some(PyBytesInner::try_from_borrowed_object(vm, &bytes))
                } else {
                    None
                }
            }
            _ => None,
        }
        // No shortcut applied: use the general construction rules.
        .unwrap_or_else(|| self.get_bytearray_inner(vm))?;
        PyBytes::from(inner).into_ref_with_type(vm, cls)
    }
113
    /// Builds the byte payload from the `(source, encoding, errors)` combination.
    ///
    /// Accepted combinations:
    /// * source only: an int yields that many zero bytes, a str is rejected,
    ///   anything else goes through `bytes_from_object`;
    /// * source + encoding (+ optional errors): source must be a str, which is encoded;
    /// * nothing: an empty value.
    ///
    /// # Errors
    /// Every other combination is mapped to a `TypeError` carrying one of the
    /// messages below. Errors raised by the helper functions are propagated
    /// with `?` and keep their own exception type.
    pub fn get_bytearray_inner(self, vm: &VirtualMachine) -> PyResult<PyBytesInner> {
        const STRING_WITHOUT_ENCODING: &str = "string argument without an encoding";
        const ENCODING_WITHOUT_STRING: &str = "encoding without a string argument";

        // The match evaluates to `Result<PyBytesInner, &str>`; the `&str` errors
        // are converted to `TypeError` by the trailing `map_err`.
        match (self.source, self.encoding, self.errors) {
            (OptionalArg::Present(obj), OptionalArg::Missing, OptionalArg::Missing) => {
                match_class!(match obj {
                    i @ PyInt => {
                        Ok(Self::get_value_from_size(i, vm)?)
                    }
                    _s @ PyStr => Err(STRING_WITHOUT_ENCODING),
                    obj => {
                        Ok(Self::get_value_from_source(obj, vm)?)
                    }
                })
            }
            (OptionalArg::Present(obj), OptionalArg::Present(encoding), errors) => {
                if let Ok(s) = obj.downcast::<PyStr>() {
                    Ok(Self::get_value_from_string(s, encoding, errors, vm)?)
                } else {
                    Err(ENCODING_WITHOUT_STRING)
                }
            }
            (OptionalArg::Missing, OptionalArg::Missing, OptionalArg::Missing) => {
                Ok(PyBytesInner::default())
            }
            (OptionalArg::Missing, OptionalArg::Present(_), _) => Err(ENCODING_WITHOUT_STRING),
            (OptionalArg::Missing, _, OptionalArg::Present(_)) => {
                Err("errors without a string argument")
            }
            (OptionalArg::Present(_), OptionalArg::Missing, OptionalArg::Present(_)) => {
                Err(STRING_WITHOUT_ENCODING)
            }
        }
        .map_err(|e| vm.new_type_error(e.to_owned()))
    }
150}
151
/// Arguments shared by the search-style methods (`count`, `find`).
#[derive(FromArgs)]
pub struct ByteInnerFindOptions {
    /// What to search for: a bytes-like value or a single integer byte.
    #[pyarg(positional)]
    sub: Either<PyBytesInner, PyIntRef>,
    /// Optional start index of the searched range.
    #[pyarg(positional, default)]
    start: Option<PyIntRef>,
    /// Optional end index of the searched range.
    #[pyarg(positional, default)]
    end: Option<PyIntRef>,
}
161
162impl ByteInnerFindOptions {
163 pub fn get_value(
164 self,
165 len: usize,
166 vm: &VirtualMachine,
167 ) -> PyResult<(Vec<u8>, std::ops::Range<usize>)> {
168 let sub = match self.sub {
169 Either::A(v) => v.elements.to_vec(),
170 Either::B(int) => vec![int.as_bigint().byte_or(vm)?],
171 };
172 let range = anystr::adjust_indices(self.start, self.end, len);
173 Ok((sub, range))
174 }
175}
176
/// Arguments for the padding methods (`center`, `ljust`, `rjust`).
#[derive(FromArgs)]
pub struct ByteInnerPaddingOptions {
    /// Requested total width of the result.
    #[pyarg(positional)]
    width: isize,
    /// Optional fill byte, given as a bytes-like object of length 1.
    #[pyarg(positional, optional)]
    fillchar: OptionalArg<PyObjectRef>,
}
184
185impl ByteInnerPaddingOptions {
186 fn get_value(self, fn_name: &str, vm: &VirtualMachine) -> PyResult<(isize, u8)> {
187 let fillchar = if let OptionalArg::Present(v) = self.fillchar {
188 try_as_bytes(v.clone(), |bytes| bytes.iter().copied().exactly_one().ok())
189 .flatten()
190 .ok_or_else(|| {
191 vm.new_type_error(format!(
192 "{}() argument 2 must be a byte string of length 1, not {}",
193 fn_name,
194 v.class().name()
195 ))
196 })?
197 } else {
198 b' ' };
200
201 Ok((self.width, fillchar))
202 }
203}
204
/// Arguments for `translate`.
#[derive(FromArgs)]
pub struct ByteInnerTranslateOptions {
    /// 256-entry translation table, or `None` for the identity mapping.
    #[pyarg(positional)]
    table: Option<PyObjectRef>,
    /// Bytes to drop from the input before translating.
    #[pyarg(any, optional)]
    delete: OptionalArg<PyObjectRef>,
}
212
213impl ByteInnerTranslateOptions {
214 pub fn get_value(self, vm: &VirtualMachine) -> PyResult<(Vec<u8>, Vec<u8>)> {
215 let table = self.table.map_or_else(
216 || Ok((0..=255).collect::<Vec<u8>>()),
217 |v| {
218 let bytes = v
219 .try_into_value::<PyBytesInner>(vm)
220 .ok()
221 .filter(|v| v.elements.len() == 256)
222 .ok_or_else(|| {
223 vm.new_value_error(
224 "translation table must be 256 characters long".to_owned(),
225 )
226 })?;
227 Ok(bytes.elements.to_vec())
228 },
229 )?;
230
231 let delete = match self.delete {
232 OptionalArg::Present(byte) => {
233 let byte: PyBytesInner = byte.try_into_value(vm)?;
234 byte.elements
235 }
236 _ => vec![],
237 };
238
239 Ok((table, delete))
240 }
241}
242
/// Split arguments specialised to byte values; consumed by `split` and `rsplit`.
pub type ByteInnerSplitOptions = anystr::SplitArgs<PyBytesInner>;
244
245impl PyBytesInner {
246 #[inline]
247 pub fn as_bytes(&self) -> &[u8] {
248 &self.elements
249 }
250
251 fn new_repr_overflow_error(vm: &VirtualMachine) -> PyBaseExceptionRef {
252 vm.new_overflow_error("bytes object is too large to make repr".to_owned())
253 }
254
255 pub fn repr_with_name(&self, class_name: &str, vm: &VirtualMachine) -> PyResult<String> {
256 const DECORATION_LEN: isize = 2 + 3; let escape = crate::literal::escape::AsciiEscape::new_repr(&self.elements);
258 let len = escape
259 .layout()
260 .len
261 .and_then(|len| (len as isize).checked_add(DECORATION_LEN + class_name.len() as isize))
262 .ok_or_else(|| Self::new_repr_overflow_error(vm))? as usize;
263 let mut buf = String::with_capacity(len);
264 buf.push_str(class_name);
265 buf.push('(');
266 escape.bytes_repr().write(&mut buf).unwrap();
267 buf.push(')');
268 debug_assert_eq!(buf.len(), len);
269 Ok(buf)
270 }
271
272 pub fn repr_bytes(&self, vm: &VirtualMachine) -> PyResult<String> {
273 let escape = crate::literal::escape::AsciiEscape::new_repr(&self.elements);
274 let len = 3 + escape
275 .layout()
276 .len
277 .ok_or_else(|| Self::new_repr_overflow_error(vm))?;
278 let mut buf = String::with_capacity(len);
279 escape.bytes_repr().write(&mut buf).unwrap();
280 debug_assert_eq!(buf.len(), len);
281 Ok(buf)
282 }
283
284 #[inline]
285 pub fn len(&self) -> usize {
286 self.elements.len()
287 }
288
289 #[inline]
290 pub fn capacity(&self) -> usize {
291 self.elements.capacity()
292 }
293
294 #[inline]
295 pub fn is_empty(&self) -> bool {
296 self.elements.is_empty()
297 }
298
299 pub fn cmp(
300 &self,
301 other: &PyObject,
302 op: PyComparisonOp,
303 vm: &VirtualMachine,
304 ) -> PyComparisonValue {
305 PyComparisonValue::from_option(
308 other
309 .try_bytes_like(vm, |other| op.eval_ord(self.elements.as_slice().cmp(other)))
310 .ok(),
311 )
312 }
313
314 pub fn hash(&self, vm: &VirtualMachine) -> hash::PyHash {
315 vm.state.hash_secret.hash_bytes(&self.elements)
316 }
317
318 pub fn add(&self, other: &[u8]) -> Vec<u8> {
319 self.elements.py_add(other)
320 }
321
322 pub fn contains(
323 &self,
324 needle: Either<PyBytesInner, PyIntRef>,
325 vm: &VirtualMachine,
326 ) -> PyResult<bool> {
327 Ok(match needle {
328 Either::A(byte) => self.elements.contains_str(byte.elements.as_slice()),
329 Either::B(int) => self.elements.contains(&int.as_bigint().byte_or(vm)?),
330 })
331 }
332
333 pub fn isalnum(&self) -> bool {
334 !self.elements.is_empty()
335 && self
336 .elements
337 .iter()
338 .all(|x| char::from(*x).is_alphanumeric())
339 }
340
341 pub fn isalpha(&self) -> bool {
342 !self.elements.is_empty() && self.elements.iter().all(|x| char::from(*x).is_alphabetic())
343 }
344
345 pub fn isascii(&self) -> bool {
346 self.elements.iter().all(|x| char::from(*x).is_ascii())
347 }
348
349 pub fn isdigit(&self) -> bool {
350 !self.elements.is_empty()
351 && self
352 .elements
353 .iter()
354 .all(|x| char::from(*x).is_ascii_digit())
355 }
356
357 pub fn islower(&self) -> bool {
358 self.elements
359 .py_iscase(char::is_lowercase, char::is_uppercase)
360 }
361
362 pub fn isupper(&self) -> bool {
363 self.elements
364 .py_iscase(char::is_uppercase, char::is_lowercase)
365 }
366
367 pub fn isspace(&self) -> bool {
368 !self.elements.is_empty()
369 && self
370 .elements
371 .iter()
372 .all(|x| char::from(*x).is_ascii_whitespace())
373 }
374
    /// Title-case test. Always `false` for empty input.
    ///
    /// Walks the bytes with one byte of lookahead, tracking whether the
    /// previous byte was cased. Each byte is converted with `char::from`, so
    /// the case predicates see it as a Unicode scalar value.
    /// NOTE(review): bytes >= 0x80 therefore follow Latin-1 case rules here —
    /// confirm that is intended.
    pub fn istitle(&self) -> bool {
        if self.elements.is_empty() {
            return false;
        }

        let mut iter = self.elements.iter().peekable();
        let mut prev_cased = false;

        while let Some(c) = iter.next() {
            let current = char::from(*c);
            // On the last byte there is no lookahead, so the verdict is decided here:
            // a trailing uppercase byte is fine only when it starts a word.
            let next = if let Some(k) = iter.peek() {
                char::from(**k)
            } else if current.is_uppercase() {
                return !prev_cased;
            } else {
                return prev_cased;
            };

            // A byte is "cased" when upper- or lower-casing changes it.
            let is_cased = current.to_uppercase().next().unwrap() != current
                || current.to_lowercase().next().unwrap() != current;
            // Reject when the lookahead byte has the wrong case for its position.
            if (is_cased && next.is_uppercase() && !prev_cased)
                || (!is_cased && next.is_lowercase())
            {
                return false;
            }

            prev_cased = is_cased;
        }

        true
    }
406
407 pub fn lower(&self) -> Vec<u8> {
408 self.elements.to_ascii_lowercase()
409 }
410
411 pub fn upper(&self) -> Vec<u8> {
412 self.elements.to_ascii_uppercase()
413 }
414
415 pub fn capitalize(&self) -> Vec<u8> {
416 let mut new: Vec<u8> = Vec::with_capacity(self.elements.len());
417 if let Some((first, second)) = self.elements.split_first() {
418 new.push(first.to_ascii_uppercase());
419 second.iter().for_each(|x| new.push(x.to_ascii_lowercase()));
420 }
421 new
422 }
423
424 pub fn swapcase(&self) -> Vec<u8> {
425 let mut new: Vec<u8> = Vec::with_capacity(self.elements.len());
426 for w in &self.elements {
427 match w {
428 65..=90 => new.push(w.to_ascii_lowercase()),
429 97..=122 => new.push(w.to_ascii_uppercase()),
430 x => new.push(*x),
431 }
432 }
433 new
434 }
435
436 pub fn hex(
437 &self,
438 sep: OptionalArg<Either<PyStrRef, PyBytesRef>>,
439 bytes_per_sep: OptionalArg<isize>,
440 vm: &VirtualMachine,
441 ) -> PyResult<String> {
442 bytes_to_hex(self.elements.as_slice(), sep, bytes_per_sep, vm)
443 }
444
445 pub fn fromhex(string: &str, vm: &VirtualMachine) -> PyResult<Vec<u8>> {
446 let mut iter = string.bytes().enumerate();
447 let mut bytes: Vec<u8> = Vec::with_capacity(string.len() / 2);
448 let i = loop {
449 let (i, b) = match iter.next() {
450 Some(val) => val,
451 None => {
452 return Ok(bytes);
453 }
454 };
455
456 if is_py_ascii_whitespace(b) {
457 continue;
458 }
459
460 let top = match b {
461 b'0'..=b'9' => b - b'0',
462 b'a'..=b'f' => 10 + b - b'a',
463 b'A'..=b'F' => 10 + b - b'A',
464 _ => break i,
465 };
466
467 let (i, b) = match iter.next() {
468 Some(val) => val,
469 None => break i + 1,
470 };
471
472 let bot = match b {
473 b'0'..=b'9' => b - b'0',
474 b'a'..=b'f' => 10 + b - b'a',
475 b'A'..=b'F' => 10 + b - b'A',
476 _ => break i,
477 };
478
479 bytes.push((top << 4) + bot);
480 };
481
482 Err(vm.new_value_error(format!(
483 "non-hexadecimal number found in fromhex() arg at position {i}"
484 )))
485 }
486
487 #[inline]
488 fn _pad(
489 &self,
490 options: ByteInnerPaddingOptions,
491 pad: fn(&[u8], usize, u8, usize) -> Vec<u8>,
492 vm: &VirtualMachine,
493 ) -> PyResult<Vec<u8>> {
494 let (width, fillchar) = options.get_value("center", vm)?;
495 Ok(if self.len() as isize >= width {
496 Vec::from(&self.elements[..])
497 } else {
498 pad(&self.elements, width as usize, fillchar, self.len())
499 })
500 }
501
502 pub fn center(
503 &self,
504 options: ByteInnerPaddingOptions,
505 vm: &VirtualMachine,
506 ) -> PyResult<Vec<u8>> {
507 self._pad(options, AnyStr::py_center, vm)
508 }
509
510 pub fn ljust(
511 &self,
512 options: ByteInnerPaddingOptions,
513 vm: &VirtualMachine,
514 ) -> PyResult<Vec<u8>> {
515 self._pad(options, AnyStr::py_ljust, vm)
516 }
517
518 pub fn rjust(
519 &self,
520 options: ByteInnerPaddingOptions,
521 vm: &VirtualMachine,
522 ) -> PyResult<Vec<u8>> {
523 self._pad(options, AnyStr::py_rjust, vm)
524 }
525
526 pub fn count(&self, options: ByteInnerFindOptions, vm: &VirtualMachine) -> PyResult<usize> {
527 let (needle, range) = options.get_value(self.elements.len(), vm)?;
528 Ok(self
529 .elements
530 .py_count(needle.as_slice(), range, |h, n| h.find_iter(n).count()))
531 }
532
533 pub fn join(
534 &self,
535 iterable: ArgIterable<PyBytesInner>,
536 vm: &VirtualMachine,
537 ) -> PyResult<Vec<u8>> {
538 let iter = iterable.iter(vm)?;
539 self.elements.py_join(iter)
540 }
541
542 #[inline]
543 pub fn find<F>(
544 &self,
545 options: ByteInnerFindOptions,
546 find: F,
547 vm: &VirtualMachine,
548 ) -> PyResult<Option<usize>>
549 where
550 F: Fn(&[u8], &[u8]) -> Option<usize>,
551 {
552 let (needle, range) = options.get_value(self.elements.len(), vm)?;
553 Ok(self.elements.py_find(&needle, range, find))
554 }
555
556 pub fn maketrans(
557 from: PyBytesInner,
558 to: PyBytesInner,
559 vm: &VirtualMachine,
560 ) -> PyResult<Vec<u8>> {
561 if from.len() != to.len() {
562 return Err(
563 vm.new_value_error("the two maketrans arguments must have equal length".to_owned())
564 );
565 }
566 let mut res = vec![];
567
568 for i in 0..=255 {
569 res.push(if let Some(position) = from.elements.find_byte(i) {
570 to.elements[position]
571 } else {
572 i
573 });
574 }
575
576 Ok(res)
577 }
578
579 pub fn translate(
580 &self,
581 options: ByteInnerTranslateOptions,
582 vm: &VirtualMachine,
583 ) -> PyResult<Vec<u8>> {
584 let (table, delete) = options.get_value(vm)?;
585
586 let mut res = if delete.is_empty() {
587 Vec::with_capacity(self.elements.len())
588 } else {
589 Vec::new()
590 };
591
592 for i in &self.elements {
593 if !delete.contains(i) {
594 res.push(table[*i as usize]);
595 }
596 }
597
598 Ok(res)
599 }
600
601 pub fn strip(&self, chars: OptionalOption<PyBytesInner>) -> Vec<u8> {
602 self.elements
603 .py_strip(
604 chars,
605 |s, chars| s.trim_with(|c| chars.contains(&(c as u8))),
606 |s| s.trim(),
607 )
608 .to_vec()
609 }
610
611 pub fn lstrip(&self, chars: OptionalOption<PyBytesInner>) -> &[u8] {
612 self.elements.py_strip(
613 chars,
614 |s, chars| s.trim_start_with(|c| chars.contains(&(c as u8))),
615 |s| s.trim_start(),
616 )
617 }
618
619 pub fn rstrip(&self, chars: OptionalOption<PyBytesInner>) -> &[u8] {
620 self.elements.py_strip(
621 chars,
622 |s, chars| s.trim_end_with(|c| chars.contains(&(c as u8))),
623 |s| s.trim_end(),
624 )
625 }
626
627 pub fn removeprefix(&self, prefix: PyBytesInner) -> Vec<u8> {
629 self.elements
630 .py_removeprefix(&prefix.elements, prefix.elements.len(), |s, p| {
631 s.starts_with(p)
632 })
633 .to_vec()
634 }
635
636 pub fn removesuffix(&self, suffix: PyBytesInner) -> Vec<u8> {
638 self.elements
639 .py_removesuffix(&suffix.elements, suffix.elements.len(), |s, p| {
640 s.ends_with(p)
641 })
642 .to_vec()
643 }
644
645 pub fn split<F>(
646 &self,
647 options: ByteInnerSplitOptions,
648 convert: F,
649 vm: &VirtualMachine,
650 ) -> PyResult<Vec<PyObjectRef>>
651 where
652 F: Fn(&[u8], &VirtualMachine) -> PyObjectRef,
653 {
654 let elements = self.elements.py_split(
655 options,
656 vm,
657 |v, s, vm| v.split_str(s).map(|v| convert(v, vm)).collect(),
658 |v, s, n, vm| v.splitn_str(n, s).map(|v| convert(v, vm)).collect(),
659 |v, n, vm| v.py_split_whitespace(n, |v| convert(v, vm)),
660 )?;
661 Ok(elements)
662 }
663
664 pub fn rsplit<F>(
665 &self,
666 options: ByteInnerSplitOptions,
667 convert: F,
668 vm: &VirtualMachine,
669 ) -> PyResult<Vec<PyObjectRef>>
670 where
671 F: Fn(&[u8], &VirtualMachine) -> PyObjectRef,
672 {
673 let mut elements = self.elements.py_split(
674 options,
675 vm,
676 |v, s, vm| v.rsplit_str(s).map(|v| convert(v, vm)).collect(),
677 |v, s, n, vm| v.rsplitn_str(n, s).map(|v| convert(v, vm)).collect(),
678 |v, n, vm| v.py_rsplit_whitespace(n, |v| convert(v, vm)),
679 )?;
680 elements.reverse();
681 Ok(elements)
682 }
683
684 pub fn partition(
685 &self,
686 sub: &PyBytesInner,
687 vm: &VirtualMachine,
688 ) -> PyResult<(Vec<u8>, bool, Vec<u8>)> {
689 self.elements.py_partition(
690 &sub.elements,
691 || self.elements.splitn_str(2, &sub.elements),
692 vm,
693 )
694 }
695
696 pub fn rpartition(
697 &self,
698 sub: &PyBytesInner,
699 vm: &VirtualMachine,
700 ) -> PyResult<(Vec<u8>, bool, Vec<u8>)> {
701 self.elements.py_partition(
702 &sub.elements,
703 || self.elements.rsplitn_str(2, &sub.elements),
704 vm,
705 )
706 }
707
708 pub fn expandtabs(&self, options: anystr::ExpandTabsArgs) -> Vec<u8> {
709 let tabsize = options.tabsize();
710 let mut counter: usize = 0;
711 let mut res = vec![];
712
713 if tabsize == 0 {
714 return self
715 .elements
716 .iter()
717 .copied()
718 .filter(|x| *x != b'\t')
719 .collect();
720 }
721
722 for i in &self.elements {
723 if *i == b'\t' {
724 let len = tabsize - counter % tabsize;
725 res.extend_from_slice(&vec![b' '; len]);
726 counter += len;
727 } else {
728 res.push(*i);
729 if *i == b'\r' || *i == b'\n' {
730 counter = 0;
731 } else {
732 counter += 1;
733 }
734 }
735 }
736
737 res
738 }
739
740 pub fn splitlines<FW, W>(&self, options: anystr::SplitLinesArgs, into_wrapper: FW) -> Vec<W>
741 where
742 FW: Fn(&[u8]) -> W,
743 {
744 self.elements.py_bytes_splitlines(options, into_wrapper)
745 }
746
747 pub fn zfill(&self, width: isize) -> Vec<u8> {
748 self.elements.py_zfill(width)
749 }
750
751 fn replace_interleave(&self, to: PyBytesInner, maxcount: Option<usize>) -> Vec<u8> {
753 let place_count = self.elements.len() + 1;
754 let count = maxcount.map_or(place_count, |v| std::cmp::min(v, place_count)) - 1;
755 let capacity = self.elements.len() + count * to.len();
756 let mut result = Vec::with_capacity(capacity);
757 let to_slice = to.elements.as_slice();
758 result.extend_from_slice(to_slice);
759 for c in &self.elements[..count] {
760 result.push(*c);
761 result.extend_from_slice(to_slice);
762 }
763 result.extend_from_slice(&self.elements[count..]);
764 result
765 }
766
767 fn replace_delete(&self, from: PyBytesInner, maxcount: Option<usize>) -> Vec<u8> {
768 let count = count_substring(self.elements.as_slice(), from.elements.as_slice(), maxcount);
769 if count == 0 {
770 return self.elements.clone();
772 }
773
774 let result_len = self.len() - (count * from.len());
775 debug_assert!(self.len() >= count * from.len());
776
777 let mut result = Vec::with_capacity(result_len);
778 let mut last_end = 0;
779 let mut count = count;
780 for offset in self.elements.find_iter(&from.elements) {
781 result.extend_from_slice(&self.elements[last_end..offset]);
782 last_end = offset + from.len();
783 count -= 1;
784 if count == 0 {
785 break;
786 }
787 }
788 result.extend_from_slice(&self.elements[last_end..]);
789 result
790 }
791
792 pub fn replace_in_place(
793 &self,
794 from: PyBytesInner,
795 to: PyBytesInner,
796 maxcount: Option<usize>,
797 ) -> Vec<u8> {
798 let len = from.len();
799 let mut iter = self.elements.find_iter(&from.elements);
800
801 let mut new = if let Some(offset) = iter.next() {
802 let mut new = self.elements.clone();
803 new[offset..offset + len].clone_from_slice(to.elements.as_slice());
804 if maxcount == Some(1) {
805 return new;
806 } else {
807 new
808 }
809 } else {
810 return self.elements.clone();
811 };
812
813 let mut count = maxcount.unwrap_or(usize::MAX) - 1;
814 for offset in iter {
815 new[offset..offset + len].clone_from_slice(to.elements.as_slice());
816 count -= 1;
817 if count == 0 {
818 break;
819 }
820 }
821 new
822 }
823
824 fn replace_general(
825 &self,
826 from: PyBytesInner,
827 to: PyBytesInner,
828 maxcount: Option<usize>,
829 vm: &VirtualMachine,
830 ) -> PyResult<Vec<u8>> {
831 let count = count_substring(self.elements.as_slice(), from.elements.as_slice(), maxcount);
832 if count == 0 {
833 return Ok(self.elements.clone());
835 }
836
837 debug_assert!(count > 0);
840 if to.len() as isize - from.len() as isize
841 > (isize::MAX - self.elements.len() as isize) / count as isize
842 {
843 return Err(vm.new_overflow_error("replace bytes is too long".to_owned()));
844 }
845 let result_len = (self.elements.len() as isize
846 + count as isize * (to.len() as isize - from.len() as isize))
847 as usize;
848
849 let mut result = Vec::with_capacity(result_len);
850 let mut last_end = 0;
851 let mut count = count;
852 for offset in self.elements.find_iter(&from.elements) {
853 result.extend_from_slice(&self.elements[last_end..offset]);
854 result.extend_from_slice(to.elements.as_slice());
855 last_end = offset + from.len();
856 count -= 1;
857 if count == 0 {
858 break;
859 }
860 }
861 result.extend_from_slice(&self.elements[last_end..]);
862 Ok(result)
863 }
864
    /// Replaces occurrences of `from` with `to`.
    ///
    /// A missing or negative `maxcount` means "replace all". The work is
    /// dispatched to one of four helpers depending on which of `from`/`to`
    /// is empty and whether their lengths match.
    ///
    /// # Errors
    /// Propagates the `OverflowError` from `replace_general`.
    pub fn replace(
        &self,
        from: PyBytesInner,
        to: PyBytesInner,
        maxcount: OptionalArg<isize>,
        vm: &VirtualMachine,
    ) -> PyResult<Vec<u8>> {
        let maxcount = match maxcount {
            // Explicit non-negative limit: settle the trivial cases right away.
            OptionalArg::Present(maxcount) if maxcount >= 0 => {
                if maxcount == 0 || (self.elements.is_empty() && !from.is_empty()) {
                    // Nothing may be, or can be, replaced.
                    return Ok(self.elements.clone());
                } else if self.elements.is_empty() && from.is_empty() {
                    // Empty pattern in empty input matches once: the result is `to`.
                    return Ok(to.elements);
                }
                Some(maxcount as usize)
            }
            // Missing or negative limit: unlimited.
            _ => None,
        };

        // Empty pattern: `to` is interleaved between the bytes.
        if from.elements.is_empty() {
            if to.elements.is_empty() {
                return Ok(self.elements.clone());
            }
            return Ok(self.replace_interleave(to, maxcount));
        }

        // Non-empty pattern cannot match inside empty input.
        if self.elements.is_empty() {
            return Ok(self.elements.clone());
        }

        if to.elements.is_empty() {
            // Pure deletion.
            Ok(self.replace_delete(from, maxcount))
        } else if from.len() == to.len() {
            // Same length: overwrite in a copy.
            Ok(self.replace_in_place(from, to, maxcount))
        } else {
            // Different lengths: rebuild the buffer.
            self.replace_general(from, to, maxcount, vm)
        }
    }
916
917 pub fn title(&self) -> Vec<u8> {
918 let mut res = vec![];
919 let mut spaced = true;
920
921 for i in &self.elements {
922 match i {
923 65..=90 | 97..=122 => {
924 if spaced {
925 res.push(i.to_ascii_uppercase());
926 spaced = false
927 } else {
928 res.push(i.to_ascii_lowercase());
929 }
930 }
931 _ => {
932 res.push(*i);
933 spaced = true
934 }
935 }
936 }
937
938 res
939 }
940
941 pub fn cformat(&self, values: PyObjectRef, vm: &VirtualMachine) -> PyResult<Vec<u8>> {
942 cformat_bytes(vm, self.elements.as_slice(), values)
943 }
944
945 pub fn mul(&self, n: isize, vm: &VirtualMachine) -> PyResult<Vec<u8>> {
946 self.elements.mul(vm, n)
947 }
948
949 pub fn imul(&mut self, n: isize, vm: &VirtualMachine) -> PyResult<()> {
950 self.elements.imul(vm, n)
951 }
952
953 pub fn concat(&self, other: &PyObject, vm: &VirtualMachine) -> PyResult<Vec<u8>> {
954 let buffer = PyBuffer::try_from_borrowed_object(vm, other)?;
955 let borrowed = buffer.as_contiguous();
956 if let Some(other) = borrowed {
957 let mut v = Vec::with_capacity(self.elements.len() + other.len());
958 v.extend_from_slice(&self.elements);
959 v.extend_from_slice(&other);
960 Ok(v)
961 } else {
962 let mut v = self.elements.clone();
963 buffer.append_to(&mut v);
964 Ok(v)
965 }
966 }
967}
968
/// Runs `f` on the bytes of `obj` if it is a `PyBytes` or a `PyByteArray`.
///
/// Returns `None` for any other object type, without calling `f`.
pub fn try_as_bytes<F, R>(obj: PyObjectRef, f: F) -> Option<R>
where
    F: Fn(&[u8]) -> R,
{
    match_class!(match obj {
        i @ PyBytes => Some(f(i.as_bytes())),
        // The bytearray contents are reached through a borrow of its buffer.
        j @ PyByteArray => Some(f(&j.borrow_buf())),
        _ => None,
    })
}
979
980#[inline]
981fn count_substring(haystack: &[u8], needle: &[u8], maxcount: Option<usize>) -> usize {
982 let substrings = haystack.find_iter(needle);
983 if let Some(maxcount) = maxcount {
984 std::cmp::min(substrings.take(maxcount).count(), maxcount)
985 } else {
986 substrings.count()
987 }
988}
989
990pub trait ByteOr: ToPrimitive {
991 fn byte_or(&self, vm: &VirtualMachine) -> PyResult<u8> {
992 match self.to_u8() {
993 Some(value) => Ok(value),
994 None => Err(vm.new_value_error("byte must be in range(0, 256)".to_owned())),
995 }
996 }
997}
998
999impl ByteOr for BigInt {}
1000
1001impl AnyStrWrapper for PyBytesInner {
1002 type Str = [u8];
1003 fn as_ref(&self) -> &[u8] {
1004 &self.elements
1005 }
1006}
1007
1008impl AnyStrContainer<[u8]> for Vec<u8> {
1009 fn new() -> Self {
1010 Vec::new()
1011 }
1012
1013 fn with_capacity(capacity: usize) -> Self {
1014 Vec::with_capacity(capacity)
1015 }
1016
1017 fn push_str(&mut self, other: &[u8]) {
1018 self.extend(other)
1019 }
1020}
1021
/// Byte set used by the whitespace splitters: space, `\t`, `\n`, `\x0c`, `\r`, `\x0b`.
const ASCII_WHITESPACES: [u8; 6] = [0x20, 0x09, 0x0a, 0x0c, 0x0d, 0x0b];
1023
1024impl AnyStr for [u8] {
1025 type Char = u8;
1026 type Container = Vec<u8>;
1027
1028 fn element_bytes_len(_: u8) -> usize {
1029 1
1030 }
1031
1032 fn to_container(&self) -> Self::Container {
1033 self.to_vec()
1034 }
1035
1036 fn as_bytes(&self) -> &[u8] {
1037 self
1038 }
1039
1040 fn as_utf8_str(&self) -> Result<&str, std::str::Utf8Error> {
1041 std::str::from_utf8(self)
1042 }
1043
1044 fn chars(&self) -> impl Iterator<Item = char> {
1045 bstr::ByteSlice::chars(self)
1046 }
1047
1048 fn elements(&self) -> impl Iterator<Item = u8> {
1049 self.iter().copied()
1050 }
1051
1052 fn get_bytes(&self, range: std::ops::Range<usize>) -> &Self {
1053 &self[range]
1054 }
1055
1056 fn get_chars(&self, range: std::ops::Range<usize>) -> &Self {
1057 &self[range]
1058 }
1059
1060 fn is_empty(&self) -> bool {
1061 Self::is_empty(self)
1062 }
1063
1064 fn bytes_len(&self) -> usize {
1065 Self::len(self)
1066 }
1067
1068 fn py_split_whitespace<F>(&self, maxsplit: isize, convert: F) -> Vec<PyObjectRef>
1069 where
1070 F: Fn(&Self) -> PyObjectRef,
1071 {
1072 let mut splits = Vec::new();
1073 let mut count = maxsplit;
1074 let mut haystack = self;
1075 while let Some(offset) = haystack.find_byteset(ASCII_WHITESPACES) {
1076 if offset != 0 {
1077 if count == 0 {
1078 break;
1079 }
1080 splits.push(convert(&haystack[..offset]));
1081 count -= 1;
1082 }
1083 haystack = &haystack[offset + 1..];
1084 }
1085 if !haystack.is_empty() {
1086 splits.push(convert(haystack));
1087 }
1088 splits
1089 }
1090
1091 fn py_rsplit_whitespace<F>(&self, maxsplit: isize, convert: F) -> Vec<PyObjectRef>
1092 where
1093 F: Fn(&Self) -> PyObjectRef,
1094 {
1095 let mut splits = Vec::new();
1096 let mut count = maxsplit;
1097 let mut haystack = self;
1098 while let Some(offset) = haystack.rfind_byteset(ASCII_WHITESPACES) {
1099 if offset + 1 != haystack.len() {
1100 if count == 0 {
1101 break;
1102 }
1103 splits.push(convert(&haystack[offset + 1..]));
1104 count -= 1;
1105 }
1106 haystack = &haystack[..offset];
1107 }
1108 if !haystack.is_empty() {
1109 splits.push(convert(haystack));
1110 }
1111 splits
1112 }
1113}
1114
/// Arguments for `bytes_decode`.
#[derive(FromArgs)]
pub struct DecodeArgs {
    /// Codec name; `bytes_decode` falls back to `DEFAULT_ENCODING` when absent.
    #[pyarg(any, default)]
    encoding: Option<PyStrRef>,
    /// Error handler name, forwarded to the codec registry.
    #[pyarg(any, default)]
    errors: Option<PyStrRef>,
}
1122
1123pub fn bytes_decode(
1124 zelf: PyObjectRef,
1125 args: DecodeArgs,
1126 vm: &VirtualMachine,
1127) -> PyResult<PyStrRef> {
1128 let DecodeArgs { encoding, errors } = args;
1129 let encoding = encoding
1130 .as_ref()
1131 .map_or(crate::codecs::DEFAULT_ENCODING, |s| s.as_str());
1132 vm.state
1133 .codec_registry
1134 .decode_text(zelf, encoding, errors, vm)
1135}
1136
1137fn hex_impl_no_sep(bytes: &[u8]) -> String {
1138 let mut buf: Vec<u8> = vec![0; bytes.len() * 2];
1139 hex::encode_to_slice(bytes, buf.as_mut_slice()).unwrap();
1140 unsafe { String::from_utf8_unchecked(buf) }
1141}
1142
1143fn hex_impl(bytes: &[u8], sep: u8, bytes_per_sep: isize) -> String {
1144 let len = bytes.len();
1145
1146 let buf = if bytes_per_sep < 0 {
1147 let bytes_per_sep = std::cmp::min(len, (-bytes_per_sep) as usize);
1148 let chunks = (len - 1) / bytes_per_sep;
1149 let chunked = chunks * bytes_per_sep;
1150 let unchunked = len - chunked;
1151 let mut buf = vec![0; len * 2 + chunks];
1152 let mut j = 0;
1153 for i in (0..chunks).map(|i| i * bytes_per_sep) {
1154 hex::encode_to_slice(
1155 &bytes[i..i + bytes_per_sep],
1156 &mut buf[j..j + bytes_per_sep * 2],
1157 )
1158 .unwrap();
1159 j += bytes_per_sep * 2;
1160 buf[j] = sep;
1161 j += 1;
1162 }
1163 hex::encode_to_slice(&bytes[chunked..], &mut buf[j..j + unchunked * 2]).unwrap();
1164 buf
1165 } else {
1166 let bytes_per_sep = std::cmp::min(len, bytes_per_sep as usize);
1167 let chunks = (len - 1) / bytes_per_sep;
1168 let chunked = chunks * bytes_per_sep;
1169 let unchunked = len - chunked;
1170 let mut buf = vec![0; len * 2 + chunks];
1171 hex::encode_to_slice(&bytes[..unchunked], &mut buf[..unchunked * 2]).unwrap();
1172 let mut j = unchunked * 2;
1173 for i in (0..chunks).map(|i| i * bytes_per_sep + unchunked) {
1174 buf[j] = sep;
1175 j += 1;
1176 hex::encode_to_slice(
1177 &bytes[i..i + bytes_per_sep],
1178 &mut buf[j..j + bytes_per_sep * 2],
1179 )
1180 .unwrap();
1181 j += bytes_per_sep * 2;
1182 }
1183 buf
1184 };
1185
1186 unsafe { String::from_utf8_unchecked(buf) }
1187}
1188
1189pub fn bytes_to_hex(
1190 bytes: &[u8],
1191 sep: OptionalArg<Either<PyStrRef, PyBytesRef>>,
1192 bytes_per_sep: OptionalArg<isize>,
1193 vm: &VirtualMachine,
1194) -> PyResult<String> {
1195 if bytes.is_empty() {
1196 return Ok("".to_owned());
1197 }
1198
1199 if let OptionalArg::Present(sep) = sep {
1200 let bytes_per_sep = bytes_per_sep.unwrap_or(1);
1201 if bytes_per_sep == 0 {
1202 return Ok(hex_impl_no_sep(bytes));
1203 }
1204
1205 let s_guard;
1206 let b_guard;
1207 let sep = match &sep {
1208 Either::A(s) => {
1209 s_guard = s.as_str();
1210 s_guard.as_bytes()
1211 }
1212 Either::B(bytes) => {
1213 b_guard = bytes.as_bytes();
1214 b_guard
1215 }
1216 };
1217
1218 if sep.len() != 1 {
1219 return Err(vm.new_value_error("sep must be length 1.".to_owned()));
1220 }
1221 let sep = sep[0];
1222 if sep > 127 {
1223 return Err(vm.new_value_error("sep must be ASCII.".to_owned()));
1224 }
1225
1226 Ok(hex_impl(bytes, sep, bytes_per_sep))
1227 } else {
1228 Ok(hex_impl_no_sep(bytes))
1229 }
1230}
1231
/// `true` for the six bytes Python treats as ASCII whitespace:
/// space, `\t`, `\n`, `\x0b`, `\x0c` and `\r`.
pub const fn is_py_ascii_whitespace(b: u8) -> bool {
    // `\t` (0x09) through `\r` (0x0d) are contiguous: \t, \n, \x0b, \x0c, \r.
    matches!(b, b'\t'..=b'\r' | b' ')
}