1use super::{
2 PositionIterInternal, PyBytesRef, PyDict, PyTupleRef, PyType, PyTypeRef,
3 int::{PyInt, PyIntRef},
4 iter::{
5 IterStatus::{self, Exhausted},
6 builtins_iter,
7 },
8};
9use crate::common::lock::LazyLock;
10use crate::{
11 AsObject, Context, Py, PyExact, PyObject, PyObjectRef, PyPayload, PyRef, PyRefExact, PyResult,
12 TryFromBorrowedObject, VirtualMachine,
13 anystr::{self, AnyStr, AnyStrContainer, AnyStrWrapper, adjust_indices},
14 atomic_func,
15 cformat::cformat_string,
16 class::PyClassImpl,
17 common::str::{PyKindStr, StrData, StrKind},
18 convert::{IntoPyException, ToPyException, ToPyObject, ToPyResult},
19 format::{format, format_map},
20 function::{ArgIterable, ArgSize, FuncArgs, OptionalArg, OptionalOption, PyComparisonValue},
21 intern::PyInterned,
22 object::{MaybeTraverse, Traverse, TraverseFn},
23 protocol::{PyIterReturn, PyMappingMethods, PyNumberMethods, PySequenceMethods},
24 sequence::SequenceExt,
25 sliceable::{SequenceIndex, SliceableSequenceOp},
26 types::{
27 AsMapping, AsNumber, AsSequence, Comparable, Constructor, Hashable, IterNext, Iterable,
28 PyComparisonOp, Representable, SelfIter,
29 },
30};
31use alloc::{borrow::Cow, fmt};
32use ascii::{AsciiChar, AsciiStr, AsciiString};
33use bstr::ByteSlice;
34use core::{char, mem, ops::Range};
35use itertools::Itertools;
36use num_traits::ToPrimitive;
37use rustpython_common::{
38 ascii,
39 atomic::{self, PyAtomic, Radium},
40 format::{FormatSpec, FormatString, FromTemplate},
41 hash,
42 lock::PyMutex,
43 str::DeduceStrKind,
44 wtf8::{CodePoint, Wtf8, Wtf8Buf, Wtf8Chunk, Wtf8Concat},
45};
46use unic_ucd_bidi::BidiClass;
47use unic_ucd_category::GeneralCategory;
48use unic_ucd_ident::{is_xid_continue, is_xid_start};
49use unicode_casing::CharExt;
50
51impl<'a> TryFromBorrowedObject<'a> for String {
52 fn try_from_borrowed_object(vm: &VirtualMachine, obj: &'a PyObject) -> PyResult<Self> {
53 obj.try_value_with(|pystr: &PyUtf8Str| Ok(pystr.as_str().to_owned()), vm)
54 }
55}
56
57impl<'a> TryFromBorrowedObject<'a> for &'a str {
58 fn try_from_borrowed_object(vm: &VirtualMachine, obj: &'a PyObject) -> PyResult<Self> {
59 let pystr: &Py<PyUtf8Str> = TryFromBorrowedObject::try_from_borrowed_object(vm, obj)?;
60 Ok(pystr.as_str())
61 }
62}
63
64impl<'a> TryFromBorrowedObject<'a> for &'a Wtf8 {
65 fn try_from_borrowed_object(vm: &VirtualMachine, obj: &'a PyObject) -> PyResult<Self> {
66 let pystr: &Py<PyStr> = TryFromBorrowedObject::try_from_borrowed_object(vm, obj)?;
67 Ok(pystr.as_wtf8())
68 }
69}
70
/// Alias for `PyRef<PyStr>`.
pub type PyStrRef = PyRef<PyStr>;
/// Alias for `PyRef<PyUtf8Str>`.
pub type PyUtf8StrRef = PyRef<PyUtf8Str>;
73
// Payload of the Python `str` class.
#[pyclass(module = false, name = "str")]
pub struct PyStr {
    // String storage; exposes the text as WTF-8 and reports its `StrKind`.
    data: StrData,
    // Cached hash value; holds `hash::SENTINEL` until the hash is first computed.
    hash: PyAtomic<hash::PyHash>,
}
79
80impl fmt::Debug for PyStr {
81 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
82 f.debug_struct("PyStr")
83 .field("value", &self.as_wtf8())
84 .field("kind", &self.data.kind())
85 .field("hash", &self.hash)
86 .finish()
87 }
88}
89
90impl AsRef<str> for PyStr {
91 #[track_caller] fn as_ref(&self) -> &str {
93 self.to_str().expect("str has surrogates")
94 }
95}
96
97impl AsRef<str> for Py<PyStr> {
98 #[track_caller] fn as_ref(&self) -> &str {
100 self.to_str().expect("str has surrogates")
101 }
102}
103
104impl AsRef<str> for PyStrRef {
105 #[track_caller] fn as_ref(&self) -> &str {
107 self.to_str().expect("str has surrogates")
108 }
109}
110
111impl AsRef<Wtf8> for PyStr {
112 fn as_ref(&self) -> &Wtf8 {
113 self.as_wtf8()
114 }
115}
116
117impl AsRef<Wtf8> for Py<PyStr> {
118 fn as_ref(&self) -> &Wtf8 {
119 self.as_wtf8()
120 }
121}
122
123impl AsRef<Wtf8> for PyStrRef {
124 fn as_ref(&self) -> &Wtf8 {
125 self.as_wtf8()
126 }
127}
128
129impl Wtf8Concat for PyStr {
130 #[inline]
131 fn fmt_wtf8(&self, buf: &mut Wtf8Buf) {
132 buf.push_wtf8(self.as_wtf8());
133 }
134}
135
136impl Wtf8Concat for Py<PyStr> {
137 #[inline]
138 fn fmt_wtf8(&self, buf: &mut Wtf8Buf) {
139 buf.push_wtf8(self.as_wtf8());
140 }
141}
142
143impl<'a> From<&'a AsciiStr> for PyStr {
144 fn from(s: &'a AsciiStr) -> Self {
145 s.to_owned().into()
146 }
147}
148
149impl From<AsciiString> for PyStr {
150 fn from(s: AsciiString) -> Self {
151 s.into_boxed_ascii_str().into()
152 }
153}
154
155impl From<Box<AsciiStr>> for PyStr {
156 fn from(s: Box<AsciiStr>) -> Self {
157 StrData::from(s).into()
158 }
159}
160
161impl From<AsciiChar> for PyStr {
162 fn from(ch: AsciiChar) -> Self {
163 AsciiString::from(ch).into()
164 }
165}
166
167impl<'a> From<&'a str> for PyStr {
168 fn from(s: &'a str) -> Self {
169 s.to_owned().into()
170 }
171}
172
173impl<'a> From<&'a Wtf8> for PyStr {
174 fn from(s: &'a Wtf8) -> Self {
175 s.to_owned().into()
176 }
177}
178
179impl From<String> for PyStr {
180 fn from(s: String) -> Self {
181 s.into_boxed_str().into()
182 }
183}
184
185impl From<Wtf8Buf> for PyStr {
186 fn from(w: Wtf8Buf) -> Self {
187 w.into_box().into()
188 }
189}
190
191impl From<char> for PyStr {
192 fn from(ch: char) -> Self {
193 StrData::from(ch).into()
194 }
195}
196
197impl From<CodePoint> for PyStr {
198 fn from(ch: CodePoint) -> Self {
199 StrData::from(ch).into()
200 }
201}
202
203impl From<StrData> for PyStr {
204 fn from(data: StrData) -> Self {
205 Self {
206 data,
207 hash: Radium::new(hash::SENTINEL),
208 }
209 }
210}
211
212impl<'a> From<alloc::borrow::Cow<'a, str>> for PyStr {
213 fn from(s: alloc::borrow::Cow<'a, str>) -> Self {
214 s.into_owned().into()
215 }
216}
217
218impl From<Box<str>> for PyStr {
219 #[inline]
220 fn from(value: Box<str>) -> Self {
221 StrData::from(value).into()
222 }
223}
224
225impl From<Box<Wtf8>> for PyStr {
226 #[inline]
227 fn from(value: Box<Wtf8>) -> Self {
228 StrData::from(value).into()
229 }
230}
231
232impl Default for PyStr {
233 fn default() -> Self {
234 Self {
235 data: StrData::default(),
236 hash: Radium::new(hash::SENTINEL),
237 }
238 }
239}
240
/// Formats the string by delegating to the `fmt` method of its WTF-8 view.
impl fmt::Display for PyStr {
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        self.as_wtf8().fmt(f)
    }
}
247
/// Conversion of string-like references into a borrowed `&Py<PyStr>`.
///
/// Implementors in this file either hand back an existing `Py<PyStr>` or, for
/// `&'static str`, obtain one through `Context::intern_str`.
pub trait AsPyStr<'a>
where
    Self: 'a,
{
    /// Returns the `Py<PyStr>` corresponding to `self`.
    ///
    /// `ctx` is only consulted by implementations that need it (interning a `&'static str`).
    #[allow(
        clippy::wrong_self_convention,
        reason = "this trait is intentionally implemented for references"
    )]
    fn as_pystr(self, ctx: &Context) -> &'a Py<PyStr>;
}
258
259impl<'a> AsPyStr<'a> for &'a Py<PyStr> {
260 #[inline]
261 fn as_pystr(self, _ctx: &Context) -> &'a Py<PyStr> {
262 self
263 }
264}
265
266impl<'a> AsPyStr<'a> for &'a Py<PyUtf8Str> {
267 #[inline]
268 fn as_pystr(self, _ctx: &Context) -> &'a Py<PyStr> {
269 Py::<PyUtf8Str>::as_pystr(self)
270 }
271}
272
273impl<'a> AsPyStr<'a> for &'a PyStrRef {
274 #[inline]
275 fn as_pystr(self, _ctx: &Context) -> &'a Py<PyStr> {
276 self
277 }
278}
279
280impl<'a> AsPyStr<'a> for &'a PyUtf8StrRef {
281 #[inline]
282 fn as_pystr(self, _ctx: &Context) -> &'a Py<PyStr> {
283 Py::<PyUtf8Str>::as_pystr(self)
284 }
285}
286
287impl AsPyStr<'static> for &'static str {
288 #[inline]
289 fn as_pystr(self, ctx: &Context) -> &'static Py<PyStr> {
290 ctx.intern_str(self)
291 }
292}
293
294impl<'a> AsPyStr<'a> for &'a PyStrInterned {
295 #[inline]
296 fn as_pystr(self, _ctx: &Context) -> &'a Py<PyStr> {
297 self
298 }
299}
300
301impl<'a> AsPyStr<'a> for &'a PyUtf8StrInterned {
302 #[inline]
303 fn as_pystr(self, _ctx: &Context) -> &'a Py<PyStr> {
304 Py::<PyUtf8Str>::as_pystr(self)
305 }
306}
307
// Payload of the Python `str_iterator` class.
#[pyclass(module = false, name = "str_iterator", traverse = "manual")]
#[derive(Debug)]
pub struct PyStrIterator {
    // `.0` is the generic position-based iterator state over the string.
    // `.1` is a byte offset into the string's WTF-8 data just past the last yielded
    // code point; `usize::MAX` means the offset is not known and must be recomputed.
    internal: PyMutex<(PositionIterInternal<PyStrRef>, usize)>,
}
313
314unsafe impl Traverse for PyStrIterator {
315 fn traverse(&self, tracer: &mut TraverseFn<'_>) {
316 self.internal.lock().0.traverse(tracer);
318 }
319}
320
321impl PyPayload for PyStrIterator {
322 fn class(ctx: &Context) -> &'static Py<PyType> {
323 ctx.types.str_iterator_type
324 }
325}
326
327#[pyclass(flags(DISALLOW_INSTANTIATION), with(IterNext, Iterable))]
328impl PyStrIterator {
329 #[pymethod]
330 fn __length_hint__(&self) -> usize {
331 self.internal.lock().0.length_hint(|obj| obj.char_len())
332 }
333
334 #[pymethod]
335 fn __setstate__(&self, state: PyObjectRef, vm: &VirtualMachine) -> PyResult<()> {
336 let mut internal = self.internal.lock();
337 internal.1 = usize::MAX;
338 internal
339 .0
340 .set_state(state, |obj, pos| pos.min(obj.char_len()), vm)
341 }
342
343 #[pymethod]
344 fn __reduce__(&self, vm: &VirtualMachine) -> PyTupleRef {
345 let func = builtins_iter(vm);
346 self.internal.lock().0.reduce(
347 func,
348 |x| x.clone().into(),
349 |vm| vm.ctx.empty_str.to_owned().into(),
350 vm,
351 )
352 }
353}
354
// Marker impl with no overrides: all behavior comes from `SelfIter`'s provided items.
impl SelfIter for PyStrIterator {}
356
/// Yields the code points of the underlying string one at a time.
impl IterNext for PyStrIterator {
    fn next(zelf: &Py<Self>, vm: &VirtualMachine) -> PyResult<PyIterReturn> {
        let mut internal = zelf.internal.lock();

        if let IterStatus::Active(s) = &internal.0.status {
            let value = s.as_wtf8();

            // `internal.1` caches the byte offset just past the last yielded code point;
            // `usize::MAX` marks it as unknown (`__setstate__` resets it to that value).
            if internal.1 == usize::MAX {
                // Offset unknown: walk from the start to the `position`-th code point
                // and remember the byte offset that follows it.
                if let Some((offset, ch)) = value.code_point_indices().nth(internal.0.position) {
                    internal.0.position += 1;
                    internal.1 = offset + ch.len_wtf8();
                    return Ok(PyIterReturn::Return(ch.to_pyobject(vm)));
                }
            } else if let Some(value) = value.get(internal.1..)
                && let Some(ch) = value.code_points().next()
            {
                // Offset known: decode the next code point directly at the cached offset.
                internal.0.position += 1;
                internal.1 += ch.len_wtf8();
                return Ok(PyIterReturn::Return(ch.to_pyobject(vm)));
            }
            // Nothing left to yield: mark the iterator as exhausted.
            internal.0.status = Exhausted;
        }
        Ok(PyIterReturn::StopIteration(None))
    }
}
382
// Arguments accepted by the `str` constructor; every argument is optional.
#[derive(FromArgs)]
pub struct StrArgs {
    // Object to convert; when missing, the constructor produces an empty string.
    #[pyarg(any, optional)]
    object: OptionalArg<PyObjectRef>,
    // When present, `object` is decoded through the codec registry with this encoding.
    #[pyarg(any, optional)]
    encoding: OptionalArg<PyUtf8StrRef>,
    // Error-handler name forwarded to the decoder; only read when `encoding` is present.
    #[pyarg(any, optional)]
    errors: OptionalArg<PyUtf8StrRef>,
}
392
393impl Constructor for PyStr {
394 type Args = StrArgs;
395
396 fn slot_new(cls: PyTypeRef, func_args: FuncArgs, vm: &VirtualMachine) -> PyResult {
397 if cls.is(vm.ctx.types.str_type)
399 && func_args.args.len() == 1
400 && func_args.kwargs.is_empty()
401 && func_args.args[0].class().is(vm.ctx.types.str_type)
402 {
403 return Ok(func_args.args[0].clone());
404 }
405
406 let args: Self::Args = func_args.bind(vm)?;
407 let payload = Self::py_new(&cls, args, vm)?;
408 payload.into_ref_with_type(vm, cls).map(Into::into)
409 }
410
411 fn py_new(_cls: &Py<PyType>, args: Self::Args, vm: &VirtualMachine) -> PyResult<Self> {
412 match args.object {
413 OptionalArg::Present(input) => {
414 if let OptionalArg::Present(enc) = args.encoding {
415 let s = vm.state.codec_registry.decode_text(
416 input,
417 enc.as_str(),
418 args.errors.into_option(),
419 vm,
420 )?;
421 Ok(Self::from(s.as_wtf8().to_owned()))
422 } else {
423 let s = input.str(vm)?;
424 Ok(Self::from(s.as_wtf8().to_owned()))
425 }
426 }
427 OptionalArg::Missing => Ok(Self::from(String::new())),
428 }
429 }
430}
431
432impl PyStr {
433 unsafe fn new_str_unchecked(data: Box<Wtf8>, kind: StrKind) -> Self {
435 unsafe { StrData::new_str_unchecked(data, kind) }.into()
436 }
437
438 unsafe fn new_with_char_len<T: DeduceStrKind + Into<Box<Wtf8>>>(s: T, char_len: usize) -> Self {
439 let kind = s.str_kind();
440 unsafe { StrData::new_with_char_len(s.into(), kind, char_len) }.into()
441 }
442
443 pub unsafe fn new_ascii_unchecked(bytes: Vec<u8>) -> Self {
446 unsafe { AsciiString::from_ascii_unchecked(bytes) }.into()
447 }
448
449 #[deprecated(note = "use PyStr::from(...).into_ref() instead")]
450 pub fn new_ref(zelf: impl Into<Self>, ctx: &Context) -> PyRef<Self> {
451 let zelf = zelf.into();
452 zelf.into_ref(ctx)
453 }
454
455 fn new_substr(&self, s: Wtf8Buf) -> Self {
456 let kind = if self.kind().is_ascii() || s.is_ascii() {
457 StrKind::Ascii
458 } else if self.kind().is_utf8() || s.is_utf8() {
459 StrKind::Utf8
460 } else {
461 StrKind::Wtf8
462 };
463 unsafe {
464 Self::new_str_unchecked(s.into(), kind)
466 }
467 }
468
469 #[inline]
470 pub const fn as_wtf8(&self) -> &Wtf8 {
471 self.data.as_wtf8()
472 }
473
474 pub const fn as_bytes(&self) -> &[u8] {
475 self.data.as_wtf8().as_bytes()
476 }
477
478 pub fn to_str(&self) -> Option<&str> {
479 self.data.as_str()
480 }
481
482 #[inline]
487 #[track_caller]
488 pub fn expect_str(&self) -> &str {
489 self.to_str().expect("PyStr contains surrogates")
490 }
491
492 pub(crate) fn ensure_valid_utf8(&self, vm: &VirtualMachine) -> PyResult<()> {
493 if self.is_utf8() {
494 Ok(())
495 } else {
496 let start = self
497 .as_wtf8()
498 .code_points()
499 .position(|c| c.to_char().is_none())
500 .unwrap();
501 Err(vm.new_unicode_encode_error_real(
502 identifier!(vm, utf_8).to_owned(),
503 vm.ctx.new_str(self.data.clone()),
504 start,
505 start + 1,
506 vm.ctx.new_str("surrogates not allowed"),
507 ))
508 }
509 }
510
511 pub fn to_string_lossy(&self) -> Cow<'_, str> {
512 self.to_str()
513 .map(Cow::Borrowed)
514 .unwrap_or_else(|| self.as_wtf8().to_string_lossy())
515 }
516
517 pub const fn kind(&self) -> StrKind {
518 self.data.kind()
519 }
520
521 #[inline]
522 pub fn as_str_kind(&self) -> PyKindStr<'_> {
523 self.data.as_str_kind()
524 }
525
526 pub const fn is_utf8(&self) -> bool {
527 self.kind().is_utf8()
528 }
529
530 fn char_all<F>(&self, test: F) -> bool
531 where
532 F: Fn(char) -> bool,
533 {
534 match self.as_str_kind() {
535 PyKindStr::Ascii(s) => s.chars().all(|ch| test(ch.into())),
536 PyKindStr::Utf8(s) => s.chars().all(test),
537 PyKindStr::Wtf8(w) => w.code_points().all(|ch| ch.is_char_and(&test)),
538 }
539 }
540
541 fn repeat(zelf: PyRef<Self>, value: isize, vm: &VirtualMachine) -> PyResult<PyRef<Self>> {
542 if value == 0 && zelf.class().is(vm.ctx.types.str_type) {
543 return Ok(vm.ctx.empty_str.to_owned());
546 }
547 if (value == 1 || zelf.is_empty()) && zelf.class().is(vm.ctx.types.str_type) {
548 return Ok(zelf);
553 }
554 zelf.as_wtf8()
555 .as_bytes()
556 .mul(vm, value)
557 .map(|x| Self::from(unsafe { Wtf8Buf::from_bytes_unchecked(x) }).into_ref(&vm.ctx))
558 }
559
560 pub fn try_as_utf8<'a>(&'a self, vm: &VirtualMachine) -> PyResult<&'a PyUtf8Str> {
561 self.ensure_valid_utf8(vm)?;
563 Ok(unsafe { &*(self as *const _ as *const PyUtf8Str) })
565 }
566}
567
568impl Py<PyStr> {
569 pub fn try_as_utf8<'a>(&'a self, vm: &VirtualMachine) -> PyResult<&'a Py<PyUtf8Str>> {
570 self.ensure_valid_utf8(vm)?;
572 Ok(unsafe { &*(self as *const _ as *const Py<PyUtf8Str>) })
574 }
575}
576
577#[pyclass(
578 flags(BASETYPE, _MATCH_SELF),
579 with(
580 AsMapping,
581 AsNumber,
582 AsSequence,
583 Representable,
584 Hashable,
585 Comparable,
586 Iterable,
587 Constructor
588 )
589)]
590impl PyStr {
591 fn __add__(zelf: PyRef<Self>, other: PyObjectRef, vm: &VirtualMachine) -> PyResult {
592 if let Some(other) = other.downcast_ref::<Self>() {
593 let bytes = zelf.as_wtf8().py_add(other.as_wtf8());
594 Ok(unsafe {
595 let kind = zelf.kind() | other.kind();
597 Self::new_str_unchecked(bytes.into(), kind)
598 }
599 .to_pyobject(vm))
600 } else if let Some(radd) = vm.get_method(other.clone(), identifier!(vm, __radd__)) {
601 radd?.call((zelf,), vm)
603 } else {
604 Err(vm.new_type_error(format!(
605 r#"can only concatenate str (not "{}") to str"#,
606 other.class().name()
607 )))
608 }
609 }
610
611 fn _contains(&self, needle: &PyObject, vm: &VirtualMachine) -> PyResult<bool> {
612 if let Some(needle) = needle.downcast_ref::<Self>() {
613 Ok(memchr::memmem::find(self.as_bytes(), needle.as_bytes()).is_some())
614 } else {
615 Err(vm.new_type_error(format!(
616 "'in <string>' requires string as left operand, not {}",
617 needle.class().name()
618 )))
619 }
620 }
621
622 fn __contains__(&self, needle: PyObjectRef, vm: &VirtualMachine) -> PyResult<bool> {
623 self._contains(&needle, vm)
624 }
625
626 fn _getitem(&self, needle: &PyObject, vm: &VirtualMachine) -> PyResult {
627 let item = match SequenceIndex::try_from_borrowed_object(vm, needle, "str")? {
628 SequenceIndex::Int(i) => self.getitem_by_index(vm, i)?.to_pyobject(vm),
629 SequenceIndex::Slice(slice) => self.getitem_by_slice(vm, slice)?.to_pyobject(vm),
630 };
631 Ok(item)
632 }
633
634 fn __getitem__(&self, needle: PyObjectRef, vm: &VirtualMachine) -> PyResult {
635 self._getitem(&needle, vm)
636 }
637
638 #[inline]
639 pub(crate) fn hash(&self, vm: &VirtualMachine) -> hash::PyHash {
640 match self.hash.load(atomic::Ordering::Relaxed) {
641 hash::SENTINEL => self._compute_hash(vm),
642 hash => hash,
643 }
644 }
645
646 #[cold]
647 fn _compute_hash(&self, vm: &VirtualMachine) -> hash::PyHash {
648 let hash_val = vm.state.hash_secret.hash_bytes(self.as_bytes());
649 debug_assert_ne!(hash_val, hash::SENTINEL);
650 self.hash.store(hash_val, atomic::Ordering::Relaxed);
653 hash_val
654 }
655
656 #[inline]
657 pub fn byte_len(&self) -> usize {
658 self.data.len()
659 }
660
661 #[inline]
662 pub fn is_empty(&self) -> bool {
663 self.data.is_empty()
664 }
665
666 #[inline]
667 pub fn char_len(&self) -> usize {
668 self.data.char_len()
669 }
670
671 #[pymethod]
672 #[inline(always)]
673 pub const fn isascii(&self) -> bool {
674 matches!(self.kind(), StrKind::Ascii)
675 }
676
677 #[pymethod]
678 fn __sizeof__(&self) -> usize {
679 core::mem::size_of::<Self>() + self.byte_len() * core::mem::size_of::<u8>()
680 }
681
682 fn __mul__(zelf: PyRef<Self>, value: ArgSize, vm: &VirtualMachine) -> PyResult<PyRef<Self>> {
683 Self::repeat(zelf, value.into(), vm)
684 }
685
686 #[inline]
687 pub(crate) fn repr(&self, vm: &VirtualMachine) -> PyResult<String> {
688 use crate::literal::escape::UnicodeEscape;
689 UnicodeEscape::new_repr(self.as_wtf8())
690 .str_repr()
691 .to_string()
692 .ok_or_else(|| vm.new_overflow_error("string is too long to generate repr"))
693 }
694
695 #[pymethod]
696 fn lower(&self) -> Self {
697 match self.as_str_kind() {
698 PyKindStr::Ascii(s) => s.to_ascii_lowercase().into(),
699 PyKindStr::Utf8(s) => s.to_lowercase().into(),
700 PyKindStr::Wtf8(w) => w.to_lowercase().into(),
701 }
702 }
703
704 #[pymethod]
706 fn casefold(&self) -> Self {
707 match self.as_str_kind() {
708 PyKindStr::Ascii(s) => caseless::default_case_fold_str(s.as_str()).into(),
709 PyKindStr::Utf8(s) => caseless::default_case_fold_str(s).into(),
710 PyKindStr::Wtf8(w) => w
711 .chunks()
712 .map(|c| match c {
713 Wtf8Chunk::Utf8(s) => Wtf8Buf::from_string(caseless::default_case_fold_str(s)),
714 Wtf8Chunk::Surrogate(c) => Wtf8Buf::from(c),
715 })
716 .collect::<Wtf8Buf>()
717 .into(),
718 }
719 }
720
721 #[pymethod]
722 fn upper(&self) -> Self {
723 match self.as_str_kind() {
724 PyKindStr::Ascii(s) => s.to_ascii_uppercase().into(),
725 PyKindStr::Utf8(s) => s.to_uppercase().into(),
726 PyKindStr::Wtf8(w) => w.to_uppercase().into(),
727 }
728 }
729
730 #[pymethod]
731 fn capitalize(&self) -> Wtf8Buf {
732 match self.as_str_kind() {
733 PyKindStr::Ascii(s) => {
734 let mut s = s.to_owned();
735 if let [first, rest @ ..] = s.as_mut_slice() {
736 first.make_ascii_uppercase();
737 ascii::AsciiStr::make_ascii_lowercase(rest.into());
738 }
739 s.into()
740 }
741 PyKindStr::Utf8(s) => {
742 let mut chars = s.chars();
743 let mut out = String::with_capacity(s.len());
744 if let Some(c) = chars.next() {
745 out.extend(c.to_titlecase());
746 out.push_str(&chars.as_str().to_lowercase());
747 }
748 out.into()
749 }
750 PyKindStr::Wtf8(s) => {
751 let mut out = Wtf8Buf::with_capacity(s.len());
752 let mut chars = s.code_points();
753 if let Some(ch) = chars.next() {
754 match ch.to_char() {
755 Some(ch) => out.extend(ch.to_titlecase()),
756 None => out.push(ch),
757 }
758 out.push_wtf8(&chars.as_wtf8().to_lowercase());
759 }
760 out
761 }
762 }
763 }
764
    // `str.split()`: dispatches on the string kind and hands `py_split` four callbacks:
    // one producing the whole string object, one splitting on a separator, one splitting
    // on a separator with a limit, and one splitting on whitespace.
    #[pymethod]
    fn split(zelf: &Py<Self>, args: SplitArgs, vm: &VirtualMachine) -> PyResult<Vec<PyObjectRef>> {
        let elements = match zelf.as_str_kind() {
            // ASCII input is split at the byte level; each piece is re-wrapped as
            // `AsciiStr` without re-validation (pieces of ASCII data stay ASCII).
            PyKindStr::Ascii(s) => s.py_split(
                args,
                vm,
                || zelf.as_object().to_owned(),
                |v, s, vm| {
                    v.as_bytes()
                        .split_str(s)
                        .map(|s| unsafe { AsciiStr::from_ascii_unchecked(s) }.to_pyobject(vm))
                        .collect()
                },
                |v, s, n, vm| {
                    v.as_bytes()
                        .splitn_str(n, s)
                        .map(|s| unsafe { AsciiStr::from_ascii_unchecked(s) }.to_pyobject(vm))
                        .collect()
                },
                |v, n, vm| {
                    v.as_bytes().py_split_whitespace(n, |s| {
                        unsafe { AsciiStr::from_ascii_unchecked(s) }.to_pyobject(vm)
                    })
                },
            ),
            PyKindStr::Utf8(s) => s.py_split(
                args,
                vm,
                || zelf.as_object().to_owned(),
                |v, s, vm| v.split(s).map(|s| vm.ctx.new_str(s).into()).collect(),
                |v, s, n, vm| v.splitn(n, s).map(|s| vm.ctx.new_str(s).into()).collect(),
                |v, n, vm| v.py_split_whitespace(n, |s| vm.ctx.new_str(s).into()),
            ),
            PyKindStr::Wtf8(w) => w.py_split(
                args,
                vm,
                || zelf.as_object().to_owned(),
                |v, s, vm| v.split(s).map(|s| vm.ctx.new_str(s).into()).collect(),
                |v, s, n, vm| v.splitn(n, s).map(|s| vm.ctx.new_str(s).into()).collect(),
                |v, n, vm| v.py_split_whitespace(n, |s| vm.ctx.new_str(s).into()),
            ),
        }?;
        Ok(elements)
    }
809
810 #[pymethod]
811 fn rsplit(zelf: &Py<Self>, args: SplitArgs, vm: &VirtualMachine) -> PyResult<Vec<PyObjectRef>> {
812 let mut elements = zelf.as_wtf8().py_split(
813 args,
814 vm,
815 || zelf.as_object().to_owned(),
816 |v, s, vm| v.rsplit(s).map(|s| vm.ctx.new_str(s).into()).collect(),
817 |v, s, n, vm| v.rsplitn(n, s).map(|s| vm.ctx.new_str(s).into()).collect(),
818 |v, n, vm| v.py_rsplit_whitespace(n, |s| vm.ctx.new_str(s).into()),
819 )?;
820 elements.reverse();
823 Ok(elements)
824 }
825
    // `str.strip()`: trims both ends. `py_strip` receives two callbacks: the first is
    // used with an explicit `chars` set, the second is the default trim.
    #[pymethod]
    fn strip(&self, chars: OptionalOption<PyStrRef>) -> Self {
        match self.as_str_kind() {
            PyKindStr::Ascii(s) => s
                .py_strip(
                    chars,
                    |s, chars| {
                        // Membership test is a byte search in `chars`; the trimmed slice
                        // of ASCII data is re-wrapped as `AsciiStr` without re-validation.
                        let s = s
                            .as_str()
                            .trim_matches(|c| memchr::memchr(c as _, chars.as_bytes()).is_some());
                        unsafe { AsciiStr::from_ascii_unchecked(s.as_bytes()) }
                    },
                    |s| s.trim(),
                )
                .into(),
            PyKindStr::Utf8(s) => s
                .py_strip(
                    chars,
                    |s, chars| s.trim_matches(|c| chars.contains(c)),
                    |s| s.trim(),
                )
                .into(),
            PyKindStr::Wtf8(w) => w
                .py_strip(
                    chars,
                    |s, chars| s.trim_matches(|c| chars.code_points().contains(&c)),
                    |s| s.trim(),
                )
                .into(),
        }
    }
857
858 #[pymethod]
859 fn lstrip(
860 zelf: PyRef<Self>,
861 chars: OptionalOption<PyStrRef>,
862 vm: &VirtualMachine,
863 ) -> PyRef<Self> {
864 let s = zelf.as_wtf8();
865 let stripped = s.py_strip(
866 chars,
867 |s, chars| s.trim_start_matches(|c| chars.contains_code_point(c)),
868 |s| s.trim_start(),
869 );
870 if s == stripped {
871 zelf
872 } else {
873 vm.ctx.new_str(stripped)
874 }
875 }
876
877 #[pymethod]
878 fn rstrip(
879 zelf: PyRef<Self>,
880 chars: OptionalOption<PyStrRef>,
881 vm: &VirtualMachine,
882 ) -> PyRef<Self> {
883 let s = zelf.as_wtf8();
884 let stripped = s.py_strip(
885 chars,
886 |s, chars| s.trim_end_matches(|c| chars.contains_code_point(c)),
887 |s| s.trim_end(),
888 );
889 if s == stripped {
890 zelf
891 } else {
892 vm.ctx.new_str(stripped)
893 }
894 }
895
896 #[pymethod]
897 fn endswith(&self, options: anystr::StartsEndsWithArgs, vm: &VirtualMachine) -> PyResult<bool> {
898 let (affix, substr) =
899 match options.prepare(self.as_wtf8(), self.len(), |s, r| s.get_chars(r)) {
900 Some(x) => x,
901 None => return Ok(false),
902 };
903 substr.py_starts_ends_with(
904 &affix,
905 "endswith",
906 "str",
907 |s, x: &Py<Self>| s.ends_with(x.as_wtf8()),
908 vm,
909 )
910 }
911
912 #[pymethod]
913 fn startswith(
914 &self,
915 options: anystr::StartsEndsWithArgs,
916 vm: &VirtualMachine,
917 ) -> PyResult<bool> {
918 let (affix, substr) =
919 match options.prepare(self.as_wtf8(), self.len(), |s, r| s.get_chars(r)) {
920 Some(x) => x,
921 None => return Ok(false),
922 };
923 substr.py_starts_ends_with(
924 &affix,
925 "startswith",
926 "str",
927 |s, x: &Py<Self>| s.starts_with(x.as_wtf8()),
928 vm,
929 )
930 }
931
932 #[pymethod]
933 fn removeprefix(&self, pref: PyStrRef) -> Wtf8Buf {
934 self.as_wtf8()
935 .py_removeprefix(pref.as_wtf8(), pref.byte_len(), |s, p| s.starts_with(p))
936 .to_owned()
937 }
938
939 #[pymethod]
940 fn removesuffix(&self, suffix: PyStrRef) -> Wtf8Buf {
941 self.as_wtf8()
942 .py_removesuffix(suffix.as_wtf8(), suffix.byte_len(), |s, p| s.ends_with(p))
943 .to_owned()
944 }
945
946 #[pymethod]
947 fn isalnum(&self) -> bool {
948 !self.data.is_empty() && self.char_all(char::is_alphanumeric)
949 }
950
951 #[pymethod]
952 fn isnumeric(&self) -> bool {
953 !self.data.is_empty() && self.char_all(char::is_numeric)
954 }
955
956 #[pymethod]
957 fn isdigit(&self) -> bool {
958 !self.data.is_empty()
960 && self.char_all(|c| {
961 c.is_ascii_digit()
962 || matches!(c, '⁰' | '¹' | '²' | '³' | '⁴' | '⁵' | '⁶' | '⁷' | '⁸' | '⁹')
963 })
964 }
965
966 #[pymethod]
967 fn isdecimal(&self) -> bool {
968 !self.data.is_empty()
969 && self.char_all(|c| GeneralCategory::of(c) == GeneralCategory::DecimalNumber)
970 }
971
972 fn __mod__(&self, values: PyObjectRef, vm: &VirtualMachine) -> PyResult<Wtf8Buf> {
973 cformat_string(vm, self.as_wtf8(), values)
974 }
975
976 #[pymethod]
977 fn format(&self, args: FuncArgs, vm: &VirtualMachine) -> PyResult<Wtf8Buf> {
978 let format_str =
979 FormatString::from_str(self.as_wtf8()).map_err(|e| e.to_pyexception(vm))?;
980 format(&format_str, &args, vm)
981 }
982
983 #[pymethod]
984 fn format_map(&self, mapping: PyObjectRef, vm: &VirtualMachine) -> PyResult<Wtf8Buf> {
985 let format_string =
986 FormatString::from_str(self.as_wtf8()).map_err(|err| err.to_pyexception(vm))?;
987 format_map(&format_string, &mapping, vm)
988 }
989
990 #[pymethod]
991 fn __format__(
992 zelf: PyRef<PyStr>,
993 spec: PyUtf8StrRef,
994 vm: &VirtualMachine,
995 ) -> PyResult<PyRef<PyStr>> {
996 if spec.is_empty() {
997 return if zelf.class().is(vm.ctx.types.str_type) {
998 Ok(zelf)
999 } else {
1000 zelf.as_object().str(vm)
1001 };
1002 }
1003 let zelf = zelf.try_into_utf8(vm)?;
1004 let s = FormatSpec::parse(spec.as_str())
1005 .and_then(|format_spec| {
1006 format_spec.format_string(&CharLenStr(zelf.as_str(), zelf.char_len()))
1007 })
1008 .map_err(|err| err.into_pyexception(vm))?;
1009 Ok(vm.ctx.new_str(s))
1010 }
1011
1012 #[pymethod]
1013 fn title(&self) -> Wtf8Buf {
1014 let mut title = Wtf8Buf::with_capacity(self.data.len());
1015 let mut previous_is_cased = false;
1016 for c_orig in self.as_wtf8().code_points() {
1017 let c = c_orig.to_char_lossy();
1018 if c.is_lowercase() {
1019 if !previous_is_cased {
1020 title.extend(c.to_titlecase());
1021 } else {
1022 title.push_char(c);
1023 }
1024 previous_is_cased = true;
1025 } else if c.is_uppercase() || c.is_titlecase() {
1026 if previous_is_cased {
1027 title.extend(c.to_lowercase());
1028 } else {
1029 title.push_char(c);
1030 }
1031 previous_is_cased = true;
1032 } else {
1033 previous_is_cased = false;
1034 title.push(c_orig);
1035 }
1036 }
1037 title
1038 }
1039
1040 #[pymethod]
1041 fn swapcase(&self) -> Wtf8Buf {
1042 let mut swapped_str = Wtf8Buf::with_capacity(self.data.len());
1043 for c_orig in self.as_wtf8().code_points() {
1044 let c = c_orig.to_char_lossy();
1045 if c.is_lowercase() {
1047 swapped_str.push_char(c.to_ascii_uppercase());
1048 } else if c.is_uppercase() {
1049 swapped_str.push_char(c.to_ascii_lowercase());
1050 } else {
1051 swapped_str.push(c_orig);
1052 }
1053 }
1054 swapped_str
1055 }
1056
1057 #[pymethod]
1058 fn isalpha(&self) -> bool {
1059 !self.data.is_empty() && self.char_all(char::is_alphabetic)
1060 }
1061
1062 #[pymethod]
1063 fn replace(&self, args: ReplaceArgs) -> Wtf8Buf {
1064 use core::cmp::Ordering;
1065
1066 let s = self.as_wtf8();
1067 let ReplaceArgs { old, new, count } = args;
1068
1069 match count.cmp(&0) {
1070 Ordering::Less => s.replace(old.as_wtf8(), new.as_wtf8()),
1071 Ordering::Equal => s.to_owned(),
1072 Ordering::Greater => {
1073 let s_is_empty = s.is_empty();
1074 let old_is_empty = old.is_empty();
1075
1076 if s_is_empty && !old_is_empty {
1077 s.to_owned()
1078 } else if s_is_empty && old_is_empty {
1079 new.as_wtf8().to_owned()
1080 } else {
1081 s.replacen(old.as_wtf8(), new.as_wtf8(), count as usize)
1082 }
1083 }
1084 }
1085 }
1086
1087 #[pymethod]
1088 fn isprintable(&self) -> bool {
1089 self.char_all(|c| c == '\u{0020}' || rustpython_literal::char::is_printable(c))
1090 }
1091
1092 #[pymethod]
1093 fn isspace(&self) -> bool {
1094 use unic_ucd_bidi::bidi_class::abbr_names::*;
1095 !self.data.is_empty()
1096 && self.char_all(|c| {
1097 GeneralCategory::of(c) == GeneralCategory::SpaceSeparator
1098 || matches!(BidiClass::of(c), WS | B | S)
1099 })
1100 }
1101
1102 #[pymethod]
1104 fn islower(&self) -> bool {
1105 match self.as_str_kind() {
1106 PyKindStr::Ascii(s) => s.py_islower(),
1107 PyKindStr::Utf8(s) => s.py_islower(),
1108 PyKindStr::Wtf8(w) => w.py_islower(),
1109 }
1110 }
1111
1112 #[pymethod]
1114 fn isupper(&self) -> bool {
1115 match self.as_str_kind() {
1116 PyKindStr::Ascii(s) => s.py_isupper(),
1117 PyKindStr::Utf8(s) => s.py_isupper(),
1118 PyKindStr::Wtf8(w) => w.py_isupper(),
1119 }
1120 }
1121
    // `str.splitlines()`: splits at line boundaries, keeping the terminators in each
    // piece only when `args.keepends` is set.
    #[pymethod]
    fn splitlines(&self, args: anystr::SplitLinesArgs, vm: &VirtualMachine) -> Vec<PyObjectRef> {
        let into_wrapper = |s: &Wtf8| self.new_substr(s.to_owned()).to_pyobject(vm);
        let mut elements = Vec::new();
        // Byte offset at which the current (not yet emitted) line starts.
        let mut last_i = 0;
        let self_str = self.as_wtf8();
        let mut enumerated = self_str.code_point_indices().peekable();
        while let Some((i, ch)) = enumerated.next() {
            // Byte length of the terminator starting at `i`; other code points are skipped.
            let end_len = match ch.to_char_lossy() {
                '\n' => 1,
                '\r' => {
                    // "\r\n" counts as a single two-byte terminator.
                    let is_rn = enumerated.next_if(|(_, ch)| *ch == '\n').is_some();
                    if is_rn { 2 } else { 1 }
                }
                '\x0b' | '\x0c' | '\x1c' | '\x1d' | '\x1e' | '\u{0085}' | '\u{2028}'
                | '\u{2029}' => ch.len_wtf8(),
                _ => continue,
            };
            let range = if args.keepends {
                last_i..i + end_len
            } else {
                last_i..i
            };
            last_i = i + end_len;
            elements.push(into_wrapper(&self_str[range]));
        }
        // Trailing text without a terminator forms the final line.
        if last_i != self_str.len() {
            elements.push(into_wrapper(&self_str[last_i..]));
        }
        elements
    }
1153
1154 #[pymethod]
1155 fn join(
1156 zelf: PyRef<Self>,
1157 iterable: ArgIterable<PyStrRef>,
1158 vm: &VirtualMachine,
1159 ) -> PyResult<PyStrRef> {
1160 let iter = iterable.iter(vm)?;
1161 let joined = match iter.exactly_one() {
1162 Ok(first) => {
1163 let first = first?;
1164 if first.as_object().class().is(vm.ctx.types.str_type) {
1165 return Ok(first);
1166 } else {
1167 first.as_wtf8().to_owned()
1168 }
1169 }
1170 Err(iter) => zelf.as_wtf8().py_join(iter)?,
1171 };
1172 Ok(vm.ctx.new_str(joined))
1173 }
1174
1175 #[inline]
1177 fn _to_char_idx(r: &Wtf8, byte_idx: usize) -> usize {
1178 r[..byte_idx].code_points().count()
1179 }
1180
1181 #[inline]
1182 fn _find<F>(&self, args: FindArgs, find: F) -> Option<usize>
1183 where
1184 F: Fn(&Wtf8, &Wtf8) -> Option<usize>,
1185 {
1186 let (sub, range) = args.get_value(self.len());
1187 self.as_wtf8().py_find(sub.as_wtf8(), range, find)
1188 }
1189
1190 #[pymethod]
1191 fn find(&self, args: FindArgs) -> isize {
1192 self._find(args, |r, s| Some(Self::_to_char_idx(r, r.find(s)?)))
1193 .map_or(-1, |v| v as isize)
1194 }
1195
1196 #[pymethod]
1197 fn rfind(&self, args: FindArgs) -> isize {
1198 self._find(args, |r, s| Some(Self::_to_char_idx(r, r.rfind(s)?)))
1199 .map_or(-1, |v| v as isize)
1200 }
1201
1202 #[pymethod]
1203 fn index(&self, args: FindArgs, vm: &VirtualMachine) -> PyResult<usize> {
1204 self._find(args, |r, s| Some(Self::_to_char_idx(r, r.find(s)?)))
1205 .ok_or_else(|| vm.new_value_error("substring not found"))
1206 }
1207
1208 #[pymethod]
1209 fn rindex(&self, args: FindArgs, vm: &VirtualMachine) -> PyResult<usize> {
1210 self._find(args, |r, s| Some(Self::_to_char_idx(r, r.rfind(s)?)))
1211 .ok_or_else(|| vm.new_value_error("substring not found"))
1212 }
1213
1214 #[pymethod]
1215 fn partition(&self, sep: PyStrRef, vm: &VirtualMachine) -> PyResult {
1216 let (front, has_mid, back) = self.as_wtf8().py_partition(
1217 sep.as_wtf8(),
1218 || self.as_wtf8().splitn(2, sep.as_wtf8()),
1219 vm,
1220 )?;
1221 let partition = (
1222 self.new_substr(front),
1223 if has_mid {
1224 sep
1225 } else {
1226 vm.ctx.new_str(ascii!(""))
1227 },
1228 self.new_substr(back),
1229 );
1230 Ok(partition.to_pyobject(vm))
1231 }
1232
1233 #[pymethod]
1234 fn rpartition(&self, sep: PyStrRef, vm: &VirtualMachine) -> PyResult {
1235 let (back, has_mid, front) = self.as_wtf8().py_partition(
1236 sep.as_wtf8(),
1237 || self.as_wtf8().rsplitn(2, sep.as_wtf8()),
1238 vm,
1239 )?;
1240 Ok((
1241 self.new_substr(front),
1242 if has_mid {
1243 sep
1244 } else {
1245 vm.ctx.empty_str.to_owned()
1246 },
1247 self.new_substr(back),
1248 )
1249 .to_pyobject(vm))
1250 }
1251
1252 #[pymethod]
1253 fn istitle(&self) -> bool {
1254 if self.data.is_empty() {
1255 return false;
1256 }
1257
1258 let mut cased = false;
1259 let mut previous_is_cased = false;
1260 for c in self.as_wtf8().code_points().map(CodePoint::to_char_lossy) {
1261 if c.is_uppercase() || c.is_titlecase() {
1262 if previous_is_cased {
1263 return false;
1264 }
1265 previous_is_cased = true;
1266 cased = true;
1267 } else if c.is_lowercase() {
1268 if !previous_is_cased {
1269 return false;
1270 }
1271 previous_is_cased = true;
1272 cased = true;
1273 } else {
1274 previous_is_cased = false;
1275 }
1276 }
1277 cased
1278 }
1279
1280 #[pymethod]
1281 fn count(&self, args: FindArgs) -> usize {
1282 let (needle, range) = args.get_value(self.len());
1283 self.as_wtf8()
1284 .py_count(needle.as_wtf8(), range, |h, n| h.find_iter(n).count())
1285 }
1286
    /// `str.zfill`: delegates the padding to `py_zfill` and wraps the returned
    /// bytes back into a `Wtf8Buf`.
    #[pymethod]
    fn zfill(&self, width: isize) -> Wtf8Buf {
        // SAFETY (NOTE(review)): presumably `py_zfill` only inserts ASCII '0'
        // bytes into bytes that were already valid WTF-8, so the result stays
        // valid — confirm against the `py_zfill` implementation.
        unsafe {
            Wtf8Buf::from_bytes_unchecked(self.as_wtf8().py_zfill(width))
        }
    }
1294
1295 #[inline]
1296 fn _pad(
1297 &self,
1298 width: isize,
1299 fillchar: OptionalArg<PyStrRef>,
1300 pad: fn(&Wtf8, usize, CodePoint, usize) -> Wtf8Buf,
1301 vm: &VirtualMachine,
1302 ) -> PyResult<Wtf8Buf> {
1303 let fillchar = fillchar.map_or(Ok(' '.into()), |ref s| {
1304 s.as_wtf8().code_points().exactly_one().map_err(|_| {
1305 vm.new_type_error("The fill character must be exactly one character long")
1306 })
1307 })?;
1308 Ok(if self.len() as isize >= width {
1309 self.as_wtf8().to_owned()
1310 } else {
1311 pad(self.as_wtf8(), width as usize, fillchar, self.len())
1312 })
1313 }
1314
    /// `str.center`: pads via `_pad` using `AnyStr::py_center`.
    #[pymethod]
    fn center(
        &self,
        width: isize,
        fillchar: OptionalArg<PyStrRef>,
        vm: &VirtualMachine,
    ) -> PyResult<Wtf8Buf> {
        self._pad(width, fillchar, AnyStr::py_center, vm)
    }
1324
    /// `str.ljust`: pads via `_pad` using `AnyStr::py_ljust`.
    #[pymethod]
    fn ljust(
        &self,
        width: isize,
        fillchar: OptionalArg<PyStrRef>,
        vm: &VirtualMachine,
    ) -> PyResult<Wtf8Buf> {
        self._pad(width, fillchar, AnyStr::py_ljust, vm)
    }
1334
    /// `str.rjust`: pads via `_pad` using `AnyStr::py_rjust`.
    #[pymethod]
    fn rjust(
        &self,
        width: isize,
        fillchar: OptionalArg<PyStrRef>,
        vm: &VirtualMachine,
    ) -> PyResult<Wtf8Buf> {
        self._pad(width, fillchar, AnyStr::py_rjust, vm)
    }
1344
    /// `str.expandtabs`: expands tabs with the shared `expandtabs` helper.
    ///
    /// The string is first converted with `try_as_utf8`, whose error is
    /// propagated if the conversion fails.
    #[pymethod]
    fn expandtabs(&self, args: anystr::ExpandTabsArgs, vm: &VirtualMachine) -> PyResult<String> {
        Ok(rustpython_common::str::expandtabs(
            self.try_as_utf8(vm)?.as_str(),
            args.tabsize(),
        ))
    }
1353
1354 #[pymethod]
1355 pub fn isidentifier(&self) -> bool {
1356 let Some(s) = self.to_str() else { return false };
1357 let mut chars = s.chars();
1358 let is_identifier_start = chars.next().is_some_and(|c| c == '_' || is_xid_start(c));
1359 is_identifier_start && chars.all(is_xid_continue)
1361 }
1362
1363 #[pymethod]
1365 fn translate(&self, table: PyObjectRef, vm: &VirtualMachine) -> PyResult<Wtf8Buf> {
1366 vm.get_method_or_type_error(table.clone(), identifier!(vm, __getitem__), || {
1367 format!("'{}' object is not subscriptable", table.class().name())
1368 })?;
1369
1370 let mut translated = Wtf8Buf::new();
1371 for cp in self.as_wtf8().code_points() {
1372 match table.get_item(&*cp.to_u32().to_pyobject(vm), vm) {
1373 Ok(value) => {
1374 if let Some(text) = value.downcast_ref::<Self>() {
1375 translated.push_wtf8(text.as_wtf8());
1376 } else if let Some(bigint) = value.downcast_ref::<PyInt>() {
1377 let mapped = bigint
1378 .as_bigint()
1379 .to_u32()
1380 .and_then(CodePoint::from_u32)
1381 .ok_or_else(|| {
1382 vm.new_value_error("character mapping must be in range(0x110000)")
1383 })?;
1384 translated.push(mapped);
1385 } else if !vm.is_none(&value) {
1386 return Err(
1387 vm.new_type_error("character mapping must return integer, None or str")
1388 );
1389 }
1390 }
1391 Err(e) if e.fast_isinstance(vm.ctx.exceptions.key_error) => translated.push(cp),
1392 Err(e) => return Err(e),
1393 }
1394 }
1395 Ok(translated)
1396 }
1397
1398 #[pystaticmethod]
1399 fn maketrans(
1400 dict_or_str: PyObjectRef,
1401 to_str: OptionalArg<PyStrRef>,
1402 none_str: OptionalArg<PyStrRef>,
1403 vm: &VirtualMachine,
1404 ) -> PyResult {
1405 let new_dict = vm.ctx.new_dict();
1406 if let OptionalArg::Present(to_str) = to_str {
1407 match dict_or_str.downcast::<Self>() {
1408 Ok(from_str) => {
1409 if to_str.len() == from_str.len() {
1410 for (c1, c2) in from_str
1411 .as_wtf8()
1412 .code_points()
1413 .zip(to_str.as_wtf8().code_points())
1414 {
1415 new_dict.set_item(
1416 &*vm.new_pyobj(c1.to_u32()),
1417 vm.new_pyobj(c2.to_u32()),
1418 vm,
1419 )?;
1420 }
1421 if let OptionalArg::Present(none_str) = none_str {
1422 for c in none_str.as_wtf8().code_points() {
1423 new_dict.set_item(&*vm.new_pyobj(c.to_u32()), vm.ctx.none(), vm)?;
1424 }
1425 }
1426 Ok(new_dict.to_pyobject(vm))
1427 } else {
1428 Err(vm.new_value_error(
1429 "the first two maketrans arguments must have equal length",
1430 ))
1431 }
1432 }
1433 _ => Err(vm.new_type_error(
1434 "first maketrans argument must be a string if there is a second argument",
1435 )),
1436 }
1437 } else {
1438 match dict_or_str.downcast::<PyDict>() {
1440 Ok(dict) => {
1441 for (key, val) in dict {
1442 if let Some(num) = key.downcast_ref::<PyInt>() {
1444 new_dict.set_item(
1445 &*num.as_bigint().to_i32().to_pyobject(vm),
1446 val,
1447 vm,
1448 )?;
1449 } else if let Some(string) = key.downcast_ref::<Self>() {
1450 if string.len() == 1 {
1451 let num_value =
1452 string.as_wtf8().code_points().next().unwrap().to_u32();
1453 new_dict.set_item(&*num_value.to_pyobject(vm), val, vm)?;
1454 } else {
1455 return Err(vm.new_value_error(
1456 "string keys in translate table must be of length 1",
1457 ));
1458 }
1459 } else {
1460 return Err(vm.new_type_error(
1461 "keys in translate table must be strings or integers",
1462 ));
1463 }
1464 }
1465 Ok(new_dict.to_pyobject(vm))
1466 }
1467 _ => Err(vm.new_value_error(
1468 "if you give only one argument to maketrans it must be a dict",
1469 )),
1470 }
1471 }
1472 }
1473
    /// `str.encode`: forwards the optional `encoding` and `errors` arguments to
    /// `encode_string`.
    #[pymethod]
    fn encode(zelf: PyRef<Self>, args: EncodeArgs, vm: &VirtualMachine) -> PyResult<PyBytesRef> {
        encode_string(zelf, args.encoding, args.errors, vm)
    }
1478
    /// Returns a 1-tuple holding this string's contents.
    #[pymethod]
    fn __getnewargs__(zelf: PyRef<Self>, vm: &VirtualMachine) -> PyObjectRef {
        (zelf.as_wtf8(),).to_pyobject(vm)
    }
1483
1484 #[pymethod]
1485 fn __str__(zelf: &Py<Self>, vm: &VirtualMachine) -> PyResult<PyStrRef> {
1486 if zelf.class().is(vm.ctx.types.str_type) {
1487 Ok(zelf.to_owned())
1489 } else {
1490 Ok(PyStr::from(zelf.data.clone()).into_ref(&vm.ctx))
1492 }
1493 }
1494}
1495
impl PyRef<PyStr> {
    /// Returns `true` when the referenced string is empty.
    pub fn is_empty(&self) -> bool {
        (**self).is_empty()
    }

    /// Appends `other` to the string behind this reference.
    ///
    /// Does nothing when `other` is empty. Otherwise the concatenated buffer is
    /// built first; if this is the only strong reference to the object the
    /// payload is overwritten in place and the cached hash is reset to
    /// `hash::SENTINEL`, else `self` is re-pointed at a newly created string.
    pub fn concat_in_place(&mut self, other: &Wtf8, vm: &VirtualMachine) {
        if other.is_empty() {
            return;
        }
        let mut s = Wtf8Buf::with_capacity(self.byte_len() + other.len());
        s.push_wtf8(self.as_ref());
        s.push_wtf8(other);
        if self.as_object().strong_count() == 1 {
            // NOTE(review): this writes through a pointer obtained from a shared
            // reference. It relies on the strong-count check above meaning that
            // nobody else can observe the payload — confirm this invariant.
            unsafe {
                let payload = self.payload() as *const PyStr as *mut PyStr;
                (*payload).data = PyStr::from(s).data;
                (*payload)
                    .hash
                    .store(hash::SENTINEL, atomic::Ordering::Relaxed);
            }
        } else {
            *self = PyStr::from(s).into_ref(&vm.ctx);
        }
    }

    /// Converts this reference into a `PyRef<PyUtf8Str>`.
    ///
    /// Fails with the error from `ensure_valid_utf8` when the validation does
    /// not pass. On success the reference is reinterpreted without copying;
    /// `PyUtf8Str` is a `#[repr(transparent)]` wrapper around `PyStr`.
    pub fn try_into_utf8(self, vm: &VirtualMachine) -> PyResult<PyRef<PyUtf8Str>> {
        self.ensure_valid_utf8(vm)?;
        Ok(unsafe { mem::transmute::<Self, PyRef<PyUtf8Str>>(self) })
    }
}
1528
/// A borrowed `str` paired with a separately supplied character count.
///
/// Field `0` is the text (exposed through `Deref`), field `1` the count.
struct CharLenStr<'a>(&'a str, usize);

impl core::ops::Deref for CharLenStr<'_> {
    type Target = str;

    /// Gives access to the wrapped text.
    fn deref(&self) -> &str {
        let Self(text, _) = self;
        text
    }
}
impl crate::common::format::CharLen for CharLenStr<'_> {
    /// Returns the stored character count (field `1`) without rescanning the text.
    fn char_len(&self) -> usize {
        self.1
    }
}
1542
impl Representable for PyStr {
    /// Delegates to the string's own `repr` method.
    #[inline]
    fn repr_str(zelf: &Py<Self>, vm: &VirtualMachine) -> PyResult<String> {
        zelf.repr(vm)
    }
}
1549
impl Hashable for PyStr {
    /// Delegates to the string's own `hash` method; never fails.
    #[inline]
    fn hash(zelf: &Py<Self>, vm: &VirtualMachine) -> PyResult<hash::PyHash> {
        Ok(zelf.hash(vm))
    }
}
1556
impl Comparable for PyStr {
    /// Rich comparison between a string and an arbitrary object.
    ///
    /// Tries the identity shortcut first, then yields `NotImplemented` (through
    /// `class_or_notimplemented!`) when `other` is not a `PyStr`; otherwise the
    /// result is derived from the ordering of the two WTF-8 contents.
    fn cmp(
        zelf: &Py<Self>,
        other: &PyObject,
        op: PyComparisonOp,
        _vm: &VirtualMachine,
    ) -> PyResult<PyComparisonValue> {
        if let Some(res) = op.identical_optimization(zelf, other) {
            return Ok(res.into());
        }
        let other = class_or_notimplemented!(Self, other);
        Ok(op.eval_ord(zelf.as_wtf8().cmp(other.as_wtf8())).into())
    }
}
1571
impl Iterable for PyStr {
    /// Creates a `PyStrIterator` positioned at index 0. The second tuple element
    /// in the mutex also starts at 0.
    fn iter(zelf: PyRef<Self>, vm: &VirtualMachine) -> PyResult {
        Ok(PyStrIterator {
            internal: PyMutex::new((PositionIterInternal::new(zelf, 0), 0)),
        }
        .into_pyobject(vm))
    }
}
1580
impl AsMapping for PyStr {
    /// Mapping protocol table: `length` and `subscript` are provided, every other
    /// slot is left as not implemented. The table is built lazily once.
    fn as_mapping() -> &'static PyMappingMethods {
        static AS_MAPPING: LazyLock<PyMappingMethods> = LazyLock::new(|| PyMappingMethods {
            length: atomic_func!(|mapping, _vm| Ok(PyStr::mapping_downcast(mapping).len())),
            subscript: atomic_func!(
                |mapping, needle, vm| PyStr::mapping_downcast(mapping)._getitem(needle, vm)
            ),
            ..PyMappingMethods::NOT_IMPLEMENTED
        });
        &AS_MAPPING
    }
}
1593
impl AsNumber for PyStr {
    /// Number protocol table: `add` (concatenation) and `remainder` (`%`
    /// formatting) are provided, every other slot is left as not implemented.
    fn as_number() -> &'static PyNumberMethods {
        static AS_NUMBER: PyNumberMethods = PyNumberMethods {
            // `a + b`: both operands must be strings, otherwise NotImplemented.
            add: Some(|a, b, vm| {
                let Some(a) = a.downcast_ref::<PyStr>() else {
                    return Ok(vm.ctx.not_implemented());
                };
                let Some(b) = b.downcast_ref::<PyStr>() else {
                    return Ok(vm.ctx.not_implemented());
                };
                let bytes = a.as_wtf8().py_add(b.as_wtf8());
                // The result's kind is the combination of both operand kinds.
                // NOTE(review): the unchecked constructor presumably trusts that
                // `kind` matches `bytes` — confirm `|` on kinds yields the widest.
                Ok(unsafe {
                    let kind = a.kind() | b.kind();
                    PyStr::new_str_unchecked(bytes.into(), kind)
                }
                .to_pyobject(vm))
            }),
            // `a % b`: only the left operand has to be a string.
            remainder: Some(|a, b, vm| {
                if let Some(a) = a.downcast_ref::<PyStr>() {
                    a.__mod__(b.to_owned(), vm).to_pyresult(vm)
                } else {
                    Ok(vm.ctx.not_implemented())
                }
            }),
            ..PyNumberMethods::NOT_IMPLEMENTED
        };
        &AS_NUMBER
    }
}
1623
impl AsSequence for PyStr {
    /// Sequence protocol table: `length`, `concat`, `repeat`, `item` and
    /// `contains` are provided, every other slot is left as not implemented.
    /// The table is built lazily once.
    fn as_sequence() -> &'static PySequenceMethods {
        static AS_SEQUENCE: LazyLock<PySequenceMethods> = LazyLock::new(|| PySequenceMethods {
            length: atomic_func!(|seq, _vm| Ok(PyStr::sequence_downcast(seq).len())),
            concat: atomic_func!(|seq, other, vm| {
                let zelf = PyStr::sequence_downcast(seq);
                PyStr::__add__(zelf.to_owned(), other.to_owned(), vm)
            }),
            repeat: atomic_func!(|seq, n, vm| {
                let zelf = PyStr::sequence_downcast(seq);
                PyStr::repeat(zelf.to_owned(), n, vm).map(|x| x.into())
            }),
            item: atomic_func!(|seq, i, vm| {
                let zelf = PyStr::sequence_downcast(seq);
                zelf.getitem_by_index(vm, i).to_pyresult(vm)
            }),
            contains: atomic_func!(
                |seq, needle, vm| PyStr::sequence_downcast(seq)._contains(needle, vm)
            ),
            ..PySequenceMethods::NOT_IMPLEMENTED
        });
        &AS_SEQUENCE
    }
}
1648
/// Arguments accepted by `str.encode`; both may be passed positionally or by
/// keyword and both are optional.
#[derive(FromArgs)]
struct EncodeArgs {
    /// Name of the codec; `encode_string` falls back to the default when absent.
    #[pyarg(any, default)]
    encoding: Option<PyUtf8StrRef>,
    /// Error handler name, passed through to the codec registry.
    #[pyarg(any, default)]
    errors: Option<PyUtf8StrRef>,
}
1656
1657pub(crate) fn encode_string(
1658 s: PyStrRef,
1659 encoding: Option<PyUtf8StrRef>,
1660 errors: Option<PyUtf8StrRef>,
1661 vm: &VirtualMachine,
1662) -> PyResult<PyBytesRef> {
1663 let encoding = match encoding.as_ref() {
1664 None => crate::codecs::DEFAULT_ENCODING,
1665 Some(s) => s.as_str(),
1666 };
1667 vm.state.codec_registry.encode_text(s, encoding, errors, vm)
1668}
1669
impl PyPayload for PyStr {
    /// The Python type backing this payload: the context's `str` type.
    #[inline]
    fn class(ctx: &Context) -> &'static Py<PyType> {
        ctx.types.str_type
    }
}
1676
impl ToPyObject for String {
    /// Creates a Python `str` from this owned string.
    fn to_pyobject(self, vm: &VirtualMachine) -> PyObjectRef {
        vm.ctx.new_str(self).into()
    }
}
1682
impl ToPyObject for Wtf8Buf {
    /// Creates a Python `str` from this owned WTF-8 buffer.
    fn to_pyobject(self, vm: &VirtualMachine) -> PyObjectRef {
        vm.ctx.new_str(self).into()
    }
}
1688
impl ToPyObject for char {
    /// Creates a one-character Python `str`.
    ///
    /// Characters whose code point fits in a byte are obtained through
    /// `latin1_char`; anything larger goes through `new_str`.
    fn to_pyobject(self, vm: &VirtualMachine) -> PyObjectRef {
        let cp = self as u32;
        if cp <= u8::MAX as u32 {
            vm.ctx.latin1_char(cp as u8).into()
        } else {
            vm.ctx.new_str(self).into()
        }
    }
}
1699
impl ToPyObject for CodePoint {
    /// Creates a one-code-point Python `str`.
    ///
    /// Code points that fit in a byte are obtained through `latin1_char`;
    /// anything larger goes through `new_str`.
    fn to_pyobject(self, vm: &VirtualMachine) -> PyObjectRef {
        let cp = self.to_u32();
        if cp <= u8::MAX as u32 {
            vm.ctx.latin1_char(cp as u8).into()
        } else {
            vm.ctx.new_str(self).into()
        }
    }
}
1710
impl ToPyObject for &str {
    /// Creates a Python `str` from this string slice.
    fn to_pyobject(self, vm: &VirtualMachine) -> PyObjectRef {
        vm.ctx.new_str(self).into()
    }
}
1716
impl ToPyObject for &String {
    /// Creates a Python `str` from a clone of this string.
    fn to_pyobject(self, vm: &VirtualMachine) -> PyObjectRef {
        vm.ctx.new_str(self.clone()).into()
    }
}
1722
impl ToPyObject for &Wtf8 {
    /// Creates a Python `str` from this WTF-8 slice.
    fn to_pyobject(self, vm: &VirtualMachine) -> PyObjectRef {
        vm.ctx.new_str(self).into()
    }
}
1728
impl ToPyObject for &Wtf8Buf {
    /// Creates a Python `str` from a clone of this WTF-8 buffer.
    fn to_pyobject(self, vm: &VirtualMachine) -> PyObjectRef {
        vm.ctx.new_str(self.clone()).into()
    }
}
1734
impl ToPyObject for &AsciiStr {
    /// Creates a Python `str` from this ASCII slice.
    fn to_pyobject(self, vm: &VirtualMachine) -> PyObjectRef {
        vm.ctx.new_str(self).into()
    }
}
1740
impl ToPyObject for AsciiString {
    /// Creates a Python `str` from this owned ASCII string.
    fn to_pyobject(self, vm: &VirtualMachine) -> PyObjectRef {
        vm.ctx.new_str(self).into()
    }
}
1746
impl ToPyObject for AsciiChar {
    /// Obtains the one-character Python `str` for this ASCII character through
    /// `latin1_char`.
    fn to_pyobject(self, vm: &VirtualMachine) -> PyObjectRef {
        vm.ctx.latin1_char(u8::from(self)).into()
    }
}
1752
/// The generic split arguments specialised to `str` separators.
type SplitArgs = anystr::SplitArgs<PyStrRef>;
1754
/// Positional arguments shared by `find`, `rfind`, `index`, `rindex` and
/// `count`: the needle plus optional start and end indices.
#[derive(FromArgs)]
pub struct FindArgs {
    /// The substring to look for.
    #[pyarg(positional)]
    sub: PyStrRef,
    /// Optional start index.
    #[pyarg(positional, default)]
    start: Option<PyIntRef>,
    /// Optional end index.
    #[pyarg(positional, default)]
    end: Option<PyIntRef>,
}
1764
impl FindArgs {
    /// Consumes the arguments and returns the needle together with the range
    /// that `adjust_indices` computes from `start`, `end` and `len`.
    fn get_value(self, len: usize) -> (PyStrRef, core::ops::Range<usize>) {
        let range = adjust_indices(self.start, self.end, len);
        (self.sub, range)
    }
}
1771
/// Arguments of a replace operation: two positional strings and an optional
/// count.
#[derive(FromArgs)]
struct ReplaceArgs {
    /// The substring to be replaced.
    #[pyarg(positional)]
    old: PyStrRef,

    /// The replacement text.
    #[pyarg(positional)]
    new: PyStrRef,

    /// Replacement count; defaults to `-1`.
    #[pyarg(any, default = -1)]
    count: isize,
}
1783
/// Vectorcall entry point installed on the `str` type by `init`.
///
/// Repackages the vectorcall arguments as `FuncArgs` and invokes the type's
/// `new` slot.
///
/// # Panics
///
/// Panics if `zelf_obj` is not a type object or if the type has no `new` slot.
fn vectorcall_str(
    zelf_obj: &PyObject,
    args: Vec<PyObjectRef>,
    nargs: usize,
    kwnames: Option<&[PyObjectRef]>,
    vm: &VirtualMachine,
) -> PyResult {
    let zelf: &Py<PyType> = zelf_obj.downcast_ref().unwrap();
    let func_args = FuncArgs::from_vectorcall_owned(args, nargs, kwnames);
    (zelf.slots.new.load().unwrap())(zelf.to_owned(), func_args, vm)
}
1795
/// Registers the `str` and string-iterator classes on `ctx` and installs
/// `vectorcall_str` as the vectorcall slot of the `str` type.
pub fn init(ctx: &'static Context) {
    PyStr::extend_class(ctx, ctx.types.str_type);
    ctx.types
        .str_type
        .slots
        .vectorcall
        .store(Some(vectorcall_str));

    PyStrIterator::extend_class(ctx, ctx.types.str_iterator_type);
}
1806
1807impl SliceableSequenceOp for PyStr {
1808 type Item = CodePoint;
1809 type Sliced = Self;
1810
1811 fn do_get(&self, index: usize) -> Self::Item {
1812 self.data.nth_char(index)
1813 }
1814
1815 fn do_slice(&self, range: Range<usize>) -> Self::Sliced {
1816 match self.as_str_kind() {
1817 PyKindStr::Ascii(s) => s[range].into(),
1818 PyKindStr::Utf8(s) => {
1819 let char_len = range.len();
1820 let out = rustpython_common::str::get_chars(s, range);
1821 unsafe { Self::new_with_char_len(out, char_len) }
1823 }
1824 PyKindStr::Wtf8(w) => {
1825 let char_len = range.len();
1826 let out = rustpython_common::str::get_codepoints(w, range);
1827 unsafe { Self::new_with_char_len(out, char_len) }
1829 }
1830 }
1831 }
1832
1833 fn do_slice_reverse(&self, range: Range<usize>) -> Self::Sliced {
1834 match self.as_str_kind() {
1835 PyKindStr::Ascii(s) => {
1836 let mut out = s[range].to_owned();
1837 out.as_mut_slice().reverse();
1838 out.into()
1839 }
1840 PyKindStr::Utf8(s) => {
1841 let char_len = range.len();
1842 let mut out = String::with_capacity(2 * char_len);
1843 out.extend(
1844 s.chars()
1845 .rev()
1846 .skip(self.char_len() - range.end)
1847 .take(range.len()),
1848 );
1849 unsafe { Self::new_with_char_len(out, range.len()) }
1851 }
1852 PyKindStr::Wtf8(w) => {
1853 let char_len = range.len();
1854 let mut out = Wtf8Buf::with_capacity(2 * char_len);
1855 out.extend(
1856 w.code_points()
1857 .rev()
1858 .skip(self.char_len() - range.end)
1859 .take(range.len()),
1860 );
1861 unsafe { Self::new_with_char_len(out, char_len) }
1863 }
1864 }
1865 }
1866
1867 fn do_stepped_slice(&self, range: Range<usize>, step: usize) -> Self::Sliced {
1868 match self.as_str_kind() {
1869 PyKindStr::Ascii(s) => s[range]
1870 .as_slice()
1871 .iter()
1872 .copied()
1873 .step_by(step)
1874 .collect::<AsciiString>()
1875 .into(),
1876 PyKindStr::Utf8(s) => {
1877 let char_len = (range.len() / step) + 1;
1878 let mut out = String::with_capacity(2 * char_len);
1879 out.extend(s.chars().skip(range.start).take(range.len()).step_by(step));
1880 unsafe { Self::new_with_char_len(out, char_len) }
1882 }
1883 PyKindStr::Wtf8(w) => {
1884 let char_len = (range.len() / step) + 1;
1885 let mut out = Wtf8Buf::with_capacity(2 * char_len);
1886 out.extend(
1887 w.code_points()
1888 .skip(range.start)
1889 .take(range.len())
1890 .step_by(step),
1891 );
1892 unsafe { Self::new_with_char_len(out, char_len) }
1894 }
1895 }
1896 }
1897
1898 fn do_stepped_slice_reverse(&self, range: Range<usize>, step: usize) -> Self::Sliced {
1899 match self.as_str_kind() {
1900 PyKindStr::Ascii(s) => s[range]
1901 .chars()
1902 .rev()
1903 .step_by(step)
1904 .collect::<AsciiString>()
1905 .into(),
1906 PyKindStr::Utf8(s) => {
1907 let char_len = (range.len() / step) + 1;
1908 let mut out = String::with_capacity(2 * char_len);
1910 out.extend(
1911 s.chars()
1912 .rev()
1913 .skip(self.char_len() - range.end)
1914 .take(range.len())
1915 .step_by(step),
1916 );
1917 unsafe { Self::new_with_char_len(out, char_len) }
1919 }
1920 PyKindStr::Wtf8(w) => {
1921 let char_len = (range.len() / step) + 1;
1922 let mut out = Wtf8Buf::with_capacity(2 * char_len);
1924 out.extend(
1925 w.code_points()
1926 .rev()
1927 .skip(self.char_len() - range.end)
1928 .take(range.len())
1929 .step_by(step),
1930 );
1931 unsafe { Self::new_with_char_len(out, char_len) }
1933 }
1934 }
1935 }
1936
1937 fn empty() -> Self::Sliced {
1938 Self::default()
1939 }
1940
1941 fn len(&self) -> usize {
1942 self.char_len()
1943 }
1944}
1945
impl AsRef<str> for PyRefExact<PyStr> {
    /// Borrows the contents as `&str`.
    ///
    /// # Panics
    ///
    /// Panics with "str has surrogates" when `to_str` returns `None`.
    #[track_caller]
    fn as_ref(&self) -> &str {
        self.to_str().expect("str has surrogates")
    }
}
1952
impl AsRef<str> for PyExact<PyStr> {
    /// Borrows the contents as `&str`.
    ///
    /// # Panics
    ///
    /// Panics with "str has surrogates" when `to_str` returns `None`.
    #[track_caller]
    fn as_ref(&self) -> &str {
        self.to_str().expect("str has surrogates")
    }
}
1959
impl AsRef<Wtf8> for PyRefExact<PyStr> {
    /// Borrows the contents as WTF-8; never fails.
    fn as_ref(&self) -> &Wtf8 {
        self.as_wtf8()
    }
}
1965
impl AsRef<Wtf8> for PyExact<PyStr> {
    /// Borrows the contents as WTF-8; never fails.
    fn as_ref(&self) -> &Wtf8 {
        self.as_wtf8()
    }
}
1971
impl AnyStrWrapper<Wtf8> for PyStrRef {
    /// Always available: every string can be viewed as WTF-8.
    fn as_ref(&self) -> Option<&Wtf8> {
        Some(self.as_wtf8())
    }

    /// Whether the underlying data is empty.
    fn is_empty(&self) -> bool {
        self.data.is_empty()
    }
}
1981
impl AnyStrWrapper<str> for PyStrRef {
    /// The `&str` view of the data, or `None` when the data has no such view.
    fn as_ref(&self) -> Option<&str> {
        self.data.as_str()
    }

    /// Whether the underlying data is empty.
    fn is_empty(&self) -> bool {
        self.data.is_empty()
    }
}
1991
impl AnyStrWrapper<AsciiStr> for PyStrRef {
    /// The ASCII view of the data, or `None` when the data has no such view.
    fn as_ref(&self) -> Option<&AsciiStr> {
        self.data.as_ascii()
    }

    /// Whether the underlying data is empty.
    fn is_empty(&self) -> bool {
        self.data.is_empty()
    }
}
2001
/// A `PyStr` that is expected to hold valid UTF-8, so it can be borrowed as
/// `&str` without a fallible check.
///
/// The wrapper is `#[repr(transparent)]`; pointer and reference casts between
/// `PyStr` and `PyUtf8Str` elsewhere in this file rely on that.
#[repr(transparent)]
#[derive(Debug)]
pub struct PyUtf8Str(PyStr);
2005
impl fmt::Display for PyUtf8Str {
    /// Formats exactly like the wrapped `PyStr`.
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        self.0.fmt(f)
    }
}
2012
impl MaybeTraverse for PyUtf8Str {
    const HAS_TRAVERSE: bool = true;
    const HAS_CLEAR: bool = false;

    /// Forwards traversal to the wrapped `PyStr`.
    fn try_traverse(&self, traverse_fn: &mut TraverseFn<'_>) {
        self.0.try_traverse(traverse_fn);
    }

    /// Intentionally a no-op (`HAS_CLEAR` is `false`).
    fn try_clear(&mut self, _out: &mut Vec<PyObjectRef>) {
    }
}
2025
impl PyPayload for PyUtf8Str {
    /// Shares the `str` type with `PyStr`.
    #[inline]
    fn class(ctx: &Context) -> &'static Py<PyType> {
        ctx.types.str_type
    }

    /// Uses `PyStr`'s type id, so objects carrying a `PyStr` payload are
    /// candidates for downcasting to `PyUtf8Str`.
    const PAYLOAD_TYPE_ID: core::any::TypeId = core::any::TypeId::of::<PyStr>();

    /// Accepts the downcast only when the string reports itself as UTF-8.
    ///
    /// # Safety
    ///
    /// The body reinterprets `obj` as a `PyStr` without checking.
    /// NOTE(review): presumably the caller must already have matched
    /// `PAYLOAD_TYPE_ID` — confirm against the trait's contract.
    unsafe fn validate_downcastable_from(obj: &PyObject) -> bool {
        let wtf8 = unsafe { obj.downcast_unchecked_ref::<PyStr>() };
        wtf8.is_utf8()
    }

    /// Checked variant: errors when `obj` is not a `PyStr` or when
    /// `ensure_valid_utf8` fails.
    fn try_downcast_from(obj: &PyObject, vm: &VirtualMachine) -> PyResult<()> {
        let str = obj.try_downcast_ref::<PyStr>(vm)?;
        str.ensure_valid_utf8(vm)
    }
}
2045
impl<'a> From<&'a AsciiStr> for PyUtf8Str {
    /// Copies the ASCII slice and converts the owned copy.
    fn from(s: &'a AsciiStr) -> Self {
        s.to_owned().into()
    }
}
2051
impl From<AsciiString> for PyUtf8Str {
    /// Converts through `Box<AsciiStr>`.
    fn from(s: AsciiString) -> Self {
        s.into_boxed_ascii_str().into()
    }
}
2057
impl From<Box<AsciiStr>> for PyUtf8Str {
    /// Wraps boxed ASCII data.
    fn from(s: Box<AsciiStr>) -> Self {
        let data = StrData::from(s);
        // SAFETY: ASCII text is always valid UTF-8.
        unsafe { Self::from_str_data_unchecked(data) }
    }
}
2064
impl From<AsciiChar> for PyUtf8Str {
    /// Builds a one-character string from an ASCII character.
    fn from(ch: AsciiChar) -> Self {
        AsciiString::from(ch).into()
    }
}
2070
impl<'a> From<&'a str> for PyUtf8Str {
    /// Copies the slice and converts the owned copy.
    fn from(s: &'a str) -> Self {
        s.to_owned().into()
    }
}
2076
impl From<String> for PyUtf8Str {
    /// Converts through `Box<str>`.
    fn from(s: String) -> Self {
        s.into_boxed_str().into()
    }
}
2082
impl From<char> for PyUtf8Str {
    /// Builds a one-character string.
    fn from(ch: char) -> Self {
        let data = StrData::from(ch);
        // SAFETY: a `char` is a Unicode scalar value, so it encodes as valid UTF-8.
        unsafe { Self::from_str_data_unchecked(data) }
    }
}
2089
impl<'a> From<alloc::borrow::Cow<'a, str>> for PyUtf8Str {
    /// Takes ownership of the text (copying if it was borrowed) and converts it.
    fn from(s: alloc::borrow::Cow<'a, str>) -> Self {
        s.into_owned().into()
    }
}
2095
impl From<Box<str>> for PyUtf8Str {
    /// Wraps boxed string data.
    #[inline]
    fn from(value: Box<str>) -> Self {
        let data = StrData::from(value);
        // SAFETY: `str` is valid UTF-8 by definition.
        unsafe { Self::from_str_data_unchecked(data) }
    }
}
2103
impl AsRef<Wtf8> for PyUtf8Str {
    /// Borrows the wrapped string as WTF-8.
    #[inline]
    fn as_ref(&self) -> &Wtf8 {
        self.0.as_wtf8()
    }
}
2110
impl AsRef<str> for PyUtf8Str {
    /// Borrows the contents as `&str` via `as_str`.
    #[inline]
    fn as_ref(&self) -> &str {
        self.as_str()
    }
}
2117
impl PyUtf8Str {
    /// Wraps `data` without validating it.
    ///
    /// # Safety
    ///
    /// `data` must be valid UTF-8: `as_str` later unwraps the `&str` view
    /// without a check. All callers in this file pass data built from `str`,
    /// `char` or ASCII values.
    unsafe fn from_str_data_unchecked(data: StrData) -> Self {
        Self(PyStr::from(data))
    }

    /// Borrows the contents as WTF-8.
    #[inline]
    pub fn as_wtf8(&self) -> &Wtf8 {
        self.0.as_wtf8()
    }

    /// Borrows the contents as `&str`.
    ///
    /// Debug builds assert the UTF-8 invariant; release builds assume it.
    pub fn as_str(&self) -> &str {
        debug_assert!(
            self.0.is_utf8(),
            "PyUtf8Str invariant violated: inner string is not valid UTF-8"
        );
        // SAFETY: relies on the type invariant asserted above, under which
        // `to_str` is expected to return `Some`.
        unsafe { self.0.to_str().unwrap_unchecked() }
    }

    /// The UTF-8 bytes of the string.
    #[inline]
    pub fn as_bytes(&self) -> &[u8] {
        self.as_str().as_bytes()
    }

    /// Length in bytes, as reported by the wrapped string.
    #[inline]
    pub fn byte_len(&self) -> usize {
        self.0.byte_len()
    }

    /// Whether the wrapped string is empty.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.0.is_empty()
    }

    /// Length in characters, as reported by the wrapped string.
    #[inline]
    pub fn char_len(&self) -> usize {
        self.0.char_len()
    }
}
2162
impl Py<PyUtf8Str> {
    /// Views this object as a `Py<PyStr>` without copying.
    pub fn as_pystr(&self) -> &Py<PyStr> {
        // SAFETY: `PyUtf8Str` is a `#[repr(transparent)]` wrapper around `PyStr`.
        // NOTE(review): this additionally assumes `Py<T>`'s layout does not
        // otherwise depend on `T` — confirm against `Py`'s definition.
        unsafe {
            &*(self as *const Self as *const Py<PyStr>)
        }
    }

    /// Borrows the contents as `&str`.
    ///
    /// Debug builds panic if the UTF-8 invariant is broken; release builds
    /// treat that case as unreachable.
    #[inline]
    pub fn as_str(&self) -> &str {
        self.as_pystr().to_str().unwrap_or_else(|| {
            debug_assert!(false, "PyUtf8Str invariant violated");
            // SAFETY: relies on the type invariant that the string is valid
            // UTF-8, so `to_str` is not expected to return `None`.
            unsafe { core::hint::unreachable_unchecked() }
        })
    }
}
2182
impl PyRef<PyUtf8Str> {
    /// Reinterprets this reference as a plain `PyStrRef` without copying.
    pub fn into_wtf8(self) -> PyStrRef {
        // SAFETY: `PyUtf8Str` is a `#[repr(transparent)]` wrapper around `PyStr`.
        // NOTE(review): also assumes `PyRef<T>`'s layout does not otherwise
        // depend on `T` — confirm against `PyRef`'s definition.
        unsafe { mem::transmute::<Self, PyStrRef>(self) }
    }
}
2189
impl From<PyRef<PyUtf8Str>> for PyRef<PyStr> {
    /// Same conversion as `into_wtf8`.
    fn from(s: PyRef<PyUtf8Str>) -> Self {
        s.into_wtf8()
    }
}
2195
impl PartialEq for PyUtf8Str {
    /// Two values are equal when their `&str` contents are equal.
    fn eq(&self, other: &Self) -> bool {
        self.as_str() == other.as_str()
    }
}
impl Eq for PyUtf8Str {}
2202
/// `String` as the owned container for `str`; each method forwards to the
/// inherent `String` method of the same name.
impl AnyStrContainer<str> for String {
    fn new() -> Self {
        Self::new()
    }

    fn with_capacity(capacity: usize) -> Self {
        Self::with_capacity(capacity)
    }

    fn push_str(&mut self, other: &str) {
        Self::push_str(self, other)
    }
}
2216
/// `char` as the element type of `str`.
impl anystr::AnyChar for char {
    fn is_lowercase(self) -> bool {
        self.is_lowercase()
    }

    fn is_uppercase(self) -> bool {
        self.is_uppercase()
    }

    /// Number of bytes this character occupies in UTF-8.
    fn bytes_len(self) -> usize {
        self.len_utf8()
    }
}
2230
/// `AnyStr` operations for UTF-8 string slices.
impl AnyStr for str {
    type Char = char;
    type Container = String;

    fn to_container(&self) -> Self::Container {
        self.to_owned()
    }

    fn as_bytes(&self) -> &[u8] {
        self.as_bytes()
    }

    fn elements(&self) -> impl Iterator<Item = char> {
        Self::chars(self)
    }

    /// Sub-slice addressed by byte offsets.
    fn get_bytes(&self, range: core::ops::Range<usize>) -> &Self {
        &self[range]
    }

    /// Sub-slice addressed by character positions.
    fn get_chars(&self, range: core::ops::Range<usize>) -> &Self {
        rustpython_common::str::get_chars(self, range)
    }

    fn is_empty(&self) -> bool {
        Self::is_empty(self)
    }

    fn bytes_len(&self) -> usize {
        Self::len(self)
    }

    /// Splits on runs of whitespace from the left, performing at most
    /// `maxsplit` splits (a negative value never reaches the `count == 0`
    /// stop condition). Each piece is passed through `convert`.
    ///
    /// Whitespace here means ASCII whitespace plus vertical tab (`\x0b`).
    fn py_split_whitespace<F>(&self, maxsplit: isize, convert: F) -> Vec<PyObjectRef>
    where
        F: Fn(&Self) -> PyObjectRef,
    {
        let mut splits = Vec::new();
        // Byte offset where the piece currently being scanned begins.
        let mut last_offset = 0;
        let mut count = maxsplit;
        for (offset, _) in self.match_indices(|c: char| c.is_ascii_whitespace() || c == '\x0b') {
            // Whitespace directly at the piece start: skip it. Every matched
            // character is a single byte, hence the `+ 1`.
            if last_offset == offset {
                last_offset += 1;
                continue;
            }
            // Split budget used up: the rest becomes the final piece.
            if count == 0 {
                break;
            }
            splits.push(convert(&self[last_offset..offset]));
            last_offset = offset + 1;
            count -= 1;
        }
        // Anything left after the last separator is the final piece.
        if last_offset != self.len() {
            splits.push(convert(&self[last_offset..]));
        }
        splits
    }

    /// Like `py_split_whitespace`, but scanning from the right. Pieces are
    /// pushed in the order they are found, i.e. right-most first.
    fn py_rsplit_whitespace<F>(&self, maxsplit: isize, convert: F) -> Vec<PyObjectRef>
    where
        F: Fn(&Self) -> PyObjectRef,
    {
        let mut splits = Vec::new();
        // Byte offset one past the end of the piece currently being scanned.
        let mut last_offset = self.len();
        let mut count = maxsplit;
        for (offset, _) in self.rmatch_indices(|c: char| c.is_ascii_whitespace() || c == '\x0b') {
            // Whitespace directly at the piece end: skip it.
            if last_offset == offset + 1 {
                last_offset -= 1;
                continue;
            }
            if count == 0 {
                break;
            }
            splits.push(convert(&self[offset + 1..last_offset]));
            last_offset = offset;
            count -= 1;
        }
        // Anything left before the first separator found is the final piece.
        if last_offset != 0 {
            splits.push(convert(&self[..last_offset]));
        }
        splits
    }
}
2315
/// `Wtf8Buf` as the owned container for `Wtf8`.
impl AnyStrContainer<Wtf8> for Wtf8Buf {
    fn new() -> Self {
        Self::new()
    }

    fn with_capacity(capacity: usize) -> Self {
        Self::with_capacity(capacity)
    }

    /// Appends via `push_wtf8`.
    fn push_str(&mut self, other: &Wtf8) {
        self.push_wtf8(other)
    }
}
2329
/// `CodePoint` as the element type of `Wtf8`. The case predicates are
/// evaluated through `is_char_and`, i.e. on the code point's `char` form.
impl anystr::AnyChar for CodePoint {
    fn is_lowercase(self) -> bool {
        self.is_char_and(char::is_lowercase)
    }
    fn is_uppercase(self) -> bool {
        self.is_char_and(char::is_uppercase)
    }
    /// Number of bytes this code point occupies in WTF-8.
    fn bytes_len(self) -> usize {
        self.len_wtf8()
    }
}
2341
/// `AnyStr` operations for WTF-8 slices.
impl AnyStr for Wtf8 {
    type Char = CodePoint;
    type Container = Wtf8Buf;

    fn to_container(&self) -> Self::Container {
        self.to_owned()
    }

    fn as_bytes(&self) -> &[u8] {
        self.as_bytes()
    }

    fn elements(&self) -> impl Iterator<Item = Self::Char> {
        self.code_points()
    }

    /// Sub-slice addressed by byte offsets.
    fn get_bytes(&self, range: core::ops::Range<usize>) -> &Self {
        &self[range]
    }

    /// Sub-slice addressed by code-point positions.
    fn get_chars(&self, range: core::ops::Range<usize>) -> &Self {
        rustpython_common::str::get_codepoints(self, range)
    }

    fn bytes_len(&self) -> usize {
        self.len()
    }

    fn is_empty(&self) -> bool {
        self.is_empty()
    }

    /// Splits on runs of whitespace from the left, performing at most
    /// `maxsplit` splits (a negative value never reaches the `count == 0`
    /// stop condition). Each piece is passed through `convert`.
    ///
    /// Whitespace here means ASCII whitespace plus vertical tab (`\x0b`).
    fn py_split_whitespace<F>(&self, maxsplit: isize, convert: F) -> Vec<PyObjectRef>
    where
        F: Fn(&Self) -> PyObjectRef,
    {
        let mut splits = Vec::new();
        // Byte offset where the piece currently being scanned begins.
        let mut last_offset = 0;
        let mut count = maxsplit;
        for (offset, _) in self
            .code_point_indices()
            .filter(|(_, c)| c.is_char_and(|c| c.is_ascii_whitespace() || c == '\x0b'))
        {
            // Whitespace directly at the piece start: skip it. Every matched
            // code point is a single byte, hence the `+ 1`.
            if last_offset == offset {
                last_offset += 1;
                continue;
            }
            // Split budget used up: the rest becomes the final piece.
            if count == 0 {
                break;
            }
            splits.push(convert(&self[last_offset..offset]));
            last_offset = offset + 1;
            count -= 1;
        }
        // Anything left after the last separator is the final piece.
        if last_offset != self.len() {
            splits.push(convert(&self[last_offset..]));
        }
        splits
    }

    /// Like `py_split_whitespace`, but scanning from the right. Pieces are
    /// pushed in the order they are found, i.e. right-most first.
    fn py_rsplit_whitespace<F>(&self, maxsplit: isize, convert: F) -> Vec<PyObjectRef>
    where
        F: Fn(&Self) -> PyObjectRef,
    {
        let mut splits = Vec::new();
        // Byte offset one past the end of the piece currently being scanned.
        let mut last_offset = self.len();
        let mut count = maxsplit;
        for (offset, _) in self
            .code_point_indices()
            .rev()
            .filter(|(_, c)| c.is_char_and(|c| c.is_ascii_whitespace() || c == '\x0b'))
        {
            // Whitespace directly at the piece end: skip it.
            if last_offset == offset + 1 {
                last_offset -= 1;
                continue;
            }
            if count == 0 {
                break;
            }
            splits.push(convert(&self[offset + 1..last_offset]));
            last_offset = offset;
            count -= 1;
        }
        // Anything left before the first separator found is the final piece.
        if last_offset != 0 {
            splits.push(convert(&self[..last_offset]));
        }
        splits
    }
}
2433
/// `AsciiString` as the owned container for `AsciiStr`; each method forwards
/// to the inherent `AsciiString` method of the same name.
impl AnyStrContainer<AsciiStr> for AsciiString {
    fn new() -> Self {
        Self::new()
    }

    fn with_capacity(capacity: usize) -> Self {
        Self::with_capacity(capacity)
    }

    fn push_str(&mut self, other: &AsciiStr) {
        Self::push_str(self, other)
    }
}
2447
/// `AsciiChar` as the element type of `AsciiStr`.
impl anystr::AnyChar for ascii::AsciiChar {
    fn is_lowercase(self) -> bool {
        self.is_lowercase()
    }

    fn is_uppercase(self) -> bool {
        self.is_uppercase()
    }

    /// An ASCII character always occupies one byte.
    fn bytes_len(self) -> usize {
        1
    }
}
2461
/// Bytes treated as separators by the ASCII whitespace split helpers:
/// space, tab, line feed, form feed, carriage return and vertical tab.
const ASCII_WHITESPACES: [u8; 6] = [0x20, 0x09, 0x0a, 0x0c, 0x0d, 0x0b];
2463
2464impl AnyStr for AsciiStr {
2465 type Char = AsciiChar;
2466 type Container = AsciiString;
2467
2468 fn to_container(&self) -> Self::Container {
2469 self.to_ascii_string()
2470 }
2471
2472 fn as_bytes(&self) -> &[u8] {
2473 self.as_bytes()
2474 }
2475
2476 fn elements(&self) -> impl Iterator<Item = Self::Char> {
2477 self.chars()
2478 }
2479
2480 fn get_bytes(&self, range: core::ops::Range<usize>) -> &Self {
2481 &self[range]
2482 }
2483
2484 fn get_chars(&self, range: core::ops::Range<usize>) -> &Self {
2485 &self[range]
2486 }
2487
2488 fn bytes_len(&self) -> usize {
2489 self.len()
2490 }
2491
2492 fn is_empty(&self) -> bool {
2493 self.is_empty()
2494 }
2495
2496 fn py_split_whitespace<F>(&self, maxsplit: isize, convert: F) -> Vec<PyObjectRef>
2497 where
2498 F: Fn(&Self) -> PyObjectRef,
2499 {
2500 let mut splits = Vec::new();
2501 let mut count = maxsplit;
2502 let mut haystack = self;
2503 while let Some(offset) = haystack.as_bytes().find_byteset(ASCII_WHITESPACES) {
2504 if offset != 0 {
2505 if count == 0 {
2506 break;
2507 }
2508 splits.push(convert(&haystack[..offset]));
2509 count -= 1;
2510 }
2511 haystack = &haystack[offset + 1..];
2512 }
2513 if !haystack.is_empty() {
2514 splits.push(convert(haystack));
2515 }
2516 splits
2517 }
2518
2519 fn py_rsplit_whitespace<F>(&self, maxsplit: isize, convert: F) -> Vec<PyObjectRef>
2520 where
2521 F: Fn(&Self) -> PyObjectRef,
2522 {
2523 let mut splits = Vec::new();
2525 let mut count = maxsplit;
2526 let mut haystack = self;
2527 while let Some(offset) = haystack.as_bytes().rfind_byteset(ASCII_WHITESPACES) {
2528 if offset + 1 != haystack.len() {
2529 if count == 0 {
2530 break;
2531 }
2532 splits.push(convert(&haystack[offset + 1..]));
2533 count -= 1;
2534 }
2535 haystack = &haystack[..offset];
2536 }
2537 if !haystack.is_empty() {
2538 splits.push(convert(haystack));
2539 }
2540 splits
2541 }
2542}
2543
/// Alias for `PyInterned<PyStr>`: a `PyStr` wrapped by the interning type.
pub type PyStrInterned = PyInterned<PyStr>;
2547
2548impl PyStrInterned {
2549 #[inline]
2550 pub fn to_exact(&'static self) -> PyRefExact<PyStr> {
2551 unsafe { PyRefExact::new_unchecked(self.to_owned()) }
2552 }
2553}
2554
/// Displays an interned string by forwarding to the `fmt` method of its
/// `data` field.
impl core::fmt::Display for PyStrInterned {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        // NOTE(review): which `fmt` this resolves to depends on the type of
        // `data` and the traits in scope, neither of which is visible here —
        // presumably the `Display` impl of the string data; confirm.
        self.data.fmt(f)
    }
}
2560
2561impl AsRef<str> for PyStrInterned {
2562 #[inline(always)]
2563 fn as_ref(&self) -> &str {
2564 self.to_str()
2565 .expect("Interned PyStr should always be valid UTF-8")
2566 }
2567}
2568
/// Alias for `PyInterned<PyUtf8Str>`: a `PyUtf8Str` wrapped by the interning type.
pub type PyUtf8StrInterned = PyInterned<PyUtf8Str>;
2573
2574impl PyUtf8StrInterned {
2575 #[inline]
2577 pub fn as_str(&self) -> &str {
2578 Py::<PyUtf8Str>::as_str(self)
2579 }
2580
2581 #[inline]
2583 pub fn as_interned_str(&self) -> &PyStrInterned {
2584 unsafe { &*(self as *const Self as *const PyStrInterned) }
2587 }
2588
2589 #[inline]
2594 pub unsafe fn from_str_interned_unchecked(s: &PyStrInterned) -> &Self {
2595 unsafe { &*(s as *const PyStrInterned as *const Self) }
2596 }
2597}
2598
2599impl core::fmt::Display for PyUtf8StrInterned {
2600 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
2601 f.write_str(self.as_str())
2602 }
2603}
2604
2605impl AsRef<str> for PyUtf8StrInterned {
2606 #[inline(always)]
2607 fn as_ref(&self) -> &str {
2608 self.as_str()
2609 }
2610}
2611
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Interpreter;
    use rustpython_common::wtf8::Wtf8Buf;

    /// `title()` must produce the expected title-cased text for each input.
    #[test]
    fn str_title() {
        // Each case is `(expected output, input)`.
        let cases = [
            (" Hello ", " hello "),
            ("Hello ", "hello "),
            ("Hello ", "Hello "),
            ("Format This As Title String", "fOrMaT thIs aS titLe String"),
            ("Format,This-As*Title;String", "fOrMaT,thIs-aS*titLe;String"),
            ("Getint", "getInt"),
            ("Greek Ωppercases ...", "greek ωppercases ..."),
            ("Greek ῼitlecases ...", "greek ῳitlecases ..."),
        ];
        for (expected, input) in cases {
            let titled = PyStr::from(input).title();
            assert_eq!(titled.as_str(), Ok(expected));
        }
    }

    /// `istitle()` must accept title-cased strings and reject everything else.
    #[test]
    fn str_istitle() {
        let accepted = [
            "A",
            "A Titlecased Line",
            "A\nTitlecased Line",
            "A Titlecased, Line",
            "Greek Ωppercases ...",
            "Greek ῼitlecases ...",
        ];
        for text in accepted {
            assert!(PyStr::from(text).istitle());
        }

        let rejected = [
            "",
            "a",
            "\n",
            "Not a capitalized String",
            "Not\ta Titlecase String",
            "Not--a Titlecase String",
            "NOT",
        ];
        for text in rejected {
            assert!(!PyStr::from(text).istitle());
        }
    }

    /// Builds a translation table from a dict with `maketrans`, applies it
    /// with `translate`, and checks that a non-table argument is rejected
    /// with `TypeError`.
    #[test]
    fn str_maketrans_and_translate() {
        Interpreter::without_stdlib(Default::default()).enter(|vm| {
            // "a" -> emoji, "b" -> None, "c" -> multi-character string.
            let mapping = vm.ctx.new_dict();
            mapping
                .set_item("a", vm.ctx.new_str("🎅").into(), vm)
                .unwrap();
            mapping.set_item("b", vm.ctx.none(), vm).unwrap();
            mapping
                .set_item("c", vm.ctx.new_str(ascii!("xda")).into(), vm)
                .unwrap();

            let table =
                PyStr::maketrans(mapping.into(), OptionalArg::Missing, OptionalArg::Missing, vm)
                    .unwrap();
            let text = PyStr::from("abc");
            let output = text.translate(table, vm).unwrap();
            assert_eq!(output, Wtf8Buf::from("🎅xda"));

            // An integer is not a usable translation table.
            let failure = text.translate(vm.ctx.new_int(3).into(), vm).unwrap_err();
            assert_eq!("TypeError", &*failure.class().name());
        })
    }
}