1use crate::Py_hash_t;
2use crate::{PyObject, Py_UCS1, Py_UCS2, Py_UCS4, Py_ssize_t};
3use libc::wchar_t;
4use std::ffi::{c_char, c_int, c_uint, c_void};
5
/// Byte-array backing store for a packed C bitfield, addressed one bit at a time.
///
/// Bit numbering depends on the target's endianness: on little-endian targets storage bit 0 is
/// the least significant bit of byte 0, on big-endian targets it is the most significant bit.
#[cfg(not(Py_3_14))]
#[repr(C)]
struct BitfieldUnit<Storage> {
    storage: Storage,
}

#[cfg(not(Py_3_14))]
impl<Storage> BitfieldUnit<Storage> {
    /// Wraps `storage` as-is.
    #[inline]
    pub const fn new(storage: Storage) -> Self {
        Self { storage }
    }
}

#[cfg(not(Py_3_14))]
impl<Storage> BitfieldUnit<Storage>
where
    Storage: AsRef<[u8]> + AsMut<[u8]>,
{
    /// Shift of storage bit `index` within its byte (mirrored on big-endian targets).
    #[inline]
    fn shift_in_byte(index: usize) -> usize {
        let low = index % 8;
        if cfg!(target_endian = "big") {
            7 - low
        } else {
            low
        }
    }

    /// Pairs the `i`-th bit of a `width`-bit field with its counterpart: the identity on
    /// little-endian targets, reversed order on big-endian targets.
    #[inline]
    fn mirror(i: usize, width: usize) -> usize {
        if cfg!(target_endian = "big") {
            width - 1 - i
        } else {
            i
        }
    }

    /// Returns whether storage bit `index` is set.
    #[inline]
    fn get_bit(&self, index: usize) -> bool {
        let bytes = self.storage.as_ref();
        debug_assert!(index / 8 < bytes.len());
        let selected = bytes[index / 8];
        (selected >> Self::shift_in_byte(index)) & 1 == 1
    }

    /// Sets (`val == true`) or clears (`val == false`) storage bit `index`.
    #[inline]
    fn set_bit(&mut self, index: usize, val: bool) {
        debug_assert!(index / 8 < self.storage.as_ref().len());
        let mask = 1u8 << Self::shift_in_byte(index);
        let slot = &mut self.storage.as_mut()[index / 8];
        *slot = if val { *slot | mask } else { *slot & !mask };
    }

    /// Reads the `bit_width`-bit field starting at storage bit `bit_offset`.
    #[inline]
    fn get(&self, bit_offset: usize, bit_width: u8) -> u64 {
        let width = usize::from(bit_width);
        debug_assert!(bit_width <= 64);
        debug_assert!(bit_offset / 8 < self.storage.as_ref().len());
        debug_assert!((bit_offset + width) / 8 <= self.storage.as_ref().len());
        (0..width)
            .filter(|&i| self.get_bit(i + bit_offset))
            .fold(0u64, |acc, i| acc | (1u64 << Self::mirror(i, width)))
    }

    /// Writes the low `bit_width` bits of `val` into the field starting at storage bit
    /// `bit_offset`; higher bits of `val` are ignored.
    #[inline]
    fn set(&mut self, bit_offset: usize, bit_width: u8, val: u64) {
        let width = usize::from(bit_width);
        debug_assert!(bit_width <= 64);
        debug_assert!(bit_offset / 8 < self.storage.as_ref().len());
        debug_assert!((bit_offset + width) / 8 <= self.storage.as_ref().len());
        for i in 0..width {
            let flag = 1u64 << i;
            self.set_bit(Self::mirror(i, width) + bit_offset, val & flag != 0);
        }
    }
}
120
// Bit layout of the `state` word: each field starts where the previous one ends.

/// First bit of the `interned` field.
#[cfg(not(Py_3_14))]
const STATE_INTERNED_INDEX: usize = 0;
/// Width in bits of the `interned` field.
#[cfg(not(Py_3_14))]
const STATE_INTERNED_WIDTH: u8 = 2;

/// First bit of the `kind` field.
#[cfg(not(Py_3_14))]
const STATE_KIND_INDEX: usize = STATE_INTERNED_INDEX + STATE_INTERNED_WIDTH as usize;
/// Width in bits of the `kind` field.
#[cfg(not(Py_3_14))]
const STATE_KIND_WIDTH: u8 = 3;

/// First bit of the `compact` field.
#[cfg(not(Py_3_14))]
const STATE_COMPACT_INDEX: usize = STATE_KIND_INDEX + STATE_KIND_WIDTH as usize;
/// Width in bits of the `compact` field.
#[cfg(not(Py_3_14))]
const STATE_COMPACT_WIDTH: u8 = 1;

/// First bit of the `ascii` field.
#[cfg(not(Py_3_14))]
const STATE_ASCII_INDEX: usize = STATE_COMPACT_INDEX + STATE_COMPACT_WIDTH as usize;
/// Width in bits of the `ascii` field.
#[cfg(not(Py_3_14))]
const STATE_ASCII_WIDTH: u8 = 1;

/// First bit of the `statically_allocated` field (Python 3.12 and 3.13 only).
#[cfg(all(not(Py_3_14), Py_3_12))]
const STATE_STATICALLY_ALLOCATED_INDEX: usize = STATE_ASCII_INDEX + STATE_ASCII_WIDTH as usize;
/// Width in bits of the `statically_allocated` field.
#[cfg(all(not(Py_3_14), Py_3_12))]
const STATE_STATICALLY_ALLOCATED_WIDTH: u8 = 1;

/// First bit of the `ready` field (before Python 3.12 only); it occupies the slot that
/// `statically_allocated` uses on later versions.
#[cfg(not(Py_3_12))]
const STATE_READY_INDEX: usize = STATE_ASCII_INDEX + STATE_ASCII_WIDTH as usize;
/// Width in bits of the `ready` field.
#[cfg(not(Py_3_12))]
const STATE_READY_WIDTH: u8 = 1;
153
154#[cfg(not(Py_3_14))]
164#[repr(C)]
165struct PyASCIIObjectState {
166 bitfield_align: [u8; 0],
167 bitfield: BitfieldUnit<[u8; 4usize]>,
168}
169
170#[cfg(not(Py_3_14))]
172#[allow(clippy::useless_transmute)]
173impl PyASCIIObjectState {
174 #[inline]
175 unsafe fn interned(&self) -> c_uint {
176 std::mem::transmute(
177 self.bitfield
178 .get(STATE_INTERNED_INDEX, STATE_INTERNED_WIDTH) as u32,
179 )
180 }
181
182 #[inline]
183 unsafe fn set_interned(&mut self, val: c_uint) {
184 let val: u32 = std::mem::transmute(val);
185 self.bitfield
186 .set(STATE_INTERNED_INDEX, STATE_INTERNED_WIDTH, val as u64)
187 }
188
189 #[inline]
190 unsafe fn kind(&self) -> c_uint {
191 std::mem::transmute(self.bitfield.get(STATE_KIND_INDEX, STATE_KIND_WIDTH) as u32)
192 }
193
194 #[inline]
195 unsafe fn set_kind(&mut self, val: c_uint) {
196 let val: u32 = std::mem::transmute(val);
197 self.bitfield
198 .set(STATE_KIND_INDEX, STATE_KIND_WIDTH, val as u64)
199 }
200
201 #[inline]
202 unsafe fn compact(&self) -> c_uint {
203 std::mem::transmute(self.bitfield.get(STATE_COMPACT_INDEX, STATE_COMPACT_WIDTH) as u32)
204 }
205
206 #[inline]
207 unsafe fn set_compact(&mut self, val: c_uint) {
208 let val: u32 = std::mem::transmute(val);
209 self.bitfield
210 .set(STATE_COMPACT_INDEX, STATE_COMPACT_WIDTH, val as u64)
211 }
212
213 #[inline]
214 unsafe fn ascii(&self) -> c_uint {
215 std::mem::transmute(self.bitfield.get(STATE_ASCII_INDEX, STATE_ASCII_WIDTH) as u32)
216 }
217
218 #[inline]
219 unsafe fn set_ascii(&mut self, val: c_uint) {
220 let val: u32 = std::mem::transmute(val);
221 self.bitfield
222 .set(STATE_ASCII_INDEX, STATE_ASCII_WIDTH, val as u64)
223 }
224
225 #[cfg(Py_3_12)]
226 #[inline]
227 unsafe fn statically_allocated(&self) -> c_uint {
228 std::mem::transmute(self.bitfield.get(
229 STATE_STATICALLY_ALLOCATED_INDEX,
230 STATE_STATICALLY_ALLOCATED_WIDTH,
231 ) as u32)
232 }
233
234 #[cfg(Py_3_12)]
235 #[inline]
236 unsafe fn set_statically_allocated(&mut self, val: c_uint) {
237 let val: u32 = std::mem::transmute(val);
238 self.bitfield.set(
239 STATE_STATICALLY_ALLOCATED_INDEX,
240 STATE_STATICALLY_ALLOCATED_WIDTH,
241 val as u64,
242 )
243 }
244
245 #[cfg(not(Py_3_12))]
246 #[inline]
247 unsafe fn ready(&self) -> c_uint {
248 std::mem::transmute(self.bitfield.get(STATE_READY_INDEX, STATE_READY_WIDTH) as u32)
249 }
250
251 #[cfg(not(Py_3_12))]
252 #[inline]
253 unsafe fn set_ready(&mut self, val: c_uint) {
254 let val: u32 = std::mem::transmute(val);
255 self.bitfield
256 .set(STATE_READY_INDEX, STATE_READY_WIDTH, val as u64)
257 }
258}
259
260#[cfg(not(Py_3_14))]
261impl From<u32> for PyASCIIObjectState {
262 #[inline]
263 fn from(value: u32) -> Self {
264 PyASCIIObjectState {
265 bitfield_align: [],
266 bitfield: BitfieldUnit::new(value.to_ne_bytes()),
267 }
268 }
269}
270
271#[cfg(not(Py_3_14))]
272impl From<PyASCIIObjectState> for u32 {
273 #[inline]
274 fn from(value: PyASCIIObjectState) -> Self {
275 u32::from_ne_bytes(value.bitfield.storage)
276 }
277}
278
279#[repr(C)]
280pub struct PyASCIIObject {
281 pub ob_base: PyObject,
282 pub length: Py_ssize_t,
283 pub hash: Py_hash_t,
284 pub state: u32,
306 #[cfg(not(Py_3_12))]
307 pub wstr: *mut wchar_t,
308}
309
310#[cfg(not(Py_3_14))]
312impl PyASCIIObject {
313 #[cfg_attr(not(Py_3_12), allow(rustdoc::broken_intra_doc_links))] #[inline]
319 pub unsafe fn interned(&self) -> c_uint {
320 PyASCIIObjectState::from(self.state).interned()
321 }
322
323 #[cfg_attr(not(Py_3_12), allow(rustdoc::broken_intra_doc_links))] #[inline]
330 pub unsafe fn set_interned(&mut self, val: c_uint) {
331 let mut state = PyASCIIObjectState::from(self.state);
332 state.set_interned(val);
333 self.state = u32::from(state);
334 }
335
336 #[cfg_attr(not(Py_3_12), doc = "[`PyUnicode_WCHAR_KIND`], ")]
340 #[inline]
342 pub unsafe fn kind(&self) -> c_uint {
343 PyASCIIObjectState::from(self.state).kind()
344 }
345
346 #[cfg_attr(not(Py_3_12), doc = "[`PyUnicode_WCHAR_KIND`], ")]
350 #[inline]
352 pub unsafe fn set_kind(&mut self, val: c_uint) {
353 let mut state = PyASCIIObjectState::from(self.state);
354 state.set_kind(val);
355 self.state = u32::from(state);
356 }
357
358 #[inline]
362 pub unsafe fn compact(&self) -> c_uint {
363 PyASCIIObjectState::from(self.state).compact()
364 }
365
366 #[inline]
370 pub unsafe fn set_compact(&mut self, val: c_uint) {
371 let mut state = PyASCIIObjectState::from(self.state);
372 state.set_compact(val);
373 self.state = u32::from(state);
374 }
375
376 #[inline]
380 pub unsafe fn ascii(&self) -> c_uint {
381 PyASCIIObjectState::from(self.state).ascii()
382 }
383
384 #[inline]
388 #[cfg(not(all(Py_3_14, Py_GIL_DISABLED)))]
389 pub unsafe fn set_ascii(&mut self, val: c_uint) {
390 let mut state = PyASCIIObjectState::from(self.state);
391 state.set_ascii(val);
392 self.state = u32::from(state);
393 }
394
395 #[cfg(not(Py_3_12))]
399 #[inline]
400 pub unsafe fn ready(&self) -> c_uint {
401 PyASCIIObjectState::from(self.state).ready()
402 }
403
404 #[cfg(not(Py_3_12))]
408 #[inline]
409 pub unsafe fn set_ready(&mut self, val: c_uint) {
410 let mut state = PyASCIIObjectState::from(self.state);
411 state.set_ready(val);
412 self.state = u32::from(state);
413 }
414
415 #[inline]
419 #[cfg(Py_3_12)]
420 pub unsafe fn statically_allocated(&self) -> c_uint {
421 PyASCIIObjectState::from(self.state).statically_allocated()
422 }
423
424 #[inline]
428 #[cfg(Py_3_12)]
429 pub unsafe fn set_statically_allocated(&mut self, val: c_uint) {
430 let mut state = PyASCIIObjectState::from(self.state);
431 state.set_statically_allocated(val);
432 self.state = u32::from(state);
433 }
434}
435
436#[repr(C)]
437pub struct PyCompactUnicodeObject {
438 pub _base: PyASCIIObject,
439 pub utf8_length: Py_ssize_t,
440 pub utf8: *mut c_char,
441 #[cfg(not(Py_3_12))]
442 pub wstr_length: Py_ssize_t,
443}
444
445#[repr(C)]
446pub union PyUnicodeObjectData {
447 pub any: *mut c_void,
448 pub latin1: *mut Py_UCS1,
449 pub ucs2: *mut Py_UCS2,
450 pub ucs4: *mut Py_UCS4,
451}
452
453#[repr(C)]
454pub struct PyUnicodeObject {
455 pub _base: PyCompactUnicodeObject,
456 pub data: PyUnicodeObjectData,
457}
458
459extern_libpython! {
460 pub fn _PyUnicode_CheckConsistency(op: *mut PyObject, check_content: c_int) -> c_int;
461}
462
// Named values for the `interned` state field (by naming; see `PyASCIIObject::interned`).

/// `interned` value 0.
pub const SSTATE_NOT_INTERNED: c_uint = 0;
/// `interned` value 1.
pub const SSTATE_INTERNED_MORTAL: c_uint = 1;
/// `interned` value 2.
pub const SSTATE_INTERNED_IMMORTAL: c_uint = 2;
/// `interned` value 3; only defined for Python 3.12+.
#[cfg(Py_3_12)]
pub const SSTATE_INTERNED_IMMORTAL_STATIC: c_uint = 3;
473
474#[cfg(not(Py_3_14))]
475#[inline]
476pub unsafe fn PyUnicode_IS_ASCII(op: *mut PyObject) -> c_uint {
477 debug_assert!(crate::PyUnicode_Check(op) != 0);
478 #[cfg(not(Py_3_12))]
479 debug_assert!(PyUnicode_IS_READY(op) != 0);
480
481 (*(op as *mut PyASCIIObject)).ascii()
482}
483
484#[cfg(not(Py_3_14))]
485#[inline]
486pub unsafe fn PyUnicode_IS_COMPACT(op: *mut PyObject) -> c_uint {
487 (*(op as *mut PyASCIIObject)).compact()
488}
489
490#[cfg(not(Py_3_14))]
491#[inline]
492pub unsafe fn PyUnicode_IS_COMPACT_ASCII(op: *mut PyObject) -> c_uint {
493 ((*(op as *mut PyASCIIObject)).ascii() != 0 && PyUnicode_IS_COMPACT(op) != 0).into()
494}
495
// Named values for the `kind` state field (see `PyASCIIObject::kind`).

/// `kind` value 0; only defined before Python 3.12.
#[cfg(not(Py_3_12))]
#[deprecated(note = "Removed in Python 3.12")]
pub const PyUnicode_WCHAR_KIND: c_uint = 0;

/// `kind` value 1.
pub const PyUnicode_1BYTE_KIND: c_uint = 1;
/// `kind` value 2.
pub const PyUnicode_2BYTE_KIND: c_uint = 2;
/// `kind` value 4.
pub const PyUnicode_4BYTE_KIND: c_uint = 4;
503
504#[inline]
505pub unsafe fn PyUnicode_1BYTE_DATA(op: *mut PyObject) -> *mut Py_UCS1 {
506 PyUnicode_DATA(op) as *mut Py_UCS1
507}
508
509#[inline]
510pub unsafe fn PyUnicode_2BYTE_DATA(op: *mut PyObject) -> *mut Py_UCS2 {
511 PyUnicode_DATA(op) as *mut Py_UCS2
512}
513
514#[inline]
515pub unsafe fn PyUnicode_4BYTE_DATA(op: *mut PyObject) -> *mut Py_UCS4 {
516 PyUnicode_DATA(op) as *mut Py_UCS4
517}
518
// On Python 3.14+ `PyUnicode_KIND` is declared as a libpython symbol; earlier versions use the
// inline implementation of the same name in this file.
#[cfg(Py_3_14)]
extern_libpython! {
    pub fn PyUnicode_KIND(op: *mut PyObject) -> c_uint;
}
523
524#[cfg(not(Py_3_14))]
525#[inline]
526pub unsafe fn PyUnicode_KIND(op: *mut PyObject) -> c_uint {
527 debug_assert!(crate::PyUnicode_Check(op) != 0);
528 #[cfg(not(Py_3_12))]
529 debug_assert!(PyUnicode_IS_READY(op) != 0);
530
531 (*(op as *mut PyASCIIObject)).kind()
532}
533
534#[cfg(not(Py_3_14))]
535#[inline]
536pub unsafe fn _PyUnicode_COMPACT_DATA(op: *mut PyObject) -> *mut c_void {
537 if PyUnicode_IS_ASCII(op) != 0 {
538 (op as *mut PyASCIIObject).offset(1) as *mut c_void
539 } else {
540 (op as *mut PyCompactUnicodeObject).offset(1) as *mut c_void
541 }
542}
543
544#[inline]
545pub unsafe fn _PyUnicode_NONCOMPACT_DATA(op: *mut PyObject) -> *mut c_void {
546 debug_assert!(!(*(op as *mut PyUnicodeObject)).data.any.is_null());
547
548 (*(op as *mut PyUnicodeObject)).data.any
549}
550
551#[cfg(not(Py_3_14))]
552#[inline]
553pub unsafe fn PyUnicode_DATA(op: *mut PyObject) -> *mut c_void {
554 debug_assert!(crate::PyUnicode_Check(op) != 0);
555
556 if PyUnicode_IS_COMPACT(op) != 0 {
557 _PyUnicode_COMPACT_DATA(op)
558 } else {
559 _PyUnicode_NONCOMPACT_DATA(op)
560 }
561}
562
// On Python 3.14+ `PyUnicode_DATA` is declared as a libpython symbol; earlier versions use the
// inline implementation of the same name in this file.
#[cfg(Py_3_14)]
extern_libpython! {
    pub fn PyUnicode_DATA(op: *mut PyObject) -> *mut c_void;
}
567
568#[inline]
573pub unsafe fn PyUnicode_GET_LENGTH(op: *mut PyObject) -> Py_ssize_t {
574 debug_assert!(crate::PyUnicode_Check(op) != 0);
575 #[cfg(not(Py_3_12))]
576 debug_assert!(PyUnicode_IS_READY(op) != 0);
577
578 (*(op as *mut PyASCIIObject)).length
579}
580
/// Python 3.12+ variant: unconditionally reports `1` without inspecting the argument.
///
/// # Safety
///
/// The pointer is never dereferenced; the function is `unsafe` only to keep the same signature
/// as the pre-3.12 variant.
#[cfg(Py_3_12)]
#[inline]
pub unsafe fn PyUnicode_IS_READY(_op: *mut PyObject) -> c_uint {
    1
}
587
588#[cfg(not(Py_3_12))]
589#[inline]
590pub unsafe fn PyUnicode_IS_READY(op: *mut PyObject) -> c_uint {
591 (*(op as *mut PyASCIIObject)).ready()
592}
593
/// Python 3.12+ variant: does nothing and returns `0` without inspecting the argument.
///
/// # Safety
///
/// The pointer is never dereferenced; the function is `unsafe` only to keep the same signature
/// as the pre-3.12 variant.
#[cfg(Py_3_12)]
#[inline]
pub unsafe fn PyUnicode_READY(_op: *mut PyObject) -> c_int {
    0
}
599
600#[cfg(not(Py_3_12))]
601#[inline]
602pub unsafe fn PyUnicode_READY(op: *mut PyObject) -> c_int {
603 debug_assert!(crate::PyUnicode_Check(op) != 0);
604
605 if PyUnicode_IS_READY(op) != 0 {
606 0
607 } else {
608 _PyUnicode_Ready(op)
609 }
610}
611
612extern_libpython! {
617 pub fn PyUnicode_New(size: Py_ssize_t, maxchar: Py_UCS4) -> *mut PyObject;
618 pub fn _PyUnicode_Ready(unicode: *mut PyObject) -> c_int;
619
620 pub fn PyUnicode_CopyCharacters(
623 to: *mut PyObject,
624 to_start: Py_ssize_t,
625 from: *mut PyObject,
626 from_start: Py_ssize_t,
627 how_many: Py_ssize_t,
628 ) -> Py_ssize_t;
629
630 pub fn PyUnicode_Fill(
633 unicode: *mut PyObject,
634 start: Py_ssize_t,
635 length: Py_ssize_t,
636 fill_char: Py_UCS4,
637 ) -> Py_ssize_t;
638
639 #[cfg(not(Py_3_12))]
642 #[deprecated]
643 pub fn PyUnicode_FromUnicode(u: *const wchar_t, size: Py_ssize_t) -> *mut PyObject;
644
645 pub fn PyUnicode_FromKindAndData(
646 kind: c_int,
647 buffer: *const c_void,
648 size: Py_ssize_t,
649 ) -> *mut PyObject;
650
651 #[cfg(not(Py_3_12))]
655 #[deprecated]
656 pub fn PyUnicode_AsUnicode(unicode: *mut PyObject) -> *mut wchar_t;
657
658 #[cfg(not(Py_3_12))]
661 #[deprecated]
662 pub fn PyUnicode_AsUnicodeAndSize(
663 unicode: *mut PyObject,
664 size: *mut Py_ssize_t,
665 ) -> *mut wchar_t;
666
667 }
669
// Opaque handle type for the `PyUnicodeWriter_*` functions (Python 3.14+); it is only ever
// used behind a raw pointer in this file.
#[cfg(Py_3_14)]
opaque_struct!(pub PyUnicodeWriter);
672
// `PyUnicodeWriter` entry points implemented by libpython; the whole group only exists on
// Python 3.14+, so the version gate sits on the block rather than on each function.
#[cfg(Py_3_14)]
extern_libpython! {
    pub fn PyUnicodeWriter_Create(length: Py_ssize_t) -> *mut PyUnicodeWriter;
    pub fn PyUnicodeWriter_Finish(writer: *mut PyUnicodeWriter) -> *mut PyObject;
    pub fn PyUnicodeWriter_Discard(writer: *mut PyUnicodeWriter);
    pub fn PyUnicodeWriter_WriteChar(writer: *mut PyUnicodeWriter, ch: Py_UCS4) -> c_int;
    pub fn PyUnicodeWriter_WriteUTF8(
        writer: *mut PyUnicodeWriter,
        str: *const c_char,
        size: Py_ssize_t,
    ) -> c_int;
}
689
690extern_libpython! {
706 pub fn PyUnicode_AsUTF8(unicode: *mut PyObject) -> *const c_char;
709
710 pub fn PyUnicode_Encode(
713 s: *const wchar_t,
714 size: Py_ssize_t,
715 encoding: *const c_char,
716 errors: *const c_char,
717 ) -> *mut PyObject;
718
719 pub fn PyUnicode_EncodeUTF7(
720 data: *const wchar_t,
721 length: Py_ssize_t,
722 base64SetO: c_int,
723 base64WhiteSpace: c_int,
724 errors: *const c_char,
725 ) -> *mut PyObject;
726
727 pub fn PyUnicode_EncodeUTF8(
731 data: *const wchar_t,
732 length: Py_ssize_t,
733 errors: *const c_char,
734 ) -> *mut PyObject;
735
736 pub fn PyUnicode_EncodeUTF32(
737 data: *const wchar_t,
738 length: Py_ssize_t,
739 errors: *const c_char,
740 byteorder: c_int,
741 ) -> *mut PyObject;
742
743 pub fn PyUnicode_EncodeUTF16(
746 data: *const wchar_t,
747 length: Py_ssize_t,
748 errors: *const c_char,
749 byteorder: c_int,
750 ) -> *mut PyObject;
751
752 pub fn PyUnicode_EncodeUnicodeEscape(data: *const wchar_t, length: Py_ssize_t)
756 -> *mut PyObject;
757
758 pub fn PyUnicode_EncodeRawUnicodeEscape(
759 data: *const wchar_t,
760 length: Py_ssize_t,
761 ) -> *mut PyObject;
762
763 pub fn PyUnicode_EncodeLatin1(
766 data: *const wchar_t,
767 length: Py_ssize_t,
768 errors: *const c_char,
769 ) -> *mut PyObject;
770
771 pub fn PyUnicode_EncodeASCII(
774 data: *const wchar_t,
775 length: Py_ssize_t,
776 errors: *const c_char,
777 ) -> *mut PyObject;
778
779 pub fn PyUnicode_EncodeCharmap(
780 data: *const wchar_t,
781 length: Py_ssize_t,
782 mapping: *mut PyObject,
783 errors: *const c_char,
784 ) -> *mut PyObject;
785
786 pub fn PyUnicode_TranslateCharmap(
789 data: *const wchar_t,
790 length: Py_ssize_t,
791 table: *mut PyObject,
792 errors: *const c_char,
793 ) -> *mut PyObject;
794
795 pub fn PyUnicode_EncodeDecimal(
798 s: *mut wchar_t,
799 length: Py_ssize_t,
800 output: *mut c_char,
801 errors: *const c_char,
802 ) -> c_int;
803
804 pub fn PyUnicode_TransformDecimalToASCII(s: *mut wchar_t, length: Py_ssize_t) -> *mut PyObject;
805
806 }
808
809