pyo3_ffi/cpython/
unicodeobject.rs

1#[cfg(any(Py_3_11, not(PyPy)))]
2use crate::Py_hash_t;
3use crate::{PyObject, Py_UCS1, Py_UCS2, Py_UCS4, Py_ssize_t};
4use libc::wchar_t;
5use std::ffi::{c_char, c_int, c_uint, c_void};
6
7// skipped Py_UNICODE_ISSPACE()
8// skipped Py_UNICODE_ISLOWER()
9// skipped Py_UNICODE_ISUPPER()
10// skipped Py_UNICODE_ISTITLE()
11// skipped Py_UNICODE_ISLINEBREAK
12// skipped Py_UNICODE_TOLOWER
13// skipped Py_UNICODE_TOUPPER
14// skipped Py_UNICODE_TOTITLE
15// skipped Py_UNICODE_ISDECIMAL
16// skipped Py_UNICODE_ISDIGIT
17// skipped Py_UNICODE_ISNUMERIC
18// skipped Py_UNICODE_ISPRINTABLE
19// skipped Py_UNICODE_TODECIMAL
20// skipped Py_UNICODE_TODIGIT
21// skipped Py_UNICODE_TONUMERIC
22// skipped Py_UNICODE_ISALPHA
23// skipped Py_UNICODE_ISALNUM
24// skipped Py_UNICODE_COPY
25// skipped Py_UNICODE_FILL
26// skipped Py_UNICODE_IS_SURROGATE
27// skipped Py_UNICODE_IS_HIGH_SURROGATE
28// skipped Py_UNICODE_IS_LOW_SURROGATE
29// skipped Py_UNICODE_JOIN_SURROGATES
30// skipped Py_UNICODE_HIGH_SURROGATE
31// skipped Py_UNICODE_LOW_SURROGATE
32
33// generated by bindgen v0.63.0 (with small adaptations)
/// Raw storage for a C bitfield, addressed bit by bit.
///
/// `Storage` is expected to be a byte array; the accessors below treat it as a
/// sequence of bits whose numbering depends on the target endianness.
#[cfg(not(Py_3_14))]
#[repr(C)]
struct BitfieldUnit<Storage> {
    storage: Storage,
}

#[cfg(not(Py_3_14))]
impl<Storage> BitfieldUnit<Storage> {
    /// Wraps `storage` as-is.
    #[inline]
    pub const fn new(storage: Storage) -> Self {
        Self { storage }
    }
}

#[cfg(not(any(GraalPy, Py_3_14)))]
impl<Storage> BitfieldUnit<Storage>
where
    Storage: AsRef<[u8]> + AsMut<[u8]>,
{
    /// Returns whether the bit at absolute position `index` is set.
    #[inline]
    fn get_bit(&self, index: usize) -> bool {
        let bytes = self.storage.as_ref();
        debug_assert!(index / 8 < bytes.len());
        // On big-endian targets the bits inside a byte are numbered from the top.
        let shift = if cfg!(target_endian = "big") {
            7 - (index % 8)
        } else {
            index % 8
        };
        (bytes[index / 8] >> shift) & 1 == 1
    }

    /// Sets or clears the bit at absolute position `index`.
    #[inline]
    fn set_bit(&mut self, index: usize, val: bool) {
        debug_assert!(index / 8 < self.storage.as_ref().len());
        let shift = if cfg!(target_endian = "big") {
            7 - (index % 8)
        } else {
            index % 8
        };
        let mask = 1u8 << shift;
        let slot = &mut self.storage.as_mut()[index / 8];
        *slot = if val { *slot | mask } else { *slot & !mask };
    }

    /// Reads the `bit_width`-bit field that starts at `bit_offset`.
    #[inline]
    fn get(&self, bit_offset: usize, bit_width: u8) -> u64 {
        debug_assert!(bit_width <= 64);
        debug_assert!(bit_offset / 8 < self.storage.as_ref().len());
        debug_assert!((bit_offset + (bit_width as usize)) / 8 <= self.storage.as_ref().len());
        let width = usize::from(bit_width);
        (0..width)
            .filter(|&i| self.get_bit(bit_offset + i))
            .fold(0u64, |acc, i| {
                // Storage bit `i` maps to the mirrored value bit on big-endian targets.
                let pos = if cfg!(target_endian = "big") {
                    width - 1 - i
                } else {
                    i
                };
                acc | (1u64 << pos)
            })
    }

    /// Writes the low `bit_width` bits of `val` into the field that starts at `bit_offset`.
    #[inline]
    fn set(&mut self, bit_offset: usize, bit_width: u8, val: u64) {
        debug_assert!(bit_width <= 64);
        debug_assert!(bit_offset / 8 < self.storage.as_ref().len());
        debug_assert!((bit_offset + (bit_width as usize)) / 8 <= self.storage.as_ref().len());
        let width = usize::from(bit_width);
        for i in 0..width {
            let bit_is_set = (val >> i) & 1 == 1;
            let pos = if cfg!(target_endian = "big") {
                width - 1 - i
            } else {
                i
            };
            self.set_bit(bit_offset + pos, bit_is_set);
        }
    }
}
121
// Bit offset (`*_INDEX`) and bit width (`*_WIDTH`) of each named field in the
// `PyASCIIObject.state` bitfield. Every field starts where the previous one ends.

#[cfg(not(any(GraalPy, Py_3_14)))]
const STATE_INTERNED_INDEX: usize = 0;
#[cfg(not(any(GraalPy, Py_3_14)))]
const STATE_INTERNED_WIDTH: u8 = 2;

#[cfg(not(any(GraalPy, Py_3_14)))]
const STATE_KIND_INDEX: usize = STATE_INTERNED_INDEX + STATE_INTERNED_WIDTH as usize;
#[cfg(not(any(GraalPy, Py_3_14)))]
const STATE_KIND_WIDTH: u8 = 3;

#[cfg(not(any(GraalPy, Py_3_14)))]
const STATE_COMPACT_INDEX: usize = STATE_KIND_INDEX + STATE_KIND_WIDTH as usize;
#[cfg(not(any(GraalPy, Py_3_14)))]
const STATE_COMPACT_WIDTH: u8 = 1;

#[cfg(not(any(GraalPy, Py_3_14)))]
const STATE_ASCII_INDEX: usize = STATE_COMPACT_INDEX + STATE_COMPACT_WIDTH as usize;
#[cfg(not(any(GraalPy, Py_3_14)))]
const STATE_ASCII_WIDTH: u8 = 1;

// Python 3.12 and 3.13: the bit after `ascii` is `statically_allocated`.
#[cfg(all(not(any(GraalPy, Py_3_14)), Py_3_12))]
const STATE_STATICALLY_ALLOCATED_INDEX: usize = STATE_ASCII_INDEX + STATE_ASCII_WIDTH as usize;
#[cfg(all(not(any(GraalPy, Py_3_14)), Py_3_12))]
const STATE_STATICALLY_ALLOCATED_WIDTH: u8 = 1;

// Before Python 3.12: the bit after `ascii` is `ready`.
#[cfg(not(any(Py_3_12, GraalPy)))]
const STATE_READY_INDEX: usize = STATE_ASCII_INDEX + STATE_ASCII_WIDTH as usize;
#[cfg(not(any(Py_3_12, GraalPy)))]
const STATE_READY_WIDTH: u8 = 1;
154
155// generated by bindgen v0.63.0 (with small adaptations)
156// The same code is generated for Python 3.7, 3.8, 3.9, 3.10, and 3.11, but the "ready" field
157// has been removed from Python 3.12.
158
159/// Wrapper around the `PyASCIIObject.state` bitfield with getters and setters that work
160/// on most little- and big-endian architectures.
161///
162/// Memory layout of C bitfields is implementation defined, so these functions are still
163/// unsafe. Users must verify that they work as expected on the architectures they target.
164#[cfg(not(Py_3_14))]
165#[repr(C)]
166struct PyASCIIObjectState {
167    bitfield_align: [u8; 0],
168    bitfield: BitfieldUnit<[u8; 4usize]>,
169}
170
171// c_uint and u32 are not necessarily the same type on all targets / architectures
172#[cfg(not(any(GraalPy, Py_3_14)))]
173#[allow(clippy::useless_transmute)]
174impl PyASCIIObjectState {
175    #[inline]
176    unsafe fn interned(&self) -> c_uint {
177        std::mem::transmute(
178            self.bitfield
179                .get(STATE_INTERNED_INDEX, STATE_INTERNED_WIDTH) as u32,
180        )
181    }
182
183    #[inline]
184    unsafe fn set_interned(&mut self, val: c_uint) {
185        let val: u32 = std::mem::transmute(val);
186        self.bitfield
187            .set(STATE_INTERNED_INDEX, STATE_INTERNED_WIDTH, val as u64)
188    }
189
190    #[inline]
191    unsafe fn kind(&self) -> c_uint {
192        std::mem::transmute(self.bitfield.get(STATE_KIND_INDEX, STATE_KIND_WIDTH) as u32)
193    }
194
195    #[inline]
196    unsafe fn set_kind(&mut self, val: c_uint) {
197        let val: u32 = std::mem::transmute(val);
198        self.bitfield
199            .set(STATE_KIND_INDEX, STATE_KIND_WIDTH, val as u64)
200    }
201
202    #[inline]
203    unsafe fn compact(&self) -> c_uint {
204        std::mem::transmute(self.bitfield.get(STATE_COMPACT_INDEX, STATE_COMPACT_WIDTH) as u32)
205    }
206
207    #[inline]
208    unsafe fn set_compact(&mut self, val: c_uint) {
209        let val: u32 = std::mem::transmute(val);
210        self.bitfield
211            .set(STATE_COMPACT_INDEX, STATE_COMPACT_WIDTH, val as u64)
212    }
213
214    #[inline]
215    unsafe fn ascii(&self) -> c_uint {
216        std::mem::transmute(self.bitfield.get(STATE_ASCII_INDEX, STATE_ASCII_WIDTH) as u32)
217    }
218
219    #[inline]
220    unsafe fn set_ascii(&mut self, val: c_uint) {
221        let val: u32 = std::mem::transmute(val);
222        self.bitfield
223            .set(STATE_ASCII_INDEX, STATE_ASCII_WIDTH, val as u64)
224    }
225
226    #[cfg(Py_3_12)]
227    #[inline]
228    unsafe fn statically_allocated(&self) -> c_uint {
229        std::mem::transmute(self.bitfield.get(
230            STATE_STATICALLY_ALLOCATED_INDEX,
231            STATE_STATICALLY_ALLOCATED_WIDTH,
232        ) as u32)
233    }
234
235    #[cfg(Py_3_12)]
236    #[inline]
237    unsafe fn set_statically_allocated(&mut self, val: c_uint) {
238        let val: u32 = std::mem::transmute(val);
239        self.bitfield.set(
240            STATE_STATICALLY_ALLOCATED_INDEX,
241            STATE_STATICALLY_ALLOCATED_WIDTH,
242            val as u64,
243        )
244    }
245
246    #[cfg(not(Py_3_12))]
247    #[inline]
248    unsafe fn ready(&self) -> c_uint {
249        std::mem::transmute(self.bitfield.get(STATE_READY_INDEX, STATE_READY_WIDTH) as u32)
250    }
251
252    #[cfg(not(Py_3_12))]
253    #[inline]
254    unsafe fn set_ready(&mut self, val: c_uint) {
255        let val: u32 = std::mem::transmute(val);
256        self.bitfield
257            .set(STATE_READY_INDEX, STATE_READY_WIDTH, val as u64)
258    }
259}
260
261#[cfg(not(Py_3_14))]
262impl From<u32> for PyASCIIObjectState {
263    #[inline]
264    fn from(value: u32) -> Self {
265        PyASCIIObjectState {
266            bitfield_align: [],
267            bitfield: BitfieldUnit::new(value.to_ne_bytes()),
268        }
269    }
270}
271
272#[cfg(not(Py_3_14))]
273impl From<PyASCIIObjectState> for u32 {
274    #[inline]
275    fn from(value: PyASCIIObjectState) -> Self {
276        u32::from_ne_bytes(value.bitfield.storage)
277    }
278}
279
280#[repr(C)]
281pub struct PyASCIIObject {
282    pub ob_base: PyObject,
283    pub length: Py_ssize_t,
284    #[cfg(any(Py_3_11, not(PyPy)))]
285    pub hash: Py_hash_t,
286    /// A bit field with various properties.
287    ///
288    /// Rust doesn't expose bitfields. So we have accessor functions for
289    /// retrieving values.
290    ///
291    /// Before 3.12:
292    /// unsigned int interned:2; // SSTATE_* constants.
293    /// unsigned int kind:3;     // PyUnicode_*_KIND constants.
294    /// unsigned int compact:1;
295    /// unsigned int ascii:1;
296    /// unsigned int ready:1;
297    /// unsigned int :24;
298    ///
299    /// 3.12, and 3.13
300    /// unsigned int interned:2; // SSTATE_* constants.
301    /// unsigned int kind:3;     // PyUnicode_*_KIND constants.
302    /// unsigned int compact:1;
303    /// unsigned int ascii:1;
304    /// unsigned int statically_allocated:1;
305    /// unsigned int :24;
306    /// on 3.14 and higher PyO3 doesn't access the internal state
307    pub state: u32,
308    #[cfg(not(Py_3_12))]
309    pub wstr: *mut wchar_t,
310}
311
312/// Interacting with the bitfield is not actually well-defined, so we mark these APIs unsafe.
313#[cfg(not(any(GraalPy, Py_3_14)))]
314impl PyASCIIObject {
315    #[cfg_attr(not(Py_3_12), allow(rustdoc::broken_intra_doc_links))] // SSTATE_INTERNED_IMMORTAL_STATIC requires 3.12
316    /// Get the `interned` field of the [`PyASCIIObject`] state bitfield.
317    ///
318    /// Returns one of: [`SSTATE_NOT_INTERNED`], [`SSTATE_INTERNED_MORTAL`],
319    /// [`SSTATE_INTERNED_IMMORTAL`], or [`SSTATE_INTERNED_IMMORTAL_STATIC`].
320    #[inline]
321    pub unsafe fn interned(&self) -> c_uint {
322        PyASCIIObjectState::from(self.state).interned()
323    }
324
325    #[cfg_attr(not(Py_3_12), allow(rustdoc::broken_intra_doc_links))] // SSTATE_INTERNED_IMMORTAL_STATIC requires 3.12
326    /// Set the `interned` field of the [`PyASCIIObject`] state bitfield.
327    ///
328    /// Calling this function with an argument that is not [`SSTATE_NOT_INTERNED`],
329    /// [`SSTATE_INTERNED_MORTAL`], [`SSTATE_INTERNED_IMMORTAL`], or
330    /// [`SSTATE_INTERNED_IMMORTAL_STATIC`] is invalid.
331    #[inline]
332    pub unsafe fn set_interned(&mut self, val: c_uint) {
333        let mut state = PyASCIIObjectState::from(self.state);
334        state.set_interned(val);
335        self.state = u32::from(state);
336    }
337
338    /// Get the `kind` field of the [`PyASCIIObject`] state bitfield.
339    ///
340    /// Returns one of:
341    #[cfg_attr(not(Py_3_12), doc = "[`PyUnicode_WCHAR_KIND`], ")]
342    /// [`PyUnicode_1BYTE_KIND`], [`PyUnicode_2BYTE_KIND`], or [`PyUnicode_4BYTE_KIND`].
343    #[inline]
344    pub unsafe fn kind(&self) -> c_uint {
345        PyASCIIObjectState::from(self.state).kind()
346    }
347
348    /// Set the `kind` field of the [`PyASCIIObject`] state bitfield.
349    ///
350    /// Calling this function with an argument that is not
351    #[cfg_attr(not(Py_3_12), doc = "[`PyUnicode_WCHAR_KIND`], ")]
352    /// [`PyUnicode_1BYTE_KIND`], [`PyUnicode_2BYTE_KIND`], or [`PyUnicode_4BYTE_KIND`] is invalid.
353    #[inline]
354    pub unsafe fn set_kind(&mut self, val: c_uint) {
355        let mut state = PyASCIIObjectState::from(self.state);
356        state.set_kind(val);
357        self.state = u32::from(state);
358    }
359
360    /// Get the `compact` field of the [`PyASCIIObject`] state bitfield.
361    ///
362    /// Returns either `0` or `1`.
363    #[inline]
364    pub unsafe fn compact(&self) -> c_uint {
365        PyASCIIObjectState::from(self.state).compact()
366    }
367
368    /// Set the `compact` flag of the [`PyASCIIObject`] state bitfield.
369    ///
370    /// Calling this function with an argument that is neither `0` nor `1` is invalid.
371    #[inline]
372    pub unsafe fn set_compact(&mut self, val: c_uint) {
373        let mut state = PyASCIIObjectState::from(self.state);
374        state.set_compact(val);
375        self.state = u32::from(state);
376    }
377
378    /// Get the `ascii` field of the [`PyASCIIObject`] state bitfield.
379    ///
380    /// Returns either `0` or `1`.
381    #[inline]
382    pub unsafe fn ascii(&self) -> c_uint {
383        PyASCIIObjectState::from(self.state).ascii()
384    }
385
386    /// Set the `ascii` flag of the [`PyASCIIObject`] state bitfield.
387    ///
388    /// Calling this function with an argument that is neither `0` nor `1` is invalid.
389    #[inline]
390    #[cfg(not(all(Py_3_14, Py_GIL_DISABLED)))]
391    pub unsafe fn set_ascii(&mut self, val: c_uint) {
392        let mut state = PyASCIIObjectState::from(self.state);
393        state.set_ascii(val);
394        self.state = u32::from(state);
395    }
396
397    /// Get the `ready` field of the [`PyASCIIObject`] state bitfield.
398    ///
399    /// Returns either `0` or `1`.
400    #[cfg(not(Py_3_12))]
401    #[inline]
402    pub unsafe fn ready(&self) -> c_uint {
403        PyASCIIObjectState::from(self.state).ready()
404    }
405
406    /// Set the `ready` flag of the [`PyASCIIObject`] state bitfield.
407    ///
408    /// Calling this function with an argument that is neither `0` nor `1` is invalid.
409    #[cfg(not(Py_3_12))]
410    #[inline]
411    pub unsafe fn set_ready(&mut self, val: c_uint) {
412        let mut state = PyASCIIObjectState::from(self.state);
413        state.set_ready(val);
414        self.state = u32::from(state);
415    }
416
417    /// Get the `statically_allocated` field of the [`PyASCIIObject`] state bitfield.
418    ///
419    /// Returns either `0` or `1`.
420    #[inline]
421    #[cfg(Py_3_12)]
422    pub unsafe fn statically_allocated(&self) -> c_uint {
423        PyASCIIObjectState::from(self.state).statically_allocated()
424    }
425
426    /// Set the `statically_allocated` flag of the [`PyASCIIObject`] state bitfield.
427    ///
428    /// Calling this function with an argument that is neither `0` nor `1` is invalid.
429    #[inline]
430    #[cfg(Py_3_12)]
431    pub unsafe fn set_statically_allocated(&mut self, val: c_uint) {
432        let mut state = PyASCIIObjectState::from(self.state);
433        state.set_statically_allocated(val);
434        self.state = u32::from(state);
435    }
436}
437
438#[repr(C)]
439pub struct PyCompactUnicodeObject {
440    pub _base: PyASCIIObject,
441    pub utf8_length: Py_ssize_t,
442    pub utf8: *mut c_char,
443    #[cfg(not(Py_3_12))]
444    pub wstr_length: Py_ssize_t,
445}
446
447#[repr(C)]
448pub union PyUnicodeObjectData {
449    pub any: *mut c_void,
450    pub latin1: *mut Py_UCS1,
451    pub ucs2: *mut Py_UCS2,
452    pub ucs4: *mut Py_UCS4,
453}
454
455#[repr(C)]
456pub struct PyUnicodeObject {
457    pub _base: PyCompactUnicodeObject,
458    pub data: PyUnicodeObjectData,
459}
460
461extern "C" {
462    #[cfg(not(any(PyPy, GraalPy)))]
463    pub fn _PyUnicode_CheckConsistency(op: *mut PyObject, check_content: c_int) -> c_int;
464}
465
466// skipped PyUnicode_GET_SIZE
467// skipped PyUnicode_GET_DATA_SIZE
468// skipped PyUnicode_AS_UNICODE
469// skipped PyUnicode_AS_DATA
470
// Values of the two-bit `interned` field in the `PyASCIIObject` state bitfield.
pub const SSTATE_NOT_INTERNED: c_uint = 0;
pub const SSTATE_INTERNED_MORTAL: c_uint = 1;
pub const SSTATE_INTERNED_IMMORTAL: c_uint = 2;
// Only defined for Python 3.12 and newer.
#[cfg(Py_3_12)]
pub const SSTATE_INTERNED_IMMORTAL_STATIC: c_uint = 3;
476
477#[cfg(not(any(GraalPy, Py_3_14)))]
478#[inline]
479pub unsafe fn PyUnicode_IS_ASCII(op: *mut PyObject) -> c_uint {
480    debug_assert!(crate::PyUnicode_Check(op) != 0);
481    #[cfg(not(Py_3_12))]
482    debug_assert!(PyUnicode_IS_READY(op) != 0);
483
484    (*(op as *mut PyASCIIObject)).ascii()
485}
486
487#[cfg(not(any(GraalPy, Py_3_14)))]
488#[inline]
489pub unsafe fn PyUnicode_IS_COMPACT(op: *mut PyObject) -> c_uint {
490    (*(op as *mut PyASCIIObject)).compact()
491}
492
493#[cfg(not(any(GraalPy, Py_3_14)))]
494#[inline]
495pub unsafe fn PyUnicode_IS_COMPACT_ASCII(op: *mut PyObject) -> c_uint {
496    ((*(op as *mut PyASCIIObject)).ascii() != 0 && PyUnicode_IS_COMPACT(op) != 0).into()
497}
498
// Values of the `kind` field in the `PyASCIIObject` state bitfield.

#[cfg(not(Py_3_12))]
#[deprecated(note = "Removed in Python 3.12")]
pub const PyUnicode_WCHAR_KIND: c_uint = 0;

pub const PyUnicode_1BYTE_KIND: c_uint = 1;
pub const PyUnicode_2BYTE_KIND: c_uint = 2;
pub const PyUnicode_4BYTE_KIND: c_uint = 4;
506
507#[cfg(not(any(GraalPy, PyPy)))]
508#[inline]
509pub unsafe fn PyUnicode_1BYTE_DATA(op: *mut PyObject) -> *mut Py_UCS1 {
510    PyUnicode_DATA(op) as *mut Py_UCS1
511}
512
513#[cfg(not(any(GraalPy, PyPy)))]
514#[inline]
515pub unsafe fn PyUnicode_2BYTE_DATA(op: *mut PyObject) -> *mut Py_UCS2 {
516    PyUnicode_DATA(op) as *mut Py_UCS2
517}
518
519#[cfg(not(any(GraalPy, PyPy)))]
520#[inline]
521pub unsafe fn PyUnicode_4BYTE_DATA(op: *mut PyObject) -> *mut Py_UCS4 {
522    PyUnicode_DATA(op) as *mut Py_UCS4
523}
524
// On Python 3.14+ `PyUnicode_KIND` is declared as an external symbol rather than
// implemented inline on top of the state bitfield.
#[cfg(all(not(GraalPy), Py_3_14))]
extern "C" {
    #[cfg_attr(PyPy, link_name = "PyPyUnicode_KIND")]
    pub fn PyUnicode_KIND(op: *mut PyObject) -> c_uint;
}
530
531#[cfg(all(not(GraalPy), not(Py_3_14)))]
532#[inline]
533pub unsafe fn PyUnicode_KIND(op: *mut PyObject) -> c_uint {
534    debug_assert!(crate::PyUnicode_Check(op) != 0);
535    #[cfg(not(Py_3_12))]
536    debug_assert!(PyUnicode_IS_READY(op) != 0);
537
538    (*(op as *mut PyASCIIObject)).kind()
539}
540
541#[cfg(not(any(GraalPy, Py_3_14)))]
542#[inline]
543pub unsafe fn _PyUnicode_COMPACT_DATA(op: *mut PyObject) -> *mut c_void {
544    if PyUnicode_IS_ASCII(op) != 0 {
545        (op as *mut PyASCIIObject).offset(1) as *mut c_void
546    } else {
547        (op as *mut PyCompactUnicodeObject).offset(1) as *mut c_void
548    }
549}
550
551#[cfg(not(any(GraalPy, PyPy)))]
552#[inline]
553pub unsafe fn _PyUnicode_NONCOMPACT_DATA(op: *mut PyObject) -> *mut c_void {
554    debug_assert!(!(*(op as *mut PyUnicodeObject)).data.any.is_null());
555
556    (*(op as *mut PyUnicodeObject)).data.any
557}
558
559#[cfg(not(any(GraalPy, PyPy, Py_3_14)))]
560#[inline]
561pub unsafe fn PyUnicode_DATA(op: *mut PyObject) -> *mut c_void {
562    debug_assert!(crate::PyUnicode_Check(op) != 0);
563
564    if PyUnicode_IS_COMPACT(op) != 0 {
565        _PyUnicode_COMPACT_DATA(op)
566    } else {
567        _PyUnicode_NONCOMPACT_DATA(op)
568    }
569}
570
// On Python 3.14+ `PyUnicode_DATA` is declared as an external symbol rather than
// implemented inline on top of the state bitfield.
#[cfg(all(not(GraalPy), Py_3_14))]
extern "C" {
    #[cfg_attr(PyPy, link_name = "PyPyUnicode_DATA")]
    pub fn PyUnicode_DATA(op: *mut PyObject) -> *mut c_void;
}
577
578// skipped PyUnicode_WRITE
579// skipped PyUnicode_READ
580// skipped PyUnicode_READ_CHAR
581
582#[cfg(not(GraalPy))]
583#[inline]
584pub unsafe fn PyUnicode_GET_LENGTH(op: *mut PyObject) -> Py_ssize_t {
585    debug_assert!(crate::PyUnicode_Check(op) != 0);
586    #[cfg(not(Py_3_12))]
587    debug_assert!(PyUnicode_IS_READY(op) != 0);
588
589    (*(op as *mut PyASCIIObject)).length
590}
591
/// Always returns `1`; the argument is ignored.
///
/// Kept in CPython for backwards compatibility.
#[cfg(any(Py_3_12, GraalPy))]
#[inline]
pub unsafe fn PyUnicode_IS_READY(_op: *mut PyObject) -> c_uint {
    1
}
598
599#[cfg(not(any(GraalPy, Py_3_12)))]
600#[inline]
601pub unsafe fn PyUnicode_IS_READY(op: *mut PyObject) -> c_uint {
602    (*(op as *mut PyASCIIObject)).ready()
603}
604
/// Always returns `0`; the argument is ignored.
#[cfg(any(Py_3_12, GraalPy))]
#[inline]
pub unsafe fn PyUnicode_READY(_op: *mut PyObject) -> c_int {
    0
}
610
611#[cfg(not(any(Py_3_12, GraalPy)))]
612#[inline]
613pub unsafe fn PyUnicode_READY(op: *mut PyObject) -> c_int {
614    debug_assert!(crate::PyUnicode_Check(op) != 0);
615
616    if PyUnicode_IS_READY(op) != 0 {
617        0
618    } else {
619        _PyUnicode_Ready(op)
620    }
621}
622
623// skipped PyUnicode_MAX_CHAR_VALUE
624// skipped _PyUnicode_get_wstr_length
625// skipped PyUnicode_WSTR_LENGTH
626
627extern "C" {
628    #[cfg_attr(PyPy, link_name = "PyPyUnicode_New")]
629    pub fn PyUnicode_New(size: Py_ssize_t, maxchar: Py_UCS4) -> *mut PyObject;
630    #[cfg_attr(PyPy, link_name = "_PyPyUnicode_Ready")]
631    pub fn _PyUnicode_Ready(unicode: *mut PyObject) -> c_int;
632
633    // skipped _PyUnicode_Copy
634
635    #[cfg(not(PyPy))]
636    pub fn PyUnicode_CopyCharacters(
637        to: *mut PyObject,
638        to_start: Py_ssize_t,
639        from: *mut PyObject,
640        from_start: Py_ssize_t,
641        how_many: Py_ssize_t,
642    ) -> Py_ssize_t;
643
644    // skipped _PyUnicode_FastCopyCharacters
645
646    #[cfg(not(PyPy))]
647    pub fn PyUnicode_Fill(
648        unicode: *mut PyObject,
649        start: Py_ssize_t,
650        length: Py_ssize_t,
651        fill_char: Py_UCS4,
652    ) -> Py_ssize_t;
653
654    // skipped _PyUnicode_FastFill
655
656    #[cfg(not(Py_3_12))]
657    #[deprecated]
658    #[cfg_attr(PyPy, link_name = "PyPyUnicode_FromUnicode")]
659    pub fn PyUnicode_FromUnicode(u: *const wchar_t, size: Py_ssize_t) -> *mut PyObject;
660
661    #[cfg_attr(PyPy, link_name = "PyPyUnicode_FromKindAndData")]
662    pub fn PyUnicode_FromKindAndData(
663        kind: c_int,
664        buffer: *const c_void,
665        size: Py_ssize_t,
666    ) -> *mut PyObject;
667
668    // skipped _PyUnicode_FromASCII
669    // skipped _PyUnicode_FindMaxChar
670
671    #[cfg(not(Py_3_12))]
672    #[deprecated]
673    #[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUnicode")]
674    pub fn PyUnicode_AsUnicode(unicode: *mut PyObject) -> *mut wchar_t;
675
676    // skipped _PyUnicode_AsUnicode
677
678    #[cfg(not(Py_3_12))]
679    #[deprecated]
680    #[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUnicodeAndSize")]
681    pub fn PyUnicode_AsUnicodeAndSize(
682        unicode: *mut PyObject,
683        size: *mut Py_ssize_t,
684    ) -> *mut wchar_t;
685
686    // skipped PyUnicode_GetMax
687}
688
689// skipped _PyUnicodeWriter
690// skipped _PyUnicodeWriter_Init
691// skipped _PyUnicodeWriter_Prepare
692// skipped _PyUnicodeWriter_PrepareInternal
693// skipped _PyUnicodeWriter_PrepareKind
694// skipped _PyUnicodeWriter_PrepareKindInternal
695// skipped _PyUnicodeWriter_WriteChar
696// skipped _PyUnicodeWriter_WriteStr
697// skipped _PyUnicodeWriter_WriteSubstring
698// skipped _PyUnicodeWriter_WriteASCIIString
699// skipped _PyUnicodeWriter_WriteLatin1String
700// skipped _PyUnicodeWriter_Finish
701// skipped _PyUnicodeWriter_Dealloc
702// skipped _PyUnicode_FormatAdvancedWriter
703
704extern "C" {
705    // skipped _PyUnicode_AsStringAndSize
706
707    #[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUTF8")]
708    pub fn PyUnicode_AsUTF8(unicode: *mut PyObject) -> *const c_char;
709
710    // skipped _PyUnicode_AsString
711
712    pub fn PyUnicode_Encode(
713        s: *const wchar_t,
714        size: Py_ssize_t,
715        encoding: *const c_char,
716        errors: *const c_char,
717    ) -> *mut PyObject;
718
719    pub fn PyUnicode_EncodeUTF7(
720        data: *const wchar_t,
721        length: Py_ssize_t,
722        base64SetO: c_int,
723        base64WhiteSpace: c_int,
724        errors: *const c_char,
725    ) -> *mut PyObject;
726
727    // skipped _PyUnicode_EncodeUTF7
728    // skipped _PyUnicode_AsUTF8String
729
730    #[cfg_attr(PyPy, link_name = "PyPyUnicode_EncodeUTF8")]
731    pub fn PyUnicode_EncodeUTF8(
732        data: *const wchar_t,
733        length: Py_ssize_t,
734        errors: *const c_char,
735    ) -> *mut PyObject;
736
737    pub fn PyUnicode_EncodeUTF32(
738        data: *const wchar_t,
739        length: Py_ssize_t,
740        errors: *const c_char,
741        byteorder: c_int,
742    ) -> *mut PyObject;
743
744    // skipped _PyUnicode_EncodeUTF32
745
746    pub fn PyUnicode_EncodeUTF16(
747        data: *const wchar_t,
748        length: Py_ssize_t,
749        errors: *const c_char,
750        byteorder: c_int,
751    ) -> *mut PyObject;
752
753    // skipped _PyUnicode_EncodeUTF16
754    // skipped _PyUnicode_DecodeUnicodeEscape
755
756    pub fn PyUnicode_EncodeUnicodeEscape(data: *const wchar_t, length: Py_ssize_t)
757        -> *mut PyObject;
758
759    pub fn PyUnicode_EncodeRawUnicodeEscape(
760        data: *const wchar_t,
761        length: Py_ssize_t,
762    ) -> *mut PyObject;
763
764    // skipped _PyUnicode_AsLatin1String
765
766    #[cfg_attr(PyPy, link_name = "PyPyUnicode_EncodeLatin1")]
767    pub fn PyUnicode_EncodeLatin1(
768        data: *const wchar_t,
769        length: Py_ssize_t,
770        errors: *const c_char,
771    ) -> *mut PyObject;
772
773    // skipped _PyUnicode_AsASCIIString
774
775    #[cfg_attr(PyPy, link_name = "PyPyUnicode_EncodeASCII")]
776    pub fn PyUnicode_EncodeASCII(
777        data: *const wchar_t,
778        length: Py_ssize_t,
779        errors: *const c_char,
780    ) -> *mut PyObject;
781
782    pub fn PyUnicode_EncodeCharmap(
783        data: *const wchar_t,
784        length: Py_ssize_t,
785        mapping: *mut PyObject,
786        errors: *const c_char,
787    ) -> *mut PyObject;
788
789    // skipped _PyUnicode_EncodeCharmap
790
791    pub fn PyUnicode_TranslateCharmap(
792        data: *const wchar_t,
793        length: Py_ssize_t,
794        table: *mut PyObject,
795        errors: *const c_char,
796    ) -> *mut PyObject;
797
798    // skipped PyUnicode_EncodeMBCS
799
800    #[cfg_attr(PyPy, link_name = "PyPyUnicode_EncodeDecimal")]
801    pub fn PyUnicode_EncodeDecimal(
802        s: *mut wchar_t,
803        length: Py_ssize_t,
804        output: *mut c_char,
805        errors: *const c_char,
806    ) -> c_int;
807
808    #[cfg_attr(PyPy, link_name = "PyPyUnicode_TransformDecimalToASCII")]
809    pub fn PyUnicode_TransformDecimalToASCII(s: *mut wchar_t, length: Py_ssize_t) -> *mut PyObject;
810
811    // skipped _PyUnicode_TransformDecimalAndSpaceToASCII
812}
813
814// skipped _PyUnicode_JoinArray
815// skipped _PyUnicode_EqualToASCIIId
816// skipped _PyUnicode_EqualToASCIIString
817// skipped _PyUnicode_XStrip
818// skipped _PyUnicode_InsertThousandsGrouping
819
820// skipped _Py_ascii_whitespace
821
822// skipped _PyUnicode_IsLowercase
823// skipped _PyUnicode_IsUppercase
824// skipped _PyUnicode_IsTitlecase
825// skipped _PyUnicode_IsXidStart
826// skipped _PyUnicode_IsXidContinue
827// skipped _PyUnicode_IsWhitespace
828// skipped _PyUnicode_IsLinebreak
829// skipped _PyUnicode_ToLowercase
830// skipped _PyUnicode_ToUppercase
831// skipped _PyUnicode_ToTitlecase
832// skipped _PyUnicode_ToLowerFull
833// skipped _PyUnicode_ToTitleFull
834// skipped _PyUnicode_ToUpperFull
835// skipped _PyUnicode_ToFoldedFull
836// skipped _PyUnicode_IsCaseIgnorable
837// skipped _PyUnicode_IsCased
838// skipped _PyUnicode_ToDecimalDigit
839// skipped _PyUnicode_ToDigit
840// skipped _PyUnicode_ToNumeric
841// skipped _PyUnicode_IsDecimalDigit
842// skipped _PyUnicode_IsDigit
843// skipped _PyUnicode_IsNumeric
844// skipped _PyUnicode_IsPrintable
845// skipped _PyUnicode_IsAlpha
846// skipped Py_UNICODE_strlen
847// skipped Py_UNICODE_strcpy
848// skipped Py_UNICODE_strcat
849// skipped Py_UNICODE_strncpy
850// skipped Py_UNICODE_strcmp
851// skipped Py_UNICODE_strncmp
852// skipped Py_UNICODE_strchr
853// skipped Py_UNICODE_strrchr
854// skipped _PyUnicode_FormatLong
855// skipped PyUnicode_AsUnicodeCopy
856// skipped _PyUnicode_FromId
857// skipped _PyUnicode_EQ
858// skipped _PyUnicode_ScanIdentifier