Skip to main content

rustpython_vm/
cformat.rs

1//cspell:ignore bytesobject
2
3//! Implementation of Printf-Style string formatting
4//! as per the [Python Docs](https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting).
5
6use crate::common::cformat::*;
7use crate::common::wtf8::{CodePoint, Wtf8, Wtf8Buf};
8use crate::{
9    AsObject, PyObject, PyObjectRef, PyResult, TryFromBorrowedObject, TryFromObject,
10    VirtualMachine,
11    builtins::{
12        PyBaseExceptionRef, PyByteArray, PyBytes, PyFloat, PyInt, PyStr, try_f64_to_bigint, tuple,
13    },
14    function::ArgIntoFloat,
15    protocol::PyBuffer,
16    stdlib::builtins,
17};
18use itertools::Itertools;
19use num_traits::cast::ToPrimitive;
20
21fn spec_format_bytes(
22    vm: &VirtualMachine,
23    spec: &CFormatSpec,
24    obj: PyObjectRef,
25) -> PyResult<Vec<u8>> {
26    match &spec.format_type {
27        CFormatType::String(conversion) => match conversion {
28            // Unlike strings, %r and %a are identical for bytes: the behaviour corresponds to
29            // %a for strings (not %r)
30            CFormatConversion::Repr | CFormatConversion::Ascii => {
31                let b = builtins::ascii(obj, vm)?.as_bytes().to_vec();
32                Ok(b)
33            }
34            CFormatConversion::Str | CFormatConversion::Bytes => {
35                if let Ok(buffer) = PyBuffer::try_from_borrowed_object(vm, &obj) {
36                    Ok(buffer.contiguous_or_collect(|bytes| spec.format_bytes(bytes)))
37                } else {
38                    let bytes = vm
39                        .get_special_method(&obj, identifier!(vm, __bytes__))?
40                        .ok_or_else(|| {
41                            vm.new_type_error(format!(
42                                "%b requires a bytes-like object, or an object that \
43                                    implements __bytes__, not '{}'",
44                                obj.class().name()
45                            ))
46                        })?
47                        .invoke((), vm)?;
48                    let bytes = PyBytes::try_from_borrowed_object(vm, &bytes)?;
49                    Ok(spec.format_bytes(bytes.as_bytes()))
50                }
51            }
52        },
53        CFormatType::Number(number_type) => match number_type {
54            CNumberType::DecimalD | CNumberType::DecimalI | CNumberType::DecimalU => {
55                match_class!(match &obj {
56                    ref i @ PyInt => {
57                        Ok(spec.format_number(i.as_bigint()).into_bytes())
58                    }
59                    ref f @ PyFloat => {
60                        Ok(spec
61                            .format_number(&try_f64_to_bigint(f.to_f64(), vm)?)
62                            .into_bytes())
63                    }
64                    obj => {
65                        if let Some(method) = vm.get_method(obj.clone(), identifier!(vm, __int__)) {
66                            let result = method?.call((), vm)?;
67                            if let Some(i) = result.downcast_ref::<PyInt>() {
68                                return Ok(spec.format_number(i.as_bigint()).into_bytes());
69                            }
70                        }
71                        Err(vm.new_type_error(format!(
72                            "%{} format: a number is required, not {}",
73                            spec.format_type.to_char(),
74                            obj.class().name()
75                        )))
76                    }
77                })
78            }
79            _ => {
80                if let Some(i) = obj.downcast_ref::<PyInt>() {
81                    Ok(spec.format_number(i.as_bigint()).into_bytes())
82                } else {
83                    Err(vm.new_type_error(format!(
84                        "%{} format: an integer is required, not {}",
85                        spec.format_type.to_char(),
86                        obj.class().name()
87                    )))
88                }
89            }
90        },
91        CFormatType::Float(_) => {
92            let class = obj.class().to_owned();
93            let value = ArgIntoFloat::try_from_object(vm, obj).map_err(|e| {
94                if e.fast_isinstance(vm.ctx.exceptions.type_error) {
95                    // formatfloat in bytesobject.c generates its own specific exception
96                    // text in this case, mirror it here.
97                    vm.new_type_error(format!("float argument required, not {}", class.name()))
98                } else {
99                    e
100                }
101            })?;
102            Ok(spec.format_float(value.into()).into_bytes())
103        }
104        CFormatType::Character(CCharacterType::Character) => {
105            if let Some(i) = obj.downcast_ref::<PyInt>() {
106                let ch = i
107                    .try_to_primitive::<u8>(vm)
108                    .map_err(|_| vm.new_overflow_error("%c arg not in range(256)"))?;
109                return Ok(spec.format_char(ch));
110            }
111            if let Some(b) = obj.downcast_ref::<PyBytes>() {
112                if b.len() == 1 {
113                    return Ok(spec.format_char(b.as_bytes()[0]));
114                }
115            } else if let Some(ba) = obj.downcast_ref::<PyByteArray>() {
116                let buf = ba.borrow_buf();
117                if buf.len() == 1 {
118                    return Ok(spec.format_char(buf[0]));
119                }
120            }
121            Err(vm.new_type_error("%c requires an integer in range(256) or a single byte"))
122        }
123    }
124}
125
126fn spec_format_string(
127    vm: &VirtualMachine,
128    spec: &CFormatSpec,
129    obj: PyObjectRef,
130    idx: usize,
131) -> PyResult<Wtf8Buf> {
132    match &spec.format_type {
133        CFormatType::String(conversion) => {
134            let result = match conversion {
135                CFormatConversion::Ascii => builtins::ascii(obj, vm)?.as_wtf8().to_owned(),
136                CFormatConversion::Str => obj.str(vm)?.as_wtf8().to_owned(),
137                CFormatConversion::Repr => obj.repr(vm)?.as_wtf8().to_owned(),
138                CFormatConversion::Bytes => {
139                    // idx is the position of the %, we want the position of the b
140                    return Err(vm.new_value_error(format!(
141                        "unsupported format character 'b' (0x62) at index {}",
142                        idx + 1
143                    )));
144                }
145            };
146            Ok(spec.format_string(result))
147        }
148        CFormatType::Number(number_type) => match number_type {
149            CNumberType::DecimalD | CNumberType::DecimalI | CNumberType::DecimalU => {
150                match_class!(match &obj {
151                    ref i @ PyInt => {
152                        Ok(spec.format_number(i.as_bigint()).into())
153                    }
154                    ref f @ PyFloat => {
155                        Ok(spec
156                            .format_number(&try_f64_to_bigint(f.to_f64(), vm)?)
157                            .into())
158                    }
159                    obj => {
160                        if let Some(method) = vm.get_method(obj.clone(), identifier!(vm, __int__)) {
161                            let result = method?.call((), vm)?;
162                            if let Some(i) = result.downcast_ref::<PyInt>() {
163                                return Ok(spec.format_number(i.as_bigint()).into());
164                            }
165                        }
166                        Err(vm.new_type_error(format!(
167                            "%{} format: a number is required, not {}",
168                            spec.format_type.to_char(),
169                            obj.class().name()
170                        )))
171                    }
172                })
173            }
174            _ => {
175                if let Some(i) = obj.downcast_ref::<PyInt>() {
176                    Ok(spec.format_number(i.as_bigint()).into())
177                } else {
178                    Err(vm.new_type_error(format!(
179                        "%{} format: an integer is required, not {}",
180                        spec.format_type.to_char(),
181                        obj.class().name()
182                    )))
183                }
184            }
185        },
186        CFormatType::Float(_) => {
187            let value = ArgIntoFloat::try_from_object(vm, obj)?;
188            Ok(spec.format_float(value.into()).into())
189        }
190        CFormatType::Character(CCharacterType::Character) => {
191            if let Some(i) = obj.downcast_ref::<PyInt>() {
192                let ch = i
193                    .as_bigint()
194                    .to_u32()
195                    .and_then(CodePoint::from_u32)
196                    .ok_or_else(|| vm.new_overflow_error("%c arg not in range(0x110000)"))?;
197                return Ok(spec.format_char(ch));
198            }
199            if let Some(s) = obj.downcast_ref::<PyStr>()
200                && let Ok(ch) = s.as_wtf8().code_points().exactly_one()
201            {
202                return Ok(spec.format_char(ch));
203            }
204            Err(vm.new_type_error("%c requires int or char"))
205        }
206    }
207}
208
209fn try_update_quantity_from_element(
210    vm: &VirtualMachine,
211    element: Option<&PyObject>,
212) -> PyResult<CFormatQuantity> {
213    match element {
214        Some(width_obj) => {
215            if let Some(i) = width_obj.downcast_ref::<PyInt>() {
216                let i = i.try_to_primitive::<i32>(vm)?.unsigned_abs();
217                Ok(CFormatQuantity::Amount(i as usize))
218            } else {
219                Err(vm.new_type_error("* wants int"))
220            }
221        }
222        None => Err(vm.new_type_error("not enough arguments for format string")),
223    }
224}
225
226fn try_conversion_flag_from_tuple(
227    vm: &VirtualMachine,
228    element: Option<&PyObject>,
229) -> PyResult<CConversionFlags> {
230    match element {
231        Some(width_obj) => {
232            if let Some(i) = width_obj.downcast_ref::<PyInt>() {
233                let i = i.try_to_primitive::<i32>(vm)?;
234                let flags = if i < 0 {
235                    CConversionFlags::LEFT_ADJUST
236                } else {
237                    CConversionFlags::from_bits(0).unwrap()
238                };
239                Ok(flags)
240            } else {
241                Err(vm.new_type_error("* wants int"))
242            }
243        }
244        None => Err(vm.new_type_error("not enough arguments for format string")),
245    }
246}
247
248fn try_update_quantity_from_tuple<'a, I: Iterator<Item = &'a PyObjectRef>>(
249    vm: &VirtualMachine,
250    elements: &mut I,
251    q: &mut Option<CFormatQuantity>,
252    f: &mut CConversionFlags,
253) -> PyResult<()> {
254    let Some(CFormatQuantity::FromValuesTuple) = q else {
255        return Ok(());
256    };
257    let element = elements.next();
258    f.insert(try_conversion_flag_from_tuple(
259        vm,
260        element.map(|v| v.as_ref()),
261    )?);
262    let quantity = try_update_quantity_from_element(vm, element.map(|v| v.as_ref()))?;
263    *q = Some(quantity);
264    Ok(())
265}
266
267fn try_update_precision_from_tuple<'a, I: Iterator<Item = &'a PyObjectRef>>(
268    vm: &VirtualMachine,
269    elements: &mut I,
270    p: &mut Option<CFormatPrecision>,
271) -> PyResult<()> {
272    let Some(CFormatPrecision::Quantity(CFormatQuantity::FromValuesTuple)) = p else {
273        return Ok(());
274    };
275    let quantity = try_update_quantity_from_element(vm, elements.next().map(|v| v.as_ref()))?;
276    *p = Some(CFormatPrecision::Quantity(quantity));
277    Ok(())
278}
279
280fn specifier_error(vm: &VirtualMachine) -> PyBaseExceptionRef {
281    vm.new_type_error("format requires a mapping")
282}
283
284pub(crate) fn cformat_bytes(
285    vm: &VirtualMachine,
286    format_string: &[u8],
287    values_obj: PyObjectRef,
288) -> PyResult<Vec<u8>> {
289    let mut format = CFormatBytes::parse_from_bytes(format_string)
290        .map_err(|err| vm.new_value_error(err.to_string()))?;
291    let (num_specifiers, mapping_required) = format
292        .check_specifiers()
293        .ok_or_else(|| specifier_error(vm))?;
294
295    let mut result = vec![];
296
297    let is_mapping = values_obj.class().has_attr(identifier!(vm, __getitem__))
298        && !values_obj.fast_isinstance(vm.ctx.types.tuple_type)
299        && !values_obj.fast_isinstance(vm.ctx.types.bytes_type)
300        && !values_obj.fast_isinstance(vm.ctx.types.bytearray_type);
301
302    if num_specifiers == 0 {
303        // literal only
304        return if is_mapping
305            || values_obj
306                .downcast_ref::<tuple::PyTuple>()
307                .is_some_and(|e| e.is_empty())
308        {
309            for (_, part) in format.iter_mut() {
310                match part {
311                    CFormatPart::Literal(literal) => result.append(literal),
312                    CFormatPart::Spec(_) => unreachable!(),
313                }
314            }
315            Ok(result)
316        } else {
317            Err(vm.new_type_error("not all arguments converted during bytes formatting"))
318        };
319    }
320
321    if mapping_required {
322        // dict
323        return if is_mapping {
324            for (_, part) in format {
325                match part {
326                    CFormatPart::Literal(literal) => result.extend(literal),
327                    CFormatPart::Spec(CFormatSpecKeyed { mapping_key, spec }) => {
328                        let key = mapping_key.unwrap();
329                        let value = values_obj.get_item(&key, vm)?;
330                        let part_result = spec_format_bytes(vm, &spec, value)?;
331                        result.extend(part_result);
332                    }
333                }
334            }
335            Ok(result)
336        } else {
337            Err(vm.new_type_error("format requires a mapping"))
338        };
339    }
340
341    // tuple
342    let values = if let Some(tup) = values_obj.downcast_ref::<tuple::PyTuple>() {
343        tup.as_slice()
344    } else {
345        core::slice::from_ref(&values_obj)
346    };
347    let mut value_iter = values.iter();
348
349    for (_, part) in format {
350        match part {
351            CFormatPart::Literal(literal) => result.extend(literal),
352            CFormatPart::Spec(CFormatSpecKeyed { mut spec, .. }) => {
353                try_update_quantity_from_tuple(
354                    vm,
355                    &mut value_iter,
356                    &mut spec.min_field_width,
357                    &mut spec.flags,
358                )?;
359                try_update_precision_from_tuple(vm, &mut value_iter, &mut spec.precision)?;
360
361                let value = match value_iter.next() {
362                    Some(obj) => Ok(obj.clone()),
363                    None => Err(vm.new_type_error("not enough arguments for format string")),
364                }?;
365                let part_result = spec_format_bytes(vm, &spec, value)?;
366                result.extend(part_result);
367            }
368        }
369    }
370
371    // check that all arguments were converted
372    if value_iter.next().is_some() && !is_mapping {
373        Err(vm.new_type_error("not all arguments converted during bytes formatting"))
374    } else {
375        Ok(result)
376    }
377}
378
379pub(crate) fn cformat_string(
380    vm: &VirtualMachine,
381    format_string: &Wtf8,
382    values_obj: PyObjectRef,
383) -> PyResult<Wtf8Buf> {
384    let format = CFormatWtf8::parse_from_wtf8(format_string)
385        .map_err(|err| vm.new_value_error(err.to_string()))?;
386    let (num_specifiers, mapping_required) = format
387        .check_specifiers()
388        .ok_or_else(|| specifier_error(vm))?;
389
390    let mut result = Wtf8Buf::new();
391
392    let is_mapping = values_obj.class().has_attr(identifier!(vm, __getitem__))
393        && !values_obj.fast_isinstance(vm.ctx.types.tuple_type)
394        && !values_obj.fast_isinstance(vm.ctx.types.str_type);
395
396    if num_specifiers == 0 {
397        // literal only
398        return if is_mapping
399            || values_obj
400                .downcast_ref::<tuple::PyTuple>()
401                .is_some_and(|e| e.is_empty())
402        {
403            for (_, part) in format.iter() {
404                match part {
405                    CFormatPart::Literal(literal) => result.push_wtf8(literal),
406                    CFormatPart::Spec(_) => unreachable!(),
407                }
408            }
409            Ok(result)
410        } else {
411            Err(vm.new_type_error("not all arguments converted during string formatting"))
412        };
413    }
414
415    if mapping_required {
416        // dict
417        return if is_mapping {
418            for (idx, part) in format {
419                match part {
420                    CFormatPart::Literal(literal) => result.push_wtf8(&literal),
421                    CFormatPart::Spec(CFormatSpecKeyed { mapping_key, spec }) => {
422                        let value = values_obj.get_item(&mapping_key.unwrap(), vm)?;
423                        let part_result = spec_format_string(vm, &spec, value, idx)?;
424                        result.push_wtf8(&part_result);
425                    }
426                }
427            }
428            Ok(result)
429        } else {
430            Err(vm.new_type_error("format requires a mapping"))
431        };
432    }
433
434    // tuple
435    let values = if let Some(tup) = values_obj.downcast_ref::<tuple::PyTuple>() {
436        tup.as_slice()
437    } else {
438        core::slice::from_ref(&values_obj)
439    };
440    let mut value_iter = values.iter();
441
442    for (idx, part) in format {
443        match part {
444            CFormatPart::Literal(literal) => result.push_wtf8(&literal),
445            CFormatPart::Spec(CFormatSpecKeyed { mut spec, .. }) => {
446                try_update_quantity_from_tuple(
447                    vm,
448                    &mut value_iter,
449                    &mut spec.min_field_width,
450                    &mut spec.flags,
451                )?;
452                try_update_precision_from_tuple(vm, &mut value_iter, &mut spec.precision)?;
453
454                let value = match value_iter.next() {
455                    Some(obj) => Ok(obj.clone()),
456                    None => Err(vm.new_type_error("not enough arguments for format string")),
457                }?;
458                let part_result = spec_format_string(vm, &spec, value, idx)?;
459                result.push_wtf8(&part_result);
460            }
461        }
462    }
463
464    // check that all arguments were converted
465    if value_iter.next().is_some() && !is_mapping {
466        Err(vm.new_type_error("not all arguments converted during string formatting"))
467    } else {
468        Ok(result)
469    }
470}