rustpython_vm/
cformat.rs

1//! Implementation of Printf-Style string formatting
2//! as per the [Python Docs](https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting).
3
4use crate::{
5    builtins::{
6        try_f64_to_bigint, tuple, PyBaseExceptionRef, PyByteArray, PyBytes, PyFloat, PyInt, PyStr,
7    },
8    function::ArgIntoFloat,
9    protocol::PyBuffer,
10    stdlib::builtins,
11    AsObject, PyObjectRef, PyResult, TryFromBorrowedObject, TryFromObject, VirtualMachine,
12};
13use itertools::Itertools;
14use num_traits::cast::ToPrimitive;
15use rustpython_format::cformat::*;
16use std::str::FromStr;
17
18fn spec_format_bytes(
19    vm: &VirtualMachine,
20    spec: &CFormatSpec,
21    obj: PyObjectRef,
22) -> PyResult<Vec<u8>> {
23    match &spec.format_type {
24        CFormatType::String(conversion) => match conversion {
25            // Unlike strings, %r and %a are identical for bytes: the behaviour corresponds to
26            // %a for strings (not %r)
27            CFormatConversion::Repr | CFormatConversion::Ascii => {
28                let b = builtins::ascii(obj, vm)?.into();
29                Ok(b)
30            }
31            CFormatConversion::Str | CFormatConversion::Bytes => {
32                if let Ok(buffer) = PyBuffer::try_from_borrowed_object(vm, &obj) {
33                    Ok(buffer.contiguous_or_collect(|bytes| spec.format_bytes(bytes)))
34                } else {
35                    let bytes = vm
36                        .get_special_method(&obj, identifier!(vm, __bytes__))?
37                        .ok_or_else(|| {
38                            vm.new_type_error(format!(
39                                "%b requires a bytes-like object, or an object that \
40                                    implements __bytes__, not '{}'",
41                                obj.class().name()
42                            ))
43                        })?
44                        .invoke((), vm)?;
45                    let bytes = PyBytes::try_from_borrowed_object(vm, &bytes)?;
46                    Ok(spec.format_bytes(bytes.as_bytes()))
47                }
48            }
49        },
50        CFormatType::Number(number_type) => match number_type {
51            CNumberType::Decimal => match_class!(match &obj {
52                ref i @ PyInt => {
53                    Ok(spec.format_number(i.as_bigint()).into_bytes())
54                }
55                ref f @ PyFloat => {
56                    Ok(spec
57                        .format_number(&try_f64_to_bigint(f.to_f64(), vm)?)
58                        .into_bytes())
59                }
60                obj => {
61                    if let Some(method) = vm.get_method(obj.clone(), identifier!(vm, __int__)) {
62                        let result = method?.call((), vm)?;
63                        if let Some(i) = result.payload::<PyInt>() {
64                            return Ok(spec.format_number(i.as_bigint()).into_bytes());
65                        }
66                    }
67                    Err(vm.new_type_error(format!(
68                        "%{} format: a number is required, not {}",
69                        spec.format_char,
70                        obj.class().name()
71                    )))
72                }
73            }),
74            _ => {
75                if let Some(i) = obj.payload::<PyInt>() {
76                    Ok(spec.format_number(i.as_bigint()).into_bytes())
77                } else {
78                    Err(vm.new_type_error(format!(
79                        "%{} format: an integer is required, not {}",
80                        spec.format_char,
81                        obj.class().name()
82                    )))
83                }
84            }
85        },
86        CFormatType::Float(_) => {
87            let class = obj.class().to_owned();
88            let value = ArgIntoFloat::try_from_object(vm, obj).map_err(|e| {
89                if e.fast_isinstance(vm.ctx.exceptions.type_error) {
90                    // formatfloat in bytesobject.c generates its own specific exception
91                    // text in this case, mirror it here.
92                    vm.new_type_error(format!("float argument required, not {}", class.name()))
93                } else {
94                    e
95                }
96            })?;
97            Ok(spec.format_float(value.into()).into_bytes())
98        }
99        CFormatType::Character => {
100            if let Some(i) = obj.payload::<PyInt>() {
101                let ch = i
102                    .try_to_primitive::<u8>(vm)
103                    .map_err(|_| vm.new_overflow_error("%c arg not in range(256)".to_owned()))?
104                    as char;
105                return Ok(spec.format_char(ch).into_bytes());
106            }
107            if let Some(b) = obj.payload::<PyBytes>() {
108                if b.len() == 1 {
109                    return Ok(spec.format_char(b.as_bytes()[0] as char).into_bytes());
110                }
111            } else if let Some(ba) = obj.payload::<PyByteArray>() {
112                let buf = ba.borrow_buf();
113                if buf.len() == 1 {
114                    return Ok(spec.format_char(buf[0] as char).into_bytes());
115                }
116            }
117            Err(vm
118                .new_type_error("%c requires an integer in range(256) or a single byte".to_owned()))
119        }
120    }
121}
122
123fn spec_format_string(
124    vm: &VirtualMachine,
125    spec: &CFormatSpec,
126    obj: PyObjectRef,
127    idx: &usize,
128) -> PyResult<String> {
129    match &spec.format_type {
130        CFormatType::String(conversion) => {
131            let result = match conversion {
132                CFormatConversion::Ascii => builtins::ascii(obj, vm)?.into(),
133                CFormatConversion::Str => obj.str(vm)?.as_str().to_owned(),
134                CFormatConversion::Repr => obj.repr(vm)?.as_str().to_owned(),
135                CFormatConversion::Bytes => {
136                    // idx is the position of the %, we want the position of the b
137                    return Err(vm.new_value_error(format!(
138                        "unsupported format character 'b' (0x62) at index {}",
139                        idx + 1
140                    )));
141                }
142            };
143            Ok(spec.format_string(result))
144        }
145        CFormatType::Number(number_type) => match number_type {
146            CNumberType::Decimal => match_class!(match &obj {
147                ref i @ PyInt => {
148                    Ok(spec.format_number(i.as_bigint()))
149                }
150                ref f @ PyFloat => {
151                    Ok(spec.format_number(&try_f64_to_bigint(f.to_f64(), vm)?))
152                }
153                obj => {
154                    if let Some(method) = vm.get_method(obj.clone(), identifier!(vm, __int__)) {
155                        let result = method?.call((), vm)?;
156                        if let Some(i) = result.payload::<PyInt>() {
157                            return Ok(spec.format_number(i.as_bigint()));
158                        }
159                    }
160                    Err(vm.new_type_error(format!(
161                        "%{} format: a number is required, not {}",
162                        spec.format_char,
163                        obj.class().name()
164                    )))
165                }
166            }),
167            _ => {
168                if let Some(i) = obj.payload::<PyInt>() {
169                    Ok(spec.format_number(i.as_bigint()))
170                } else {
171                    Err(vm.new_type_error(format!(
172                        "%{} format: an integer is required, not {}",
173                        spec.format_char,
174                        obj.class().name()
175                    )))
176                }
177            }
178        },
179        CFormatType::Float(_) => {
180            let value = ArgIntoFloat::try_from_object(vm, obj)?;
181            Ok(spec.format_float(value.into()))
182        }
183        CFormatType::Character => {
184            if let Some(i) = obj.payload::<PyInt>() {
185                let ch = i
186                    .as_bigint()
187                    .to_u32()
188                    .and_then(std::char::from_u32)
189                    .ok_or_else(|| {
190                        vm.new_overflow_error("%c arg not in range(0x110000)".to_owned())
191                    })?;
192                return Ok(spec.format_char(ch));
193            }
194            if let Some(s) = obj.payload::<PyStr>() {
195                if let Ok(ch) = s.as_str().chars().exactly_one() {
196                    return Ok(spec.format_char(ch));
197                }
198            }
199            Err(vm.new_type_error("%c requires int or char".to_owned()))
200        }
201    }
202}
203
204fn try_update_quantity_from_element(
205    vm: &VirtualMachine,
206    element: Option<&PyObjectRef>,
207) -> PyResult<CFormatQuantity> {
208    match element {
209        Some(width_obj) => {
210            if let Some(i) = width_obj.payload::<PyInt>() {
211                let i = i.try_to_primitive::<i32>(vm)?.unsigned_abs();
212                Ok(CFormatQuantity::Amount(i as usize))
213            } else {
214                Err(vm.new_type_error("* wants int".to_owned()))
215            }
216        }
217        None => Err(vm.new_type_error("not enough arguments for format string".to_owned())),
218    }
219}
220
221fn try_conversion_flag_from_tuple(
222    vm: &VirtualMachine,
223    element: Option<&PyObjectRef>,
224) -> PyResult<CConversionFlags> {
225    match element {
226        Some(width_obj) => {
227            if let Some(i) = width_obj.payload::<PyInt>() {
228                let i = i.try_to_primitive::<i32>(vm)?;
229                let flags = if i < 0 {
230                    CConversionFlags::LEFT_ADJUST
231                } else {
232                    CConversionFlags::from_bits(0).unwrap()
233                };
234                Ok(flags)
235            } else {
236                Err(vm.new_type_error("* wants int".to_owned()))
237            }
238        }
239        None => Err(vm.new_type_error("not enough arguments for format string".to_owned())),
240    }
241}
242
243fn try_update_quantity_from_tuple<'a, I: Iterator<Item = &'a PyObjectRef>>(
244    vm: &VirtualMachine,
245    elements: &mut I,
246    q: &mut Option<CFormatQuantity>,
247    f: &mut CConversionFlags,
248) -> PyResult<()> {
249    let Some(CFormatQuantity::FromValuesTuple) = q else {
250        return Ok(());
251    };
252    let element = elements.next();
253    f.insert(try_conversion_flag_from_tuple(vm, element)?);
254    let quantity = try_update_quantity_from_element(vm, element)?;
255    *q = Some(quantity);
256    Ok(())
257}
258
259fn try_update_precision_from_tuple<'a, I: Iterator<Item = &'a PyObjectRef>>(
260    vm: &VirtualMachine,
261    elements: &mut I,
262    p: &mut Option<CFormatPrecision>,
263) -> PyResult<()> {
264    let Some(CFormatPrecision::Quantity(CFormatQuantity::FromValuesTuple)) = p else {
265        return Ok(());
266    };
267    let quantity = try_update_quantity_from_element(vm, elements.next())?;
268    *p = Some(CFormatPrecision::Quantity(quantity));
269    Ok(())
270}
271
272fn specifier_error(vm: &VirtualMachine) -> PyBaseExceptionRef {
273    vm.new_type_error("format requires a mapping".to_owned())
274}
275
276pub(crate) fn cformat_bytes(
277    vm: &VirtualMachine,
278    format_string: &[u8],
279    values_obj: PyObjectRef,
280) -> PyResult<Vec<u8>> {
281    let mut format = CFormatBytes::parse_from_bytes(format_string)
282        .map_err(|err| vm.new_value_error(err.to_string()))?;
283    let (num_specifiers, mapping_required) = format
284        .check_specifiers()
285        .ok_or_else(|| specifier_error(vm))?;
286
287    let mut result = vec![];
288
289    let is_mapping = values_obj.class().has_attr(identifier!(vm, __getitem__))
290        && !values_obj.fast_isinstance(vm.ctx.types.tuple_type)
291        && !values_obj.fast_isinstance(vm.ctx.types.bytes_type)
292        && !values_obj.fast_isinstance(vm.ctx.types.bytearray_type);
293
294    if num_specifiers == 0 {
295        // literal only
296        return if is_mapping
297            || values_obj
298                .payload::<tuple::PyTuple>()
299                .map_or(false, |e| e.is_empty())
300        {
301            for (_, part) in format.iter_mut() {
302                match part {
303                    CFormatPart::Literal(literal) => result.append(literal),
304                    CFormatPart::Spec(_) => unreachable!(),
305                }
306            }
307            Ok(result)
308        } else {
309            Err(vm.new_type_error("not all arguments converted during bytes formatting".to_owned()))
310        };
311    }
312
313    if mapping_required {
314        // dict
315        return if is_mapping {
316            for (_, part) in format.iter_mut() {
317                match part {
318                    CFormatPart::Literal(literal) => result.append(literal),
319                    CFormatPart::Spec(spec) => {
320                        let value = match &spec.mapping_key {
321                            Some(key) => {
322                                let k = vm.ctx.new_bytes(key.as_str().as_bytes().to_vec());
323                                values_obj.get_item(k.as_object(), vm)?
324                            }
325                            None => unreachable!(),
326                        };
327                        let mut part_result = spec_format_bytes(vm, spec, value)?;
328                        result.append(&mut part_result);
329                    }
330                }
331            }
332            Ok(result)
333        } else {
334            Err(vm.new_type_error("format requires a mapping".to_owned()))
335        };
336    }
337
338    // tuple
339    let values = if let Some(tup) = values_obj.payload_if_subclass::<tuple::PyTuple>(vm) {
340        tup.as_slice()
341    } else {
342        std::slice::from_ref(&values_obj)
343    };
344    let mut value_iter = values.iter();
345
346    for (_, part) in format.iter_mut() {
347        match part {
348            CFormatPart::Literal(literal) => result.append(literal),
349            CFormatPart::Spec(spec) => {
350                try_update_quantity_from_tuple(
351                    vm,
352                    &mut value_iter,
353                    &mut spec.min_field_width,
354                    &mut spec.flags,
355                )?;
356                try_update_precision_from_tuple(vm, &mut value_iter, &mut spec.precision)?;
357
358                let value = match value_iter.next() {
359                    Some(obj) => Ok(obj.clone()),
360                    None => {
361                        Err(vm.new_type_error("not enough arguments for format string".to_owned()))
362                    }
363                }?;
364                let mut part_result = spec_format_bytes(vm, spec, value)?;
365                result.append(&mut part_result);
366            }
367        }
368    }
369
370    // check that all arguments were converted
371    if value_iter.next().is_some() && !is_mapping {
372        Err(vm.new_type_error("not all arguments converted during bytes formatting".to_owned()))
373    } else {
374        Ok(result)
375    }
376}
377
378pub(crate) fn cformat_string(
379    vm: &VirtualMachine,
380    format_string: &str,
381    values_obj: PyObjectRef,
382) -> PyResult<String> {
383    let mut format = CFormatString::from_str(format_string)
384        .map_err(|err| vm.new_value_error(err.to_string()))?;
385    let (num_specifiers, mapping_required) = format
386        .check_specifiers()
387        .ok_or_else(|| specifier_error(vm))?;
388
389    let mut result = String::new();
390
391    let is_mapping = values_obj.class().has_attr(identifier!(vm, __getitem__))
392        && !values_obj.fast_isinstance(vm.ctx.types.tuple_type)
393        && !values_obj.fast_isinstance(vm.ctx.types.str_type);
394
395    if num_specifiers == 0 {
396        // literal only
397        return if is_mapping
398            || values_obj
399                .payload::<tuple::PyTuple>()
400                .map_or(false, |e| e.is_empty())
401        {
402            for (_, part) in format.iter() {
403                match part {
404                    CFormatPart::Literal(literal) => result.push_str(literal),
405                    CFormatPart::Spec(_) => unreachable!(),
406                }
407            }
408            Ok(result)
409        } else {
410            Err(vm
411                .new_type_error("not all arguments converted during string formatting".to_owned()))
412        };
413    }
414
415    if mapping_required {
416        // dict
417        return if is_mapping {
418            for (idx, part) in format.iter() {
419                match part {
420                    CFormatPart::Literal(literal) => result.push_str(literal),
421                    CFormatPart::Spec(spec) => {
422                        let value = match &spec.mapping_key {
423                            Some(key) => values_obj.get_item(key.as_str(), vm)?,
424                            None => unreachable!(),
425                        };
426                        let part_result = spec_format_string(vm, spec, value, idx)?;
427                        result.push_str(&part_result);
428                    }
429                }
430            }
431            Ok(result)
432        } else {
433            Err(vm.new_type_error("format requires a mapping".to_owned()))
434        };
435    }
436
437    // tuple
438    let values = if let Some(tup) = values_obj.payload_if_subclass::<tuple::PyTuple>(vm) {
439        tup.as_slice()
440    } else {
441        std::slice::from_ref(&values_obj)
442    };
443    let mut value_iter = values.iter();
444
445    for (idx, part) in format.iter_mut() {
446        match part {
447            CFormatPart::Literal(literal) => result.push_str(literal),
448            CFormatPart::Spec(spec) => {
449                try_update_quantity_from_tuple(
450                    vm,
451                    &mut value_iter,
452                    &mut spec.min_field_width,
453                    &mut spec.flags,
454                )?;
455                try_update_precision_from_tuple(vm, &mut value_iter, &mut spec.precision)?;
456
457                let value = match value_iter.next() {
458                    Some(obj) => Ok(obj.clone()),
459                    None => {
460                        Err(vm.new_type_error("not enough arguments for format string".to_owned()))
461                    }
462                }?;
463                let part_result = spec_format_string(vm, spec, value, idx)?;
464                result.push_str(&part_result);
465            }
466        }
467    }
468
469    // check that all arguments were converted
470    if value_iter.next().is_some() && !is_mapping {
471        Err(vm.new_type_error("not all arguments converted during string formatting".to_owned()))
472    } else {
473        Ok(result)
474    }
475}