Skip to main content

py_canon/
canon.rs

1//! Native dup-defs canonicalization via ruff's parser + AST — no CPython `ast.*`, rayon-parallel,
2//! so it runs fast on stock CPython while staying byte-compatible with the reference.
3//!
4//! The canonical string is **CPython `ast.dump(node, annotate_fields=False)`** reproduced from the
5//! ruff AST: same node names, same ASDL field order, same `show_empty=False` rule (trailing
6//! `None`/`[]` dropped; an emptied field before a present one switches the rest to `name=` keyword
7//! form), and Python `repr` for literals. Reproducing that exact shape is what keeps the downstream
8//! difflib ratios (clustering) and the alpha-renamed equality (cross-name) aligned with CPython's
9//! `ast` — a terser form drifts the ratios. Two passes, both off one `Collect` of bound locals:
10//! clustering keeps identifiers, cross-name alpha-renames them (`_v{n}`) and blanks the def name.
11#![allow(clippy::doc_markdown)]
12
13use std::collections::{HashMap, HashSet};
14use std::fmt::Write as _;
15
16/// `(cluster_canonical, xname_canonical, type3_lines, node_count)` — the analysis tuple the scan
17/// reads to build a `Def`'s cluster canonical + `Analysis`. `py-canon`'s own type (was shared via
18/// `dup-defs-core`, now local since the engine consumes `Def`, not this tuple).
19pub use find_dup_defs_canon::AnalyzedFn;
20use rayon::prelude::*;
21use ruff_python_ast::visitor::{self, Visitor};
22use ruff_python_ast::{
23    self as ast, BoolOp, CmpOp, Comprehension, ExceptHandler, Expr, ExprContext, Keyword, Number,
24    Operator, Parameter, Parameters, Stmt, UnaryOp, WithItem,
25};
26use ruff_python_parser::parse_module;
27use ruff_text_size::Ranged;
28
29// ---------------------------------------------------------------------------
30// Bound-locals collection (cross-name rename targets) — unchanged in spirit: params + assignment /
31// loop / with / except / import / comprehension / walrus targets. Free names carry behaviour, kept.
32// ---------------------------------------------------------------------------
33
34/// Collect names *bound* anywhere in the function (cross-name rename set).
35#[derive(Default)]
36struct Collect {
37    bound: HashSet<String>,
38}
39
40impl Collect {
41    /// The top function's own params (posonly + args + kwonly + `*args` + `**kwargs`). CPython's
42    /// `_collect_locals` adds these explicitly and does NOT collect *nested* defs' params.
43    fn add_params(&mut self, params: &Parameters) {
44        for x in params.posonlyargs.iter().chain(params.args.iter()).chain(params.kwonlyargs.iter()) {
45            self.bound.insert(x.parameter.name.id.as_str().to_owned());
46        }
47        if let Some(vararg) = &params.vararg {
48            self.bound.insert(vararg.name.id.as_str().to_owned());
49        }
50        if let Some(kwarg) = &params.kwarg {
51            self.bound.insert(kwarg.name.id.as_str().to_owned());
52        }
53    }
54
55    fn add_target(&mut self, expr: &Expr) {
56        match expr {
57            Expr::Name(name) => {
58                self.bound.insert(name.id.as_str().to_owned());
59            }
60            Expr::Starred(starred) => self.add_target(&starred.value),
61            Expr::Tuple(tuple) => tuple.elts.iter().for_each(|elt| self.add_target(elt)),
62            Expr::List(list) => list.elts.iter().for_each(|elt| self.add_target(elt)),
63            _ => {} // Attribute / Subscript targets bind no local name
64        }
65    }
66}
67
68impl<'a> Visitor<'a> for Collect {
69    fn visit_stmt(&mut self, stmt: &'a Stmt) {
70        match stmt {
71            Stmt::Assign(node) => node.targets.iter().for_each(|t| self.add_target(t)),
72            Stmt::AnnAssign(node) => self.add_target(&node.target),
73            Stmt::AugAssign(node) => self.add_target(&node.target),
74            Stmt::For(node) => self.add_target(&node.target),
75            Stmt::Import(node) => {
76                for alias in &node.names {
77                    let name = match &alias.asname {
78                        Some(asname) => asname.id.as_str().to_owned(),
79                        None => alias.name.id.as_str().split('.').next().unwrap_or("").to_owned(),
80                    };
81                    self.bound.insert(name);
82                }
83            }
84            Stmt::ImportFrom(node) => {
85                for alias in &node.names {
86                    let name = alias.asname.as_ref().unwrap_or(&alias.name);
87                    self.bound.insert(name.id.as_str().to_owned());
88                }
89            }
90            _ => {}
91        }
92        visitor::walk_stmt(self, stmt);
93    }
94
95    fn visit_expr(&mut self, expr: &'a Expr) {
96        match expr {
97            Expr::Named(named) => self.add_target(&named.target),
98            // Lambda params bind locals at any depth (CPython collects posonly+args+kwonly only —
99            // not `*args`/`**kwargs`). Nested `def` params are deliberately NOT collected; only the
100            // top function's params (added via `add_params`) and lambda params count.
101            Expr::Lambda(lam) => {
102                if let Some(p) = &lam.parameters {
103                    for x in p.posonlyargs.iter().chain(p.args.iter()).chain(p.kwonlyargs.iter()) {
104                        self.bound.insert(x.parameter.name.id.as_str().to_owned());
105                    }
106                }
107            }
108            _ => {}
109        }
110        visitor::walk_expr(self, expr);
111    }
112
113    fn visit_comprehension(&mut self, comprehension: &'a Comprehension) {
114        self.add_target(&comprehension.target);
115        visitor::walk_comprehension(self, comprehension);
116    }
117
118    fn visit_except_handler(&mut self, except_handler: &'a ExceptHandler) {
119        let ExceptHandler::ExceptHandler(handler) = except_handler;
120        if let Some(name) = &handler.name {
121            self.bound.insert(name.id.as_str().to_owned());
122        }
123        visitor::walk_except_handler(self, except_handler);
124    }
125
126    fn visit_with_item(&mut self, with_item: &'a WithItem) {
127        if let Some(vars) = &with_item.optional_vars {
128            self.add_target(vars);
129        }
130        visitor::walk_with_item(self, with_item);
131    }
132}
133
134// ---------------------------------------------------------------------------
135// Python repr helpers — match CPython's `repr` for the literal kinds `ast.dump` embeds.
136// ---------------------------------------------------------------------------
137
138/// CPython `repr(str)`: single quotes unless that forces escaping a lone `'` while `"` is free;
139/// `\\ \n \r \t` and C0/DEL controls escaped, printable (incl. non-ASCII) kept verbatim.
140fn repr_str(s: &str) -> String {
141    let quote = if s.contains('\'') && !s.contains('"') { '"' } else { '\'' };
142    let mut out = String::with_capacity(s.len() + 2);
143    out.push(quote);
144    for c in s.chars() {
145        match c {
146            '\\' => out.push_str("\\\\"),
147            '\n' => out.push_str("\\n"),
148            '\r' => out.push_str("\\r"),
149            '\t' => out.push_str("\\t"),
150            c if c == quote => {
151                out.push('\\');
152                out.push(c);
153            }
154            c if needs_escape(c) => {
155                let cp = c as u32;
156                if cp < 0x100 {
157                    let _ = write!(out, "\\x{cp:02x}");
158                } else if cp < 0x10000 {
159                    let _ = write!(out, "\\u{cp:04x}");
160                } else {
161                    let _ = write!(out, "\\U{cp:08x}");
162                }
163            }
164            c => out.push(c),
165        }
166    }
167    out.push(quote);
168    out
169}
170
171/// Chars CPython's `repr` escapes as `\xNN`/`\uNNNN` (non-printable): C0/C1 controls plus the
172/// common zero-width / BOM / line-separator format chars. (Full `str.isprintable` parity would
173/// need Unicode tables; these cover what shows up in source-literal text.)
174fn needs_escape(c: char) -> bool {
175    c.is_control()
176        || matches!(c, '\u{ad}' | '\u{feff}' | '\u{2028}' | '\u{2029}' | '\u{2060}')
177        || ('\u{200b}'..='\u{200f}').contains(&c)
178        // non-`U+0020` Unicode spaces (category Zs) — `str.isprintable()` is False for these
179        || matches!(c, '\u{a0}' | '\u{202f}' | '\u{205f}' | '\u{3000}' | '\u{1680}')
180        || ('\u{2000}'..='\u{200a}').contains(&c)
181}
182
183/// CPython `repr(bytes)`: `b'…'`, printable ASCII kept, everything else `\xNN`.
184fn repr_bytes(bytes: &[u8]) -> String {
185    let quote = if bytes.contains(&b'\'') && !bytes.contains(&b'"') { b'"' } else { b'\'' };
186    let mut out = String::from("b");
187    out.push(quote as char);
188    for &b in bytes {
189        match b {
190            b'\\' => out.push_str("\\\\"),
191            b'\n' => out.push_str("\\n"),
192            b'\r' => out.push_str("\\r"),
193            b'\t' => out.push_str("\\t"),
194            b if b == quote => {
195                out.push('\\');
196                out.push(b as char);
197            }
198            0x20..=0x7e => out.push(b as char),
199            b => {
200                let _ = write!(out, "\\x{b:02x}");
201            }
202        }
203    }
204    out.push(quote as char);
205    out
206}
207
208/// CPython `repr(float)`: shortest round-trip, switching to scientific notation when the decimal
209/// exponent is `< -4` or `>= 16` (Python's threshold), with `e±NN` (≥2 exponent digits) and a `.0`
210/// forced on whole values in fixed notation. Rust's `{:e}` provides the shortest mantissa/exponent.
211fn repr_float(value: f64) -> String {
212    if value.is_infinite() {
213        return if value < 0.0 { "-inf".to_owned() } else { "inf".to_owned() };
214    }
215    if value.is_nan() {
216        return "nan".to_owned();
217    }
218    let sci = format!("{value:e}"); // e.g. "1e-6", "1.5e0", "-2.5e16"
219    let (mantissa, exp) = match sci.split_once('e') {
220        Some((m, e)) => (m, e.parse::<i32>().unwrap_or(0)),
221        None => (sci.as_str(), 0),
222    };
223    if (-4..16).contains(&exp) {
224        let fixed = format!("{value}");
225        if fixed.contains('.') {
226            fixed
227        } else {
228            format!("{fixed}.0")
229        }
230    } else {
231        let sign = if exp < 0 { '-' } else { '+' };
232        let abs = exp.unsigned_abs();
233        format!("{mantissa}e{sign}{abs:02}")
234    }
235}
236
237#[allow(clippy::float_cmp)] // complex literals carry an exact 0.0 real part
238fn repr_number(num: &Number) -> String {
239    match num {
240        Number::Int(value) => format!("{value}"),
241        Number::Float(value) => repr_float(*value),
242        Number::Complex { real, imag } => {
243            if *real == 0.0 {
244                format!("{imag}j")
245            } else {
246                format!("({real}+{imag}j)")
247            }
248        }
249    }
250}
251
252// ---------------------------------------------------------------------------
253// The serializer: one node at a time, mirroring CPython `ast.dump` `_format`.
254// ---------------------------------------------------------------------------
255
256/// One field's contribution under the `show_empty=False` rule.
257enum F {
258    /// A present value (already formatted) — emitted positionally, or `name=` once keyword mode is on.
259    P(String),
260    /// An empty list (`[]`): buffered then dropped, unless a later present field flushes it back.
261    Empty,
262    /// An optional field that is `None`: dropped, and switches the remaining fields to keyword form.
263    Skip,
264}
265
266fn opt(value: Option<String>) -> F {
267    value.map_or(F::Skip, F::P)
268}
269
270#[allow(clippy::needless_pass_by_value)] // items is a fresh per-field Vec, consumed by the join
271fn flist(items: Vec<String>) -> F {
272    if items.is_empty() {
273        F::Empty
274    } else {
275        F::P(format!("[{}]", items.join(", ")))
276    }
277}
278
279/// CPython-`ast.dump(annotate_fields=False)` serializer over the ruff AST. `locals = Some(set)`
280/// ⇒ cross-name mode (rename bound locals → `_v{n}`, blank the def name); `None` ⇒ clustering
281/// mode (names preserved). `count` mirrors `sum(1 for _ in ast.walk(node))` for the dup-defs size.
282struct Dump<'a> {
283    count: usize,
284    src: &'a str,
285    locals: Option<&'a HashSet<String>>,
286    map: HashMap<String, u32>,
287    /// Cross-name blanks the *top* function's name to `_fn` once; nested defs keep their names
288    /// (CPython sets `fn.name = "_fn"` on the top node only).
289    blanked: bool,
290    /// The *top* def's own decorators are excluded (the dup-defs def text starts at the keyword, so
291    /// the text-based canonical never had them). Consumed by the first def **before** its body is
292    /// walked, so nested defs keep their decorators. Lets a node-based canonical (from a file AST,
293    /// decorators present) match the text-based one byte-for-byte.
294    top_def_pending: bool,
295}
296
297impl<'a> Dump<'a> {
298    fn new(src: &'a str, locals: Option<&'a HashSet<String>>) -> Self {
299        Self { count: 0, src, locals, map: HashMap::new(), blanked: false, top_def_pending: true }
300    }
301
302    /// In cross-name mode, rewrite a bound local to its positional `_v{n}` placeholder.
303    fn rename_id(&mut self, id: &str) -> String {
304        find_dup_defs_canon::alpha_rename(&mut self.map, self.locals, id)
305    }
306
307    /// Emit one AST node `name(field, …)` applying the `show_empty=False` + keyword-switch rule.
308    fn node(&mut self, name: &str, fields: Vec<(&str, F)>) -> String {
309        self.count += 1;
310        let mut args: Vec<String> = Vec::new();
311        let mut pending = 0usize; // buffered empty lists, flushed iff a positional field follows
312        let mut keywords = false;
313        for (fname, field) in fields {
314            match field {
315                F::Skip => keywords = true,
316                F::Empty => {
317                    if !keywords {
318                        pending += 1;
319                    }
320                }
321                F::P(value) => {
322                    if keywords {
323                        args.push(format!("{fname}={value}"));
324                    } else {
325                        for _ in 0..pending {
326                            args.push("[]".to_owned());
327                        }
328                        pending = 0;
329                        args.push(value);
330                    }
331                }
332            }
333        }
334        format!("{name}({})", args.join(", "))
335    }
336
337    fn ctx(&mut self, ctx: ExprContext) -> String {
338        let name = match ctx {
339            ExprContext::Store => "Store",
340            ExprContext::Del => "Del",
341            ExprContext::Load | ExprContext::Invalid => "Load",
342        };
343        self.node(name, vec![])
344    }
345
346    fn operator(&mut self, op: Operator) -> String {
347        let name = match op {
348            Operator::Add => "Add",
349            Operator::Sub => "Sub",
350            Operator::Mult => "Mult",
351            Operator::MatMult => "MatMult",
352            Operator::Div => "Div",
353            Operator::Mod => "Mod",
354            Operator::Pow => "Pow",
355            Operator::LShift => "LShift",
356            Operator::RShift => "RShift",
357            Operator::BitOr => "BitOr",
358            Operator::BitXor => "BitXor",
359            Operator::BitAnd => "BitAnd",
360            Operator::FloorDiv => "FloorDiv",
361        };
362        self.node(name, vec![])
363    }
364
365    fn unaryop(&mut self, op: UnaryOp) -> String {
366        let name = match op {
367            UnaryOp::Invert => "Invert",
368            UnaryOp::Not => "Not",
369            UnaryOp::UAdd => "UAdd",
370            UnaryOp::USub => "USub",
371        };
372        self.node(name, vec![])
373    }
374
375    fn boolop(&mut self, op: BoolOp) -> String {
376        let name = match op {
377            BoolOp::And => "And",
378            BoolOp::Or => "Or",
379        };
380        self.node(name, vec![])
381    }
382
383    fn cmpop(&mut self, op: CmpOp) -> String {
384        let name = match op {
385            CmpOp::Eq => "Eq",
386            CmpOp::NotEq => "NotEq",
387            CmpOp::Lt => "Lt",
388            CmpOp::LtE => "LtE",
389            CmpOp::Gt => "Gt",
390            CmpOp::GtE => "GtE",
391            CmpOp::Is => "Is",
392            CmpOp::IsNot => "IsNot",
393            CmpOp::In => "In",
394            CmpOp::NotIn => "NotIn",
395        };
396        self.node(name, vec![])
397    }
398
399    /// `arg(arg, annotation?, type_comment?)` — the param name is a rename target.
400    fn arg_node(&mut self, param: &Parameter) -> String {
401        let name = self.rename_id(param.name.id.as_str());
402        let annotation = param.annotation.as_ref().map(|a| self.expr(a));
403        self.node(
404            "arg",
405            vec![("arg", F::P(repr_str(&name))), ("annotation", opt(annotation)), ("type_comment", F::Skip)],
406        )
407    }
408
409    /// `arguments(posonlyargs, args, vararg?, kwonlyargs, kw_defaults, kwarg?, defaults)`.
410    fn arguments(&mut self, params: Option<&Parameters>) -> String {
411        let Some(p) = params else {
412            return self.node(
413                "arguments",
414                vec![
415                    ("posonlyargs", F::Empty),
416                    ("args", F::Empty),
417                    ("vararg", F::Skip),
418                    ("kwonlyargs", F::Empty),
419                    ("kw_defaults", F::Empty),
420                    ("kwarg", F::Skip),
421                    ("defaults", F::Empty),
422                ],
423            );
424        };
425        let posonly: Vec<String> = p.posonlyargs.iter().map(|x| self.arg_node(&x.parameter)).collect();
426        let args: Vec<String> = p.args.iter().map(|x| self.arg_node(&x.parameter)).collect();
427        let vararg = p.vararg.as_ref().map(|x| self.arg_node(x));
428        let kwonly: Vec<String> = p.kwonlyargs.iter().map(|x| self.arg_node(&x.parameter)).collect();
429        let kw_defaults: Vec<String> = p
430            .kwonlyargs
431            .iter()
432            .map(|x| match &x.default {
433                Some(d) => self.expr(d),
434                None => "None".to_owned(),
435            })
436            .collect();
437        let kwarg = p.kwarg.as_ref().map(|x| self.arg_node(x));
438        let defaults: Vec<String> = p
439            .posonlyargs
440            .iter()
441            .chain(p.args.iter())
442            .filter_map(|x| x.default.as_ref())
443            .map(|d| self.expr(d))
444            .collect();
445        self.node(
446            "arguments",
447            vec![
448                ("posonlyargs", flist(posonly)),
449                ("args", flist(args)),
450                ("vararg", opt(vararg)),
451                ("kwonlyargs", flist(kwonly)),
452                ("kw_defaults", flist(kw_defaults)),
453                ("kwarg", opt(kwarg)),
454                ("defaults", flist(defaults)),
455            ],
456        )
457    }
458
459    fn keyword(&mut self, kw: &Keyword) -> String {
460        let arg = kw.arg.as_ref().map(|id| repr_str(id.id.as_str()));
461        let value = self.expr(&kw.value);
462        self.node("keyword", vec![("arg", opt(arg)), ("value", F::P(value))])
463    }
464
465    fn comprehension(&mut self, comp: &Comprehension) -> String {
466        let target = self.expr(&comp.target);
467        let iter = self.expr(&comp.iter);
468        let ifs: Vec<String> = comp.ifs.iter().map(|i| self.expr(i)).collect();
469        let is_async = i32::from(comp.is_async);
470        self.node(
471            "comprehension",
472            vec![
473                ("target", F::P(target)),
474                ("iter", F::P(iter)),
475                ("ifs", flist(ifs)),
476                ("is_async", F::P(format!("{is_async}"))),
477            ],
478        )
479    }
480
481    /// An f-string → CPython `JoinedStr([Constant | FormattedValue, …])`. Iterates PARTS (so plain
482    /// `StringLiteral` parts in implicit concatenation are kept), merges consecutive literals into
483    /// one `Constant`, and bakes a `{x=}` debug interpolation as `Constant('x=')` + `FormattedValue`
484    /// — exactly as CPython's parser does.
485    fn fstring_dump(&mut self, fvalue: &ast::FStringValue) -> String {
486        let mut literal = String::new();
487        let mut values: Vec<String> = Vec::new();
488        for part in fvalue {
489            match part {
490                ast::FStringPart::Literal(s) => literal.push_str(&s.value),
491                ast::FStringPart::FString(fs) => self.fstring_elements(&fs.elements, &mut literal, &mut values),
492            }
493        }
494        if !literal.is_empty() {
495            values.push(self.node("Constant", vec![("value", F::P(repr_str(&literal)))]));
496        }
497        self.node("JoinedStr", vec![("values", flist(values))])
498    }
499
500    /// A format-spec → its own `JoinedStr` (CPython wraps format specs the same way).
501    fn formatspec_dump(&mut self, elements: &[ast::InterpolatedStringElement]) -> String {
502        let mut literal = String::new();
503        let mut values: Vec<String> = Vec::new();
504        self.fstring_elements(elements, &mut literal, &mut values);
505        if !literal.is_empty() {
506            values.push(self.node("Constant", vec![("value", F::P(repr_str(&literal)))]));
507        }
508        self.node("JoinedStr", vec![("values", flist(values))])
509    }
510
511    fn fstring_elements(
512        &mut self,
513        elements: &[ast::InterpolatedStringElement],
514        literal: &mut String,
515        values: &mut Vec<String>,
516    ) {
517        for element in elements {
518            match element {
519                ast::InterpolatedStringElement::Literal(lit) => literal.push_str(&lit.value),
520                ast::InterpolatedStringElement::Interpolation(interp) => {
521                    if let Some(dbg) = &interp.debug_text {
522                        let range = interp.expression.range();
523                        let expr_src =
524                            self.src.get(usize::from(range.start())..usize::from(range.end())).unwrap_or("");
525                        let _ = write!(literal, "{}{expr_src}{}", dbg.leading, dbg.trailing);
526                    }
527                    if !literal.is_empty() {
528                        values.push(self.node("Constant", vec![("value", F::P(repr_str(literal)))]));
529                        literal.clear();
530                    }
531                    let value = self.expr(&interp.expression);
532                    let mut conversion = interp.conversion as i8;
533                    if conversion == -1 && interp.debug_text.is_some() && interp.format_spec.is_none() {
534                        conversion = 114; // bare `{x=}` implies `!r`
535                    }
536                    let format_spec = interp.format_spec.as_ref().map(|s| self.formatspec_dump(&s.elements));
537                    values.push(self.node(
538                        "FormattedValue",
539                        vec![
540                            ("value", F::P(value)),
541                            ("conversion", F::P(format!("{conversion}"))),
542                            ("format_spec", opt(format_spec)),
543                        ],
544                    ));
545                }
546            }
547        }
548    }
549
550    fn type_params(&mut self, params: Option<&ast::TypeParams>) -> F {
551        match params {
552            None => F::Empty,
553            Some(tps) => {
554                let items: Vec<String> = tps.type_params.iter().map(|t| self.type_param(t)).collect();
555                flist(items)
556            }
557        }
558    }
559
560    fn type_param(&mut self, tp: &ast::TypeParam) -> String {
561        match tp {
562            ast::TypeParam::TypeVar(t) => {
563                let bound = t.bound.as_ref().map(|b| self.expr(b));
564                let default = t.default.as_ref().map(|d| self.expr(d));
565                self.node(
566                    "TypeVar",
567                    vec![
568                        ("name", F::P(repr_str(t.name.id.as_str()))),
569                        ("bound", opt(bound)),
570                        ("default_value", opt(default)),
571                    ],
572                )
573            }
574            ast::TypeParam::ParamSpec(t) => {
575                let default = t.default.as_ref().map(|d| self.expr(d));
576                self.node(
577                    "ParamSpec",
578                    vec![("name", F::P(repr_str(t.name.id.as_str()))), ("default_value", opt(default))],
579                )
580            }
581            ast::TypeParam::TypeVarTuple(t) => {
582                let default = t.default.as_ref().map(|d| self.expr(d));
583                self.node(
584                    "TypeVarTuple",
585                    vec![("name", F::P(repr_str(t.name.id.as_str()))), ("default_value", opt(default))],
586                )
587            }
588        }
589    }
590
591    fn alias_node(&mut self, alias: &ast::Alias) -> String {
592        let asname = alias.asname.as_ref().map(|n| repr_str(n.id.as_str()));
593        self.node("alias", vec![("name", F::P(repr_str(alias.name.id.as_str()))), ("asname", opt(asname))])
594    }
595
596    fn pattern(&mut self, pat: &ast::Pattern) -> String {
597        match pat {
598            ast::Pattern::MatchValue(p) => {
599                let value = self.expr(&p.value);
600                self.node("MatchValue", vec![("value", F::P(value))])
601            }
602            ast::Pattern::MatchSingleton(p) => {
603                let value = match p.value {
604                    ast::Singleton::None => "None",
605                    ast::Singleton::True => "True",
606                    ast::Singleton::False => "False",
607                };
608                self.node("MatchSingleton", vec![("value", F::P(value.to_owned()))])
609            }
610            ast::Pattern::MatchSequence(p) => {
611                let patterns: Vec<String> = p.patterns.iter().map(|x| self.pattern(x)).collect();
612                self.node("MatchSequence", vec![("patterns", flist(patterns))])
613            }
614            ast::Pattern::MatchMapping(p) => {
615                let keys: Vec<String> = p.keys.iter().map(|k| self.expr(k)).collect();
616                let patterns: Vec<String> = p.patterns.iter().map(|x| self.pattern(x)).collect();
617                let rest = p.rest.as_ref().map(|id| repr_str(id.id.as_str()));
618                self.node(
619                    "MatchMapping",
620                    vec![("keys", flist(keys)), ("patterns", flist(patterns)), ("rest", opt(rest))],
621                )
622            }
623            ast::Pattern::MatchClass(p) => {
624                let cls = self.expr(&p.cls);
625                let patterns: Vec<String> = p.arguments.patterns.iter().map(|x| self.pattern(x)).collect();
626                let kwd_attrs: Vec<String> =
627                    p.arguments.keywords.iter().map(|k| repr_str(k.attr.id.as_str())).collect();
628                let kwd_patterns: Vec<String> =
629                    p.arguments.keywords.iter().map(|k| self.pattern(&k.pattern)).collect();
630                self.node(
631                    "MatchClass",
632                    vec![
633                        ("cls", F::P(cls)),
634                        ("patterns", flist(patterns)),
635                        ("kwd_attrs", flist(kwd_attrs)),
636                        ("kwd_patterns", flist(kwd_patterns)),
637                    ],
638                )
639            }
640            ast::Pattern::MatchStar(p) => {
641                let name = p.name.as_ref().map(|id| repr_str(id.id.as_str()));
642                self.node("MatchStar", vec![("name", opt(name))])
643            }
644            ast::Pattern::MatchAs(p) => {
645                let pattern = p.pattern.as_ref().map(|x| self.pattern(x));
646                let name = p.name.as_ref().map(|id| repr_str(id.id.as_str()));
647                self.node("MatchAs", vec![("pattern", opt(pattern)), ("name", opt(name))])
648            }
649            ast::Pattern::MatchOr(p) => {
650                let patterns: Vec<String> = p.patterns.iter().map(|x| self.pattern(x)).collect();
651                self.node("MatchOr", vec![("patterns", flist(patterns))])
652            }
653        }
654    }
655
656    fn match_case(&mut self, case: &ast::MatchCase) -> String {
657        let pattern = self.pattern(&case.pattern);
658        let guard = case.guard.as_ref().map(|g| self.expr(g));
659        let body = self.body(&case.body, false);
660        self.node("match_case", vec![("pattern", F::P(pattern)), ("guard", opt(guard)), ("body", flist(body))])
661    }
662
663    /// A statement body; `strip` drops a leading string-literal docstring (def/class bodies only),
664    /// replacing an emptied body with `[Pass()]` exactly like CPython's `_strip_docstring`.
665    fn body(&mut self, stmts: &[Stmt], strip: bool) -> Vec<String> {
666        if strip && stmts.first().is_some_and(is_docstring) {
667            let rest = &stmts[1..];
668            if rest.is_empty() {
669                return vec![self.node("Pass", vec![])];
670            }
671            return rest.iter().map(|s| self.stmt(s)).collect();
672        }
673        stmts.iter().map(|s| self.stmt(s)).collect()
674    }
675
676    /// Rebuild CPython's nested-`If` `orelse` from ruff's flat `elif_else_clauses`.
677    fn elif_orelse(&mut self, clauses: &[ast::ElifElseClause]) -> Vec<String> {
678        let Some((first, rest)) = clauses.split_first() else { return Vec::new() };
679        match &first.test {
680            Some(test) => {
681                let test_s = self.expr(test);
682                let body = self.body(&first.body, false);
683                let orelse = self.elif_orelse(rest);
684                vec![self.node(
685                    "If",
686                    vec![("test", F::P(test_s)), ("body", flist(body)), ("orelse", flist(orelse))],
687                )]
688            }
689            None => self.body(&first.body, false),
690        }
691    }
692
693    #[allow(clippy::too_many_lines)]
694    fn expr(&mut self, expr: &Expr) -> String {
695        match expr {
696            Expr::Name(n) => {
697                let id = self.rename_id(n.id.as_str());
698                let ctx = self.ctx(n.ctx);
699                self.node("Name", vec![("id", F::P(repr_str(&id))), ("ctx", F::P(ctx))])
700            }
701            Expr::Attribute(a) => {
702                let value = self.expr(&a.value);
703                let ctx = self.ctx(a.ctx);
704                self.node(
705                    "Attribute",
706                    vec![("value", F::P(value)), ("attr", F::P(repr_str(a.attr.id.as_str()))), ("ctx", F::P(ctx))],
707                )
708            }
709            Expr::Call(c) => {
710                let func = self.expr(&c.func);
711                let args: Vec<String> = c.arguments.args.iter().map(|a| self.expr(a)).collect();
712                let keywords: Vec<String> = c.arguments.keywords.iter().map(|k| self.keyword(k)).collect();
713                self.node("Call", vec![("func", F::P(func)), ("args", flist(args)), ("keywords", flist(keywords))])
714            }
715            Expr::BinOp(b) => {
716                let left = self.expr(&b.left);
717                let op = self.operator(b.op);
718                let right = self.expr(&b.right);
719                self.node("BinOp", vec![("left", F::P(left)), ("op", F::P(op)), ("right", F::P(right))])
720            }
721            Expr::UnaryOp(u) => {
722                let op = self.unaryop(u.op);
723                let operand = self.expr(&u.operand);
724                self.node("UnaryOp", vec![("op", F::P(op)), ("operand", F::P(operand))])
725            }
726            Expr::BoolOp(b) => {
727                let op = self.boolop(b.op);
728                let values: Vec<String> = b.values.iter().map(|v| self.expr(v)).collect();
729                self.node("BoolOp", vec![("op", F::P(op)), ("values", flist(values))])
730            }
731            Expr::Compare(c) => {
732                let left = self.expr(&c.left);
733                let ops: Vec<String> = c.ops.iter().map(|o| self.cmpop(*o)).collect();
734                let comparators: Vec<String> = c.comparators.iter().map(|e| self.expr(e)).collect();
735                self.node(
736                    "Compare",
737                    vec![("left", F::P(left)), ("ops", flist(ops)), ("comparators", flist(comparators))],
738                )
739            }
740            Expr::NumberLiteral(n) => self.node("Constant", vec![("value", F::P(repr_number(&n.value)))]),
741            Expr::StringLiteral(s) => {
742                self.node("Constant", vec![("value", F::P(repr_str(s.value.to_str())))])
743            }
744            Expr::BytesLiteral(b) => {
745                let bytes: Vec<u8> = b.value.bytes().collect();
746                self.node("Constant", vec![("value", F::P(repr_bytes(&bytes)))])
747            }
748            Expr::BooleanLiteral(b) => {
749                let value = if b.value { "True" } else { "False" };
750                self.node("Constant", vec![("value", F::P(value.to_owned()))])
751            }
752            // IpyEscapeCommand has no CPython analogue (Jupyter only); a `None` Constant placeholder
753            // it that never occurs in real modules.
754            Expr::NoneLiteral(_) | Expr::IpyEscapeCommand(_) => {
755                self.node("Constant", vec![("value", F::P("None".to_owned()))])
756            }
757            Expr::EllipsisLiteral(_) => self.node("Constant", vec![("value", F::P("Ellipsis".to_owned()))]),
758            Expr::Subscript(s) => {
759                let value = self.expr(&s.value);
760                let slice = self.expr(&s.slice);
761                let ctx = self.ctx(s.ctx);
762                self.node(
763                    "Subscript",
764                    vec![("value", F::P(value)), ("slice", F::P(slice)), ("ctx", F::P(ctx))],
765                )
766            }
767            Expr::Starred(s) => {
768                let value = self.expr(&s.value);
769                let ctx = self.ctx(s.ctx);
770                self.node("Starred", vec![("value", F::P(value)), ("ctx", F::P(ctx))])
771            }
772            Expr::List(l) => {
773                let elts: Vec<String> = l.elts.iter().map(|e| self.expr(e)).collect();
774                let ctx = self.ctx(l.ctx);
775                self.node("List", vec![("elts", flist(elts)), ("ctx", F::P(ctx))])
776            }
777            Expr::Tuple(t) => {
778                let elts: Vec<String> = t.elts.iter().map(|e| self.expr(e)).collect();
779                let ctx = self.ctx(t.ctx);
780                self.node("Tuple", vec![("elts", flist(elts)), ("ctx", F::P(ctx))])
781            }
782            Expr::Set(s) => {
783                let elts: Vec<String> = s.elts.iter().map(|e| self.expr(e)).collect();
784                self.node("Set", vec![("elts", flist(elts))])
785            }
786            Expr::Dict(d) => {
787                let keys: Vec<String> = d
788                    .items
789                    .iter()
790                    .map(|i| i.key.as_ref().map_or_else(|| "None".to_owned(), |k| self.expr(k)))
791                    .collect();
792                let values: Vec<String> = d.items.iter().map(|i| self.expr(&i.value)).collect();
793                self.node("Dict", vec![("keys", flist(keys)), ("values", flist(values))])
794            }
795            Expr::Slice(s) => {
796                let lower = s.lower.as_ref().map(|e| self.expr(e));
797                let upper = s.upper.as_ref().map(|e| self.expr(e));
798                let step = s.step.as_ref().map(|e| self.expr(e));
799                self.node("Slice", vec![("lower", opt(lower)), ("upper", opt(upper)), ("step", opt(step))])
800            }
801            Expr::If(i) => {
802                let test = self.expr(&i.test);
803                let body = self.expr(&i.body);
804                let orelse = self.expr(&i.orelse);
805                self.node("IfExp", vec![("test", F::P(test)), ("body", F::P(body)), ("orelse", F::P(orelse))])
806            }
807            Expr::Lambda(l) => {
808                let args = self.arguments(l.parameters.as_deref());
809                let body = self.expr(&l.body);
810                self.node("Lambda", vec![("args", F::P(args)), ("body", F::P(body))])
811            }
812            Expr::Named(n) => {
813                let target = self.expr(&n.target);
814                let value = self.expr(&n.value);
815                self.node("NamedExpr", vec![("target", F::P(target)), ("value", F::P(value))])
816            }
817            Expr::Await(a) => {
818                let value = self.expr(&a.value);
819                self.node("Await", vec![("value", F::P(value))])
820            }
821            Expr::Yield(y) => {
822                let value = y.value.as_ref().map(|v| self.expr(v));
823                self.node("Yield", vec![("value", opt(value))])
824            }
825            Expr::YieldFrom(y) => {
826                let value = self.expr(&y.value);
827                self.node("YieldFrom", vec![("value", F::P(value))])
828            }
829            Expr::ListComp(c) => {
830                let elt = self.expr(&c.elt);
831                let generators: Vec<String> = c.generators.iter().map(|g| self.comprehension(g)).collect();
832                self.node("ListComp", vec![("elt", F::P(elt)), ("generators", flist(generators))])
833            }
834            Expr::SetComp(c) => {
835                let elt = self.expr(&c.elt);
836                let generators: Vec<String> = c.generators.iter().map(|g| self.comprehension(g)).collect();
837                self.node("SetComp", vec![("elt", F::P(elt)), ("generators", flist(generators))])
838            }
839            Expr::Generator(c) => {
840                let elt = self.expr(&c.elt);
841                let generators: Vec<String> = c.generators.iter().map(|g| self.comprehension(g)).collect();
842                self.node("GeneratorExp", vec![("elt", F::P(elt)), ("generators", flist(generators))])
843            }
844            Expr::DictComp(c) => {
845                let key = self.expr(&c.key);
846                let value = self.expr(&c.value);
847                let generators: Vec<String> = c.generators.iter().map(|g| self.comprehension(g)).collect();
848                self.node(
849                    "DictComp",
850                    vec![("key", F::P(key)), ("value", F::P(value)), ("generators", flist(generators))],
851                )
852            }
853            Expr::FString(f) => self.fstring_dump(&f.value),
854            Expr::TString(_) => {
855                // PEP 750 template strings (3.14, vanishingly rare): CPython's `Interpolation.str`
856                // field can't be reproduced from the ruff AST, so emit a minimal `TemplateStr()`.
857                self.node("TemplateStr", vec![("values", F::Empty)])
858            }
859        }
860    }
861
862    #[allow(clippy::too_many_lines)]
863    fn stmt(&mut self, stmt: &Stmt) -> String {
864        match stmt {
865            Stmt::FunctionDef(f) => {
866                let strip_deco = self.top_def_pending; // capture + clear BEFORE walking the body
867                self.top_def_pending = false;
868                let cpy = if f.is_async { "AsyncFunctionDef" } else { "FunctionDef" };
869                let name = if self.locals.is_some() && !self.blanked {
870                    self.blanked = true;
871                    "_fn".to_owned()
872                } else {
873                    f.name.id.as_str().to_owned()
874                };
875                let args = self.arguments(Some(&f.parameters));
876                let body = self.body(&f.body, true);
877                let decorators: Vec<String> =
878                    if strip_deco { Vec::new() } else { f.decorator_list.iter().map(|d| self.expr(&d.expression)).collect() };
879                let returns = f.returns.as_ref().map(|r| self.expr(r));
880                let type_params = self.type_params(f.type_params.as_deref());
881                self.node(
882                    cpy,
883                    vec![
884                        ("name", F::P(repr_str(&name))),
885                        ("args", F::P(args)),
886                        ("body", flist(body)),
887                        ("decorator_list", flist(decorators)),
888                        ("returns", opt(returns)),
889                        ("type_comment", F::Skip),
890                        ("type_params", type_params),
891                    ],
892                )
893            }
894            Stmt::ClassDef(c) => {
895                let strip_deco = self.top_def_pending; // capture + clear BEFORE walking the body
896                self.top_def_pending = false;
897                let name = c.name.id.as_str().to_owned();
898                let (bases, keywords) = match &c.arguments {
899                    Some(a) => {
900                        let bases = a.args.iter().map(|b| self.expr(b)).collect();
901                        let keywords = a.keywords.iter().map(|k| self.keyword(k)).collect();
902                        (bases, keywords)
903                    }
904                    None => (Vec::new(), Vec::new()),
905                };
906                let body = self.body(&c.body, true);
907                let decorators: Vec<String> =
908                    if strip_deco { Vec::new() } else { c.decorator_list.iter().map(|d| self.expr(&d.expression)).collect() };
909                let type_params = self.type_params(c.type_params.as_deref());
910                self.node(
911                    "ClassDef",
912                    vec![
913                        ("name", F::P(repr_str(&name))),
914                        ("bases", flist(bases)),
915                        ("keywords", flist(keywords)),
916                        ("body", flist(body)),
917                        ("decorator_list", flist(decorators)),
918                        ("type_params", type_params),
919                    ],
920                )
921            }
922            Stmt::Return(r) => {
923                let value = r.value.as_ref().map(|v| self.expr(v));
924                self.node("Return", vec![("value", opt(value))])
925            }
926            Stmt::Delete(d) => {
927                let targets: Vec<String> = d.targets.iter().map(|t| self.expr(t)).collect();
928                self.node("Delete", vec![("targets", flist(targets))])
929            }
930            Stmt::Assign(a) => {
931                let targets: Vec<String> = a.targets.iter().map(|t| self.expr(t)).collect();
932                let value = self.expr(&a.value);
933                self.node(
934                    "Assign",
935                    vec![("targets", flist(targets)), ("value", F::P(value)), ("type_comment", F::Skip)],
936                )
937            }
938            Stmt::AugAssign(a) => {
939                let target = self.expr(&a.target);
940                let op = self.operator(a.op);
941                let value = self.expr(&a.value);
942                self.node(
943                    "AugAssign",
944                    vec![("target", F::P(target)), ("op", F::P(op)), ("value", F::P(value))],
945                )
946            }
947            Stmt::AnnAssign(a) => {
948                let target = self.expr(&a.target);
949                let annotation = self.expr(&a.annotation);
950                let value = a.value.as_ref().map(|v| self.expr(v));
951                let simple = i32::from(a.simple);
952                self.node(
953                    "AnnAssign",
954                    vec![
955                        ("target", F::P(target)),
956                        ("annotation", F::P(annotation)),
957                        ("value", opt(value)),
958                        ("simple", F::P(format!("{simple}"))),
959                    ],
960                )
961            }
962            Stmt::TypeAlias(t) => {
963                let name = self.expr(&t.name);
964                let type_params = self.type_params(t.type_params.as_deref());
965                let value = self.expr(&t.value);
966                self.node(
967                    "TypeAlias",
968                    vec![("name", F::P(name)), ("type_params", type_params), ("value", F::P(value))],
969                )
970            }
971            Stmt::For(f) => {
972                let cpy = if f.is_async { "AsyncFor" } else { "For" };
973                let target = self.expr(&f.target);
974                let iter = self.expr(&f.iter);
975                let body = self.body(&f.body, false);
976                let orelse = self.body(&f.orelse, false);
977                self.node(
978                    cpy,
979                    vec![
980                        ("target", F::P(target)),
981                        ("iter", F::P(iter)),
982                        ("body", flist(body)),
983                        ("orelse", flist(orelse)),
984                        ("type_comment", F::Skip),
985                    ],
986                )
987            }
988            Stmt::While(w) => {
989                let test = self.expr(&w.test);
990                let body = self.body(&w.body, false);
991                let orelse = self.body(&w.orelse, false);
992                self.node(
993                    "While",
994                    vec![("test", F::P(test)), ("body", flist(body)), ("orelse", flist(orelse))],
995                )
996            }
997            Stmt::If(i) => {
998                let test = self.expr(&i.test);
999                let body = self.body(&i.body, false);
1000                let orelse = self.elif_orelse(&i.elif_else_clauses);
1001                self.node(
1002                    "If",
1003                    vec![("test", F::P(test)), ("body", flist(body)), ("orelse", flist(orelse))],
1004                )
1005            }
1006            Stmt::With(w) => {
1007                let cpy = if w.is_async { "AsyncWith" } else { "With" };
1008                let items: Vec<String> = w
1009                    .items
1010                    .iter()
1011                    .map(|item| {
1012                        let context_expr = self.expr(&item.context_expr);
1013                        let optional_vars = item.optional_vars.as_ref().map(|v| self.expr(v));
1014                        self.node(
1015                            "withitem",
1016                            vec![("context_expr", F::P(context_expr)), ("optional_vars", opt(optional_vars))],
1017                        )
1018                    })
1019                    .collect();
1020                let body = self.body(&w.body, false);
1021                self.node(cpy, vec![("items", flist(items)), ("body", flist(body)), ("type_comment", F::Skip)])
1022            }
1023            Stmt::Raise(r) => {
1024                let exc = r.exc.as_ref().map(|e| self.expr(e));
1025                let cause = r.cause.as_ref().map(|c| self.expr(c));
1026                self.node("Raise", vec![("exc", opt(exc)), ("cause", opt(cause))])
1027            }
1028            Stmt::Try(t) => {
1029                let cpy = if t.is_star { "TryStar" } else { "Try" };
1030                let body = self.body(&t.body, false);
1031                let handlers: Vec<String> = t
1032                    .handlers
1033                    .iter()
1034                    .map(|h| {
1035                        let ExceptHandler::ExceptHandler(handler) = h;
1036                        let typ = handler.type_.as_ref().map(|e| self.expr(e));
1037                        let name = handler.name.as_ref().map(|n| repr_str(&self.rename_id(n.id.as_str())));
1038                        let body = self.body(&handler.body, false);
1039                        self.node(
1040                            "ExceptHandler",
1041                            vec![("type", opt(typ)), ("name", opt(name)), ("body", flist(body))],
1042                        )
1043                    })
1044                    .collect();
1045                let orelse = self.body(&t.orelse, false);
1046                let finalbody = self.body(&t.finalbody, false);
1047                self.node(
1048                    cpy,
1049                    vec![
1050                        ("body", flist(body)),
1051                        ("handlers", flist(handlers)),
1052                        ("orelse", flist(orelse)),
1053                        ("finalbody", flist(finalbody)),
1054                    ],
1055                )
1056            }
1057            Stmt::Assert(a) => {
1058                let test = self.expr(&a.test);
1059                let msg = a.msg.as_ref().map(|m| self.expr(m));
1060                self.node("Assert", vec![("test", F::P(test)), ("msg", opt(msg))])
1061            }
1062            Stmt::Import(i) => {
1063                let names: Vec<String> = i.names.iter().map(|a| self.alias_node(a)).collect();
1064                self.node("Import", vec![("names", flist(names))])
1065            }
1066            Stmt::ImportFrom(i) => {
1067                let module = i.module.as_ref().map(|m| repr_str(m.id.as_str()));
1068                let names: Vec<String> = i.names.iter().map(|a| self.alias_node(a)).collect();
1069                self.node(
1070                    "ImportFrom",
1071                    vec![("module", opt(module)), ("names", flist(names)), ("level", F::P(format!("{}", i.level)))],
1072                )
1073            }
1074            Stmt::Global(g) => {
1075                let names: Vec<String> = g.names.iter().map(|n| repr_str(n.id.as_str())).collect();
1076                self.node("Global", vec![("names", flist(names))])
1077            }
1078            Stmt::Nonlocal(n) => {
1079                let names: Vec<String> = n.names.iter().map(|x| repr_str(x.id.as_str())).collect();
1080                self.node("Nonlocal", vec![("names", flist(names))])
1081            }
1082            Stmt::Expr(e) => {
1083                let value = self.expr(&e.value);
1084                self.node("Expr", vec![("value", F::P(value))])
1085            }
1086            Stmt::Match(m) => {
1087                let subject = self.expr(&m.subject);
1088                let cases: Vec<String> = m.cases.iter().map(|c| self.match_case(c)).collect();
1089                self.node("Match", vec![("subject", F::P(subject)), ("cases", flist(cases))])
1090            }
1091            // IpyEscapeCommand (Jupyter only) has no CPython node; treat as Pass — never occurs.
1092            Stmt::Pass(_) | Stmt::IpyEscapeCommand(_) => self.node("Pass", vec![]),
1093            Stmt::Break(_) => self.node("Break", vec![]),
1094            Stmt::Continue(_) => self.node("Continue", vec![]),
1095        }
1096    }
1097}
1098
1099fn is_docstring(stmt: &Stmt) -> bool {
1100    matches!(stmt, Stmt::Expr(e) if matches!(e.value.as_ref(), Expr::StringLiteral(_)))
1101}
1102
1103// ---------------------------------------------------------------------------
1104// Unparse — CPython 3.14 `ast.unparse` (`_ast_unparse.Unparser`) reproduced over the ruff AST.
1105// Used ONLY to produce the Type-3 (ECScan) `lines` (= `ast.unparse(normalized_fn).splitlines()`
1106// stripped), so the IDF/cosine matches the CPython `ast` reference exactly. Precedence is threaded as a
1107// `ctx` argument (each node is visited once after its parent sets its precedence), reproducing
1108// CPython's `set_precedence`/`require_parens` parenthesisation. Indentation is omitted (lines are
1109// stripped anyway); newlines fall only at statement/clause boundaries.
1110// ---------------------------------------------------------------------------
1111
1112mod prec {
1113    pub const NAMED_EXPR: u8 = 1;
1114    pub const TUPLE: u8 = 2;
1115    pub const YIELD: u8 = 3;
1116    pub const TEST: u8 = 4;
1117    pub const OR: u8 = 5;
1118    pub const AND: u8 = 6;
1119    pub const NOT: u8 = 7;
1120    pub const CMP: u8 = 8;
1121    pub const EXPR: u8 = 9;
1122    pub const BOR: u8 = 9;
1123    pub const BXOR: u8 = 10;
1124    pub const BAND: u8 = 11;
1125    pub const SHIFT: u8 = 12;
1126    pub const ARITH: u8 = 13;
1127    pub const TERM: u8 = 14;
1128    pub const FACTOR: u8 = 15;
1129    pub const POWER: u8 = 16;
1130    pub const AWAIT: u8 = 17;
1131    pub const ATOM: u8 = 18;
1132}
1133
1134fn next_prec(p: u8) -> u8 {
1135    if p < prec::ATOM {
1136        p + 1
1137    } else {
1138        prec::ATOM
1139    }
1140}
1141
1142fn operator_tag(op: Operator) -> &'static str {
1143    match op {
1144        Operator::Add => "+",
1145        Operator::Sub => "-",
1146        Operator::Mult => "*",
1147        Operator::MatMult => "@",
1148        Operator::Div => "/",
1149        Operator::Mod => "%",
1150        Operator::Pow => "**",
1151        Operator::LShift => "<<",
1152        Operator::RShift => ">>",
1153        Operator::BitOr => "|",
1154        Operator::BitXor => "^",
1155        Operator::BitAnd => "&",
1156        Operator::FloorDiv => "//",
1157    }
1158}
1159
1160fn binop_prec(op: Operator) -> u8 {
1161    match op {
1162        Operator::Add | Operator::Sub => prec::ARITH,
1163        Operator::Mult | Operator::MatMult | Operator::Div | Operator::Mod | Operator::FloorDiv => prec::TERM,
1164        Operator::LShift | Operator::RShift => prec::SHIFT,
1165        Operator::BitOr => prec::BOR,
1166        Operator::BitXor => prec::BXOR,
1167        Operator::BitAnd => prec::BAND,
1168        Operator::Pow => prec::POWER,
1169    }
1170}
1171
1172fn cmpop_tag(op: CmpOp) -> &'static str {
1173    match op {
1174        CmpOp::Eq => "==",
1175        CmpOp::NotEq => "!=",
1176        CmpOp::Lt => "<",
1177        CmpOp::LtE => "<=",
1178        CmpOp::Gt => ">",
1179        CmpOp::GtE => ">=",
1180        CmpOp::Is => "is",
1181        CmpOp::IsNot => "is not",
1182        CmpOp::In => "in",
1183        CmpOp::NotIn => "not in",
1184    }
1185}
1186
1187/// Python `str.isprintable` (approx): control + the common format/separator chars are not printable.
1188fn is_printable(c: char) -> bool {
1189    c == ' ' || !needs_escape(c)
1190}
1191
1192/// One char as Python's `unicode_escape` would render it (used for non-printable / backslash).
1193fn unicode_escape(c: char) -> String {
1194    match c {
1195        '\\' => "\\\\".to_owned(),
1196        '\n' => "\\n".to_owned(),
1197        '\r' => "\\r".to_owned(),
1198        '\t' => "\\t".to_owned(),
1199        c => {
1200            let cp = c as u32;
1201            if cp < 0x100 {
1202                format!("\\x{cp:02x}")
1203            } else if cp < 0x10000 {
1204                format!("\\u{cp:04x}")
1205            } else {
1206                format!("\\U{cp:08x}")
1207            }
1208        }
1209    }
1210}
1211
1212/// CPython unparse `escape_char`: keep `\n`/`\t` literal unless `esw`, else escape backslash + any
1213/// non-printable char (used for f-string literal parts; plain string Constants use `repr` instead).
1214fn escape_char(c: char, esw: bool) -> String {
1215    if !esw && (c == '\n' || c == '\t') {
1216        return c.to_string();
1217    }
1218    if c == '\\' || !is_printable(c) {
1219        return unicode_escape(c);
1220    }
1221    c.to_string()
1222}
1223
1224/// Append `s` as an f-string literal chunk: double `{`/`}`, then escape each char (esw=true).
1225fn fstring_literal_into(body: &mut String, s: &str) {
1226    let doubled = s.replace('{', "{{").replace('}', "}}");
1227    for c in doubled.chars() {
1228        body.push_str(&escape_char(c, true));
1229    }
1230}
1231
1232fn fstring_literal_escaped(s: &str) -> String {
1233    let mut body = String::new();
1234    fstring_literal_into(&mut body, s);
1235    body
1236}
1237
1238struct Unparse<'a> {
1239    out: String,
1240    src: &'a str,
1241    locals: Option<&'a HashSet<String>>,
1242    map: HashMap<String, u32>,
1243    blanked: bool,
1244}
1245
1246impl Unparse<'_> {
1247    fn rename_id(&mut self, id: &str) -> String {
1248        find_dup_defs_canon::alpha_rename(&mut self.map, self.locals, id)
1249    }
1250
1251    /// Start a new logical line (CPython `fill`): a newline unless we're at the start.
1252    fn fill(&mut self, text: &str) {
1253        if !self.out.is_empty() {
1254            self.out.push('\n');
1255        }
1256        self.out.push_str(text);
1257    }
1258
1259    fn write(&mut self, text: &str) {
1260        self.out.push_str(text);
1261    }
1262
1263    /// Render `f(self)` into a fresh buffer and return it (CPython `buffered`).
1264    fn buffered(&mut self, f: impl FnOnce(&mut Self)) -> String {
1265        let saved = std::mem::take(&mut self.out);
1266        f(self);
1267        std::mem::replace(&mut self.out, saved)
1268    }
1269
1270    fn buffered_expr(&mut self, e: &Expr, ctx: u8) -> String {
1271        self.buffered(|s| s.expr(e, ctx))
1272    }
1273
1274    /// A def/class body: skip a leading docstring (→ `pass` if that empties it), then each stmt.
1275    fn body(&mut self, stmts: &[Stmt], strip: bool) {
1276        let slice = if strip && stmts.first().is_some_and(is_docstring) { &stmts[1..] } else { stmts };
1277        if slice.is_empty() {
1278            self.fill("pass");
1279            return;
1280        }
1281        for stmt in slice {
1282            self.stmt(stmt);
1283        }
1284    }
1285
1286    #[allow(clippy::too_many_lines)]
1287    fn stmt(&mut self, stmt: &Stmt) {
1288        match stmt {
1289            Stmt::FunctionDef(f) => {
1290                let kw = if f.is_async { "async def " } else { "def " };
1291                let name = if self.locals.is_some() && !self.blanked {
1292                    self.blanked = true;
1293                    "_fn".to_owned()
1294                } else {
1295                    f.name.id.as_str().to_owned()
1296                };
1297                self.fill(&format!("{kw}{name}"));
1298                self.type_params(f.type_params.as_deref());
1299                self.write("(");
1300                self.arguments(Some(&f.parameters));
1301                self.write(")");
1302                if let Some(returns) = &f.returns {
1303                    self.write(" -> ");
1304                    self.expr(returns, prec::TEST);
1305                }
1306                self.write(":");
1307                self.body(&f.body, true);
1308            }
1309            Stmt::ClassDef(c) => {
1310                self.fill(&format!("class {}", c.name.id.as_str()));
1311                self.type_params(c.type_params.as_deref());
1312                let has_args = c.arguments.as_ref().is_some_and(|a| !a.args.is_empty() || !a.keywords.is_empty());
1313                if has_args {
1314                    self.write("(");
1315                    if let Some(a) = &c.arguments {
1316                        let mut first = true;
1317                        for base in &a.args {
1318                            if !first {
1319                                self.write(", ");
1320                            }
1321                            first = false;
1322                            self.expr(base, prec::TEST);
1323                        }
1324                        for kw in &a.keywords {
1325                            if !first {
1326                                self.write(", ");
1327                            }
1328                            first = false;
1329                            self.keyword(kw);
1330                        }
1331                    }
1332                    self.write(")");
1333                }
1334                self.write(":");
1335                self.body(&c.body, true);
1336            }
1337            Stmt::Return(r) => {
1338                self.fill("return");
1339                if let Some(v) = &r.value {
1340                    self.write(" ");
1341                    self.expr(v, prec::TEST);
1342                }
1343            }
1344            Stmt::Delete(d) => {
1345                self.fill("del ");
1346                self.comma_exprs(&d.targets);
1347            }
1348            Stmt::Assign(a) => {
1349                self.fill("");
1350                for target in &a.targets {
1351                    self.expr(target, prec::TUPLE);
1352                    self.write(" = ");
1353                }
1354                self.expr(&a.value, prec::TEST);
1355            }
1356            Stmt::AugAssign(a) => {
1357                self.fill("");
1358                self.expr(&a.target, prec::TEST);
1359                self.write(&format!(" {}= ", operator_tag(a.op)));
1360                self.expr(&a.value, prec::TEST);
1361            }
1362            Stmt::AnnAssign(a) => {
1363                self.fill("");
1364                let parens = !a.simple && matches!(a.target.as_ref(), Expr::Name(_));
1365                if parens {
1366                    self.write("(");
1367                }
1368                self.expr(&a.target, prec::TEST);
1369                if parens {
1370                    self.write(")");
1371                }
1372                self.write(": ");
1373                self.expr(&a.annotation, prec::TEST);
1374                if let Some(v) = &a.value {
1375                    self.write(" = ");
1376                    self.expr(v, prec::TEST);
1377                }
1378            }
1379            Stmt::For(f) => {
1380                self.fill(if f.is_async { "async for " } else { "for " });
1381                self.expr(&f.target, prec::TUPLE);
1382                self.write(" in ");
1383                self.expr(&f.iter, prec::TEST);
1384                self.write(":");
1385                self.body(&f.body, false);
1386                if !f.orelse.is_empty() {
1387                    self.fill("else:");
1388                    self.body(&f.orelse, false);
1389                }
1390            }
1391            Stmt::While(w) => {
1392                self.fill("while ");
1393                self.expr(&w.test, prec::TEST);
1394                self.write(":");
1395                self.body(&w.body, false);
1396                if !w.orelse.is_empty() {
1397                    self.fill("else:");
1398                    self.body(&w.orelse, false);
1399                }
1400            }
1401            Stmt::If(i) => {
1402                self.fill("if ");
1403                self.expr(&i.test, prec::TEST);
1404                self.write(":");
1405                self.body(&i.body, false);
1406                for clause in &i.elif_else_clauses {
1407                    if let Some(test) = &clause.test {
1408                        self.fill("elif ");
1409                        self.expr(test, prec::TEST);
1410                        self.write(":");
1411                        self.body(&clause.body, false);
1412                    } else {
1413                        self.fill("else:");
1414                        self.body(&clause.body, false);
1415                    }
1416                }
1417            }
1418            Stmt::With(w) => {
1419                self.fill(if w.is_async { "async with " } else { "with " });
1420                let mut first = true;
1421                for item in &w.items {
1422                    if !first {
1423                        self.write(", ");
1424                    }
1425                    first = false;
1426                    self.expr(&item.context_expr, prec::TEST);
1427                    if let Some(v) = &item.optional_vars {
1428                        self.write(" as ");
1429                        self.expr(v, prec::TEST);
1430                    }
1431                }
1432                self.write(":");
1433                self.body(&w.body, false);
1434            }
1435            Stmt::Raise(r) => {
1436                self.fill("raise");
1437                if let Some(exc) = &r.exc {
1438                    self.write(" ");
1439                    self.expr(exc, prec::TEST);
1440                    if let Some(cause) = &r.cause {
1441                        self.write(" from ");
1442                        self.expr(cause, prec::TEST);
1443                    }
1444                }
1445            }
1446            Stmt::Try(t) => {
1447                self.fill("try:");
1448                self.body(&t.body, false);
1449                for handler in &t.handlers {
1450                    let ExceptHandler::ExceptHandler(h) = handler;
1451                    self.fill(if t.is_star { "except*" } else { "except" });
1452                    if let Some(typ) = &h.type_ {
1453                        self.write(" ");
1454                        self.expr(typ, prec::TEST);
1455                    }
1456                    if let Some(name) = &h.name {
1457                        self.write(" as ");
1458                        let renamed = self.rename_id(name.id.as_str());
1459                        self.write(&renamed);
1460                    }
1461                    self.write(":");
1462                    self.body(&h.body, false);
1463                }
1464                if !t.orelse.is_empty() {
1465                    self.fill("else:");
1466                    self.body(&t.orelse, false);
1467                }
1468                if !t.finalbody.is_empty() {
1469                    self.fill("finally:");
1470                    self.body(&t.finalbody, false);
1471                }
1472            }
1473            Stmt::Assert(a) => {
1474                self.fill("assert ");
1475                self.expr(&a.test, prec::TEST);
1476                if let Some(msg) = &a.msg {
1477                    self.write(", ");
1478                    self.expr(msg, prec::TEST);
1479                }
1480            }
1481            Stmt::Import(i) => {
1482                self.fill("import ");
1483                let mut first = true;
1484                for alias in &i.names {
1485                    if !first {
1486                        self.write(", ");
1487                    }
1488                    first = false;
1489                    self.alias(alias);
1490                }
1491            }
1492            Stmt::ImportFrom(i) => {
1493                self.fill("from ");
1494                for _ in 0..i.level {
1495                    self.write(".");
1496                }
1497                if let Some(m) = &i.module {
1498                    self.write(m.id.as_str());
1499                }
1500                self.write(" import ");
1501                let mut first = true;
1502                for alias in &i.names {
1503                    if !first {
1504                        self.write(", ");
1505                    }
1506                    first = false;
1507                    self.alias(alias);
1508                }
1509            }
1510            Stmt::Global(g) => {
1511                self.fill("global ");
1512                self.write(&g.names.iter().map(|n| n.id.as_str().to_owned()).collect::<Vec<_>>().join(", "));
1513            }
1514            Stmt::Nonlocal(n) => {
1515                self.fill("nonlocal ");
1516                self.write(&n.names.iter().map(|x| x.id.as_str().to_owned()).collect::<Vec<_>>().join(", "));
1517            }
1518            Stmt::TypeAlias(t) => {
1519                self.fill("type ");
1520                self.expr(&t.name, prec::TEST);
1521                self.type_params(t.type_params.as_deref());
1522                self.write(" = ");
1523                self.expr(&t.value, prec::TEST);
1524            }
1525            Stmt::Expr(e) => {
1526                self.fill("");
1527                self.expr(&e.value, prec::YIELD);
1528            }
1529            Stmt::Match(m) => {
1530                self.fill("match ");
1531                self.expr(&m.subject, prec::TEST);
1532                self.write(":");
1533                for case in &m.cases {
1534                    self.fill("case ");
1535                    self.pattern(&case.pattern);
1536                    if let Some(guard) = &case.guard {
1537                        self.write(" if ");
1538                        self.expr(guard, prec::TEST);
1539                    }
1540                    self.write(":");
1541                    self.body(&case.body, false);
1542                }
1543            }
1544            Stmt::Pass(_) | Stmt::IpyEscapeCommand(_) => self.fill("pass"),
1545            Stmt::Break(_) => self.fill("break"),
1546            Stmt::Continue(_) => self.fill("continue"),
1547        }
1548    }
1549
1550    fn comma_exprs(&mut self, exprs: &[Expr]) {
1551        let mut first = true;
1552        for e in exprs {
1553            if !first {
1554                self.write(", ");
1555            }
1556            first = false;
1557            self.expr(e, prec::TEST);
1558        }
1559    }
1560
1561    #[allow(clippy::too_many_lines)]
1562    fn expr(&mut self, expr: &Expr, ctx: u8) {
1563        match expr {
1564            Expr::Name(n) => {
1565                let id = self.rename_id(n.id.as_str());
1566                self.write(&id);
1567            }
1568            Expr::NumberLiteral(n) => self.write(&repr_number(&n.value)),
1569            Expr::StringLiteral(s) => self.write(&repr_str(s.value.to_str())),
1570            Expr::BytesLiteral(b) => {
1571                let bytes: Vec<u8> = b.value.bytes().collect();
1572                self.write(&repr_bytes(&bytes));
1573            }
1574            Expr::BooleanLiteral(b) => self.write(if b.value { "True" } else { "False" }),
1575            // NoneLiteral and the (Jupyter-only, never-occurring) IpyEscapeCommand both render `None`.
1576            Expr::NoneLiteral(_) | Expr::IpyEscapeCommand(_) => self.write("None"),
1577            Expr::EllipsisLiteral(_) => self.write("..."),
1578            Expr::FString(f) => {
1579                // Iterate PARTS (not `.elements()`, which skips plain `StringLiteral` parts in
1580                // implicit concatenation like `f"a{x}" "b"`), then merge as CPython does.
1581                let mut body = String::new();
1582                for part in &f.value {
1583                    match part {
1584                        ast::FStringPart::Literal(s) => fstring_literal_into(&mut body, &s.value),
1585                        ast::FStringPart::FString(fs) => {
1586                            for element in &fs.elements {
1587                                let chunk = self.element_str(element);
1588                                body.push_str(&chunk);
1589                            }
1590                        }
1591                    }
1592                }
1593                self.write_fstring(&body, "f");
1594            }
1595            Expr::TString(t) => {
1596                let mut body = String::new();
1597                for tstr in &t.value {
1598                    for element in &tstr.elements {
1599                        let chunk = self.element_str(element);
1600                        body.push_str(&chunk);
1601                    }
1602                }
1603                self.write_fstring(&body, "t");
1604            }
1605            Expr::Attribute(a) => {
1606                self.expr(&a.value, prec::ATOM);
1607                self.write(".");
1608                self.write(a.attr.id.as_str());
1609            }
1610            Expr::Call(c) => {
1611                self.expr(&c.func, prec::ATOM);
1612                self.write("(");
1613                let mut first = true;
1614                for arg in &c.arguments.args {
1615                    if !first {
1616                        self.write(", ");
1617                    }
1618                    first = false;
1619                    self.expr(arg, prec::TEST);
1620                }
1621                for kw in &c.arguments.keywords {
1622                    if !first {
1623                        self.write(", ");
1624                    }
1625                    first = false;
1626                    self.keyword(kw);
1627                }
1628                self.write(")");
1629            }
1630            Expr::Subscript(s) => {
1631                self.expr(&s.value, prec::ATOM);
1632                self.write("[");
1633                match s.slice.as_ref() {
1634                    Expr::Tuple(t) if !t.elts.is_empty() => self.items_view(&t.elts),
1635                    other => self.expr(other, prec::TEST),
1636                }
1637                self.write("]");
1638            }
1639            Expr::Starred(s) => {
1640                self.write("*");
1641                self.expr(&s.value, prec::EXPR);
1642            }
1643            Expr::List(l) => {
1644                self.write("[");
1645                self.comma_exprs(&l.elts);
1646                self.write("]");
1647            }
1648            Expr::Tuple(t) => {
1649                let parens = t.elts.is_empty() || ctx > prec::TUPLE;
1650                if parens {
1651                    self.write("(");
1652                }
1653                self.items_view(&t.elts);
1654                if parens {
1655                    self.write(")");
1656                }
1657            }
1658            Expr::Set(s) => {
1659                if s.elts.is_empty() {
1660                    self.write("{*()}");
1661                } else {
1662                    self.write("{");
1663                    self.comma_exprs(&s.elts);
1664                    self.write("}");
1665                }
1666            }
1667            Expr::Dict(d) => {
1668                self.write("{");
1669                let mut first = true;
1670                for item in &d.items {
1671                    if !first {
1672                        self.write(", ");
1673                    }
1674                    first = false;
1675                    if let Some(k) = &item.key {
1676                        self.expr(k, prec::TEST);
1677                        self.write(": ");
1678                        self.expr(&item.value, prec::TEST);
1679                    } else {
1680                        self.write("**");
1681                        self.expr(&item.value, prec::EXPR);
1682                    }
1683                }
1684                self.write("}");
1685            }
1686            Expr::Slice(s) => {
1687                if let Some(l) = &s.lower {
1688                    self.expr(l, prec::TEST);
1689                }
1690                self.write(":");
1691                if let Some(u) = &s.upper {
1692                    self.expr(u, prec::TEST);
1693                }
1694                if let Some(step) = &s.step {
1695                    self.write(":");
1696                    self.expr(step, prec::TEST);
1697                }
1698            }
1699            Expr::BoolOp(b) => {
1700                let (op, p) = match b.op {
1701                    BoolOp::And => (" and ", prec::AND),
1702                    BoolOp::Or => (" or ", prec::OR),
1703                };
1704                let parens = ctx > p;
1705                if parens {
1706                    self.write("(");
1707                }
1708                let mut level = p;
1709                let mut first = true;
1710                for v in &b.values {
1711                    if !first {
1712                        self.write(op);
1713                    }
1714                    first = false;
1715                    level = next_prec(level);
1716                    self.expr(v, level);
1717                }
1718                if parens {
1719                    self.write(")");
1720                }
1721            }
1722            Expr::BinOp(b) => {
1723                let op = operator_tag(b.op);
1724                let p = binop_prec(b.op);
1725                let parens = ctx > p;
1726                if parens {
1727                    self.write("(");
1728                }
1729                let (lp, rp) = if matches!(b.op, Operator::Pow) { (next_prec(p), p) } else { (p, next_prec(p)) };
1730                self.expr(&b.left, lp);
1731                self.write(&format!(" {op} "));
1732                self.expr(&b.right, rp);
1733                if parens {
1734                    self.write(")");
1735                }
1736            }
1737            Expr::UnaryOp(u) => {
1738                let (op, p) = match u.op {
1739                    UnaryOp::Invert => ("~", prec::FACTOR),
1740                    UnaryOp::Not => ("not", prec::NOT),
1741                    UnaryOp::UAdd => ("+", prec::FACTOR),
1742                    UnaryOp::USub => ("-", prec::FACTOR),
1743                };
1744                let parens = ctx > p;
1745                if parens {
1746                    self.write("(");
1747                }
1748                self.write(op);
1749                if p != prec::FACTOR {
1750                    self.write(" ");
1751                }
1752                self.expr(&u.operand, p);
1753                if parens {
1754                    self.write(")");
1755                }
1756            }
1757            Expr::Compare(c) => {
1758                let parens = ctx > prec::CMP;
1759                if parens {
1760                    self.write("(");
1761                }
1762                self.expr(&c.left, next_prec(prec::CMP));
1763                for (op, comp) in c.ops.iter().zip(c.comparators.iter()) {
1764                    self.write(&format!(" {} ", cmpop_tag(*op)));
1765                    self.expr(comp, next_prec(prec::CMP));
1766                }
1767                if parens {
1768                    self.write(")");
1769                }
1770            }
1771            Expr::If(i) => {
1772                let parens = ctx > prec::TEST;
1773                if parens {
1774                    self.write("(");
1775                }
1776                self.expr(&i.body, next_prec(prec::TEST));
1777                self.write(" if ");
1778                self.expr(&i.test, next_prec(prec::TEST));
1779                self.write(" else ");
1780                self.expr(&i.orelse, prec::TEST);
1781                if parens {
1782                    self.write(")");
1783                }
1784            }
1785            Expr::Lambda(l) => {
1786                let parens = ctx > prec::TEST;
1787                if parens {
1788                    self.write("(");
1789                }
1790                self.write("lambda");
1791                let args = self.buffered(|s| s.arguments(l.parameters.as_deref()));
1792                if !args.is_empty() {
1793                    self.write(" ");
1794                    self.write(&args);
1795                }
1796                self.write(": ");
1797                self.expr(&l.body, prec::TEST);
1798                if parens {
1799                    self.write(")");
1800                }
1801            }
1802            Expr::Named(n) => {
1803                let parens = ctx > prec::NAMED_EXPR;
1804                if parens {
1805                    self.write("(");
1806                }
1807                self.expr(&n.target, prec::ATOM);
1808                self.write(" := ");
1809                self.expr(&n.value, prec::ATOM);
1810                if parens {
1811                    self.write(")");
1812                }
1813            }
1814            Expr::Await(a) => {
1815                let parens = ctx > prec::AWAIT;
1816                if parens {
1817                    self.write("(");
1818                }
1819                self.write("await ");
1820                self.expr(&a.value, prec::ATOM);
1821                if parens {
1822                    self.write(")");
1823                }
1824            }
1825            Expr::Yield(y) => {
1826                let parens = ctx > prec::YIELD;
1827                if parens {
1828                    self.write("(");
1829                }
1830                self.write("yield");
1831                if let Some(v) = &y.value {
1832                    self.write(" ");
1833                    self.expr(v, prec::ATOM);
1834                }
1835                if parens {
1836                    self.write(")");
1837                }
1838            }
1839            Expr::YieldFrom(y) => {
1840                let parens = ctx > prec::YIELD;
1841                if parens {
1842                    self.write("(");
1843                }
1844                self.write("yield from ");
1845                self.expr(&y.value, prec::ATOM);
1846                if parens {
1847                    self.write(")");
1848                }
1849            }
1850            Expr::ListComp(c) => {
1851                self.write("[");
1852                self.expr(&c.elt, prec::TEST);
1853                for g in &c.generators {
1854                    self.comprehension(g);
1855                }
1856                self.write("]");
1857            }
1858            Expr::SetComp(c) => {
1859                self.write("{");
1860                self.expr(&c.elt, prec::TEST);
1861                for g in &c.generators {
1862                    self.comprehension(g);
1863                }
1864                self.write("}");
1865            }
1866            Expr::Generator(c) => {
1867                self.write("(");
1868                self.expr(&c.elt, prec::TEST);
1869                for g in &c.generators {
1870                    self.comprehension(g);
1871                }
1872                self.write(")");
1873            }
1874            Expr::DictComp(c) => {
1875                self.write("{");
1876                self.expr(&c.key, prec::TEST);
1877                self.write(": ");
1878                self.expr(&c.value, prec::TEST);
1879                for g in &c.generators {
1880                    self.comprehension(g);
1881                }
1882                self.write("}");
1883            }
1884        }
1885    }
1886
1887    fn items_view(&mut self, elts: &[Expr]) {
1888        if elts.len() == 1 {
1889            self.expr(&elts[0], prec::TEST);
1890            self.write(",");
1891        } else {
1892            self.comma_exprs(elts);
1893        }
1894    }
1895
1896    fn comprehension(&mut self, comp: &Comprehension) {
1897        self.write(if comp.is_async { " async for " } else { " for " });
1898        self.expr(&comp.target, prec::TUPLE);
1899        self.write(" in ");
1900        self.expr(&comp.iter, next_prec(prec::TEST));
1901        for cond in &comp.ifs {
1902            self.write(" if ");
1903            self.expr(cond, next_prec(prec::TEST));
1904        }
1905    }
1906
1907    fn keyword(&mut self, kw: &Keyword) {
1908        match &kw.arg {
1909            Some(name) => {
1910                self.write(name.id.as_str());
1911                self.write("=");
1912            }
1913            None => self.write("**"),
1914        }
1915        self.expr(&kw.value, prec::TEST);
1916    }
1917
1918    fn alias(&mut self, alias: &ast::Alias) {
1919        self.write(alias.name.id.as_str());
1920        if let Some(asname) = &alias.asname {
1921            self.write(" as ");
1922            self.write(asname.id.as_str());
1923        }
1924    }
1925
1926    fn arg(&mut self, param: &Parameter) {
1927        let name = self.rename_id(param.name.id.as_str());
1928        self.write(&name);
1929        if let Some(annotation) = &param.annotation {
1930            self.write(": ");
1931            self.expr(annotation, prec::TEST);
1932        }
1933    }
1934
1935    fn arguments(&mut self, params: Option<&Parameters>) {
1936        let Some(p) = params else { return };
1937        let mut first = true;
1938        let posonly = p.posonlyargs.len();
1939        for (index, x) in p.posonlyargs.iter().chain(p.args.iter()).enumerate() {
1940            if first {
1941                first = false;
1942            } else {
1943                self.write(", ");
1944            }
1945            self.arg(&x.parameter);
1946            if let Some(default) = &x.default {
1947                self.write("=");
1948                self.expr(default, prec::TEST);
1949            }
1950            if index + 1 == posonly {
1951                self.write(", /");
1952            }
1953        }
1954        if p.vararg.is_some() || !p.kwonlyargs.is_empty() {
1955            if first {
1956                first = false;
1957            } else {
1958                self.write(", ");
1959            }
1960            self.write("*");
1961            if let Some(vararg) = &p.vararg {
1962                self.arg(vararg);
1963            }
1964        }
1965        for x in &p.kwonlyargs {
1966            self.write(", ");
1967            self.arg(&x.parameter);
1968            if let Some(default) = &x.default {
1969                self.write("=");
1970                self.expr(default, prec::TEST);
1971            }
1972        }
1973        if let Some(kwarg) = &p.kwarg {
1974            if first {
1975                first = false;
1976            } else {
1977                self.write(", ");
1978            }
1979            self.write("**");
1980            self.arg(kwarg);
1981        }
1982        let _ = first;
1983    }
1984
1985    fn type_params(&mut self, params: Option<&ast::TypeParams>) {
1986        let Some(tps) = params else { return };
1987        if tps.type_params.is_empty() {
1988            return;
1989        }
1990        self.write("[");
1991        let mut first = true;
1992        for tp in &tps.type_params {
1993            if !first {
1994                self.write(", ");
1995            }
1996            first = false;
1997            match tp {
1998                ast::TypeParam::TypeVar(t) => {
1999                    self.write(t.name.id.as_str());
2000                    if let Some(bound) = &t.bound {
2001                        self.write(": ");
2002                        self.expr(bound, prec::TEST);
2003                    }
2004                    if let Some(default) = &t.default {
2005                        self.write(" = ");
2006                        self.expr(default, prec::TEST);
2007                    }
2008                }
2009                ast::TypeParam::TypeVarTuple(t) => {
2010                    self.write("*");
2011                    self.write(t.name.id.as_str());
2012                }
2013                ast::TypeParam::ParamSpec(t) => {
2014                    self.write("**");
2015                    self.write(t.name.id.as_str());
2016                }
2017            }
2018        }
2019        self.write("]");
2020    }
2021
2022    /// Choose a quote (CPython `_ftstring_helper`: ALL_QUOTES order, skip any appearing in the body,
2023    /// prefer one whose char ≠ the body's last char; escape a forced final triple-quote char) and
2024    /// write `f'…'` / `t'…'`. Body already has literals escaped and interpolations rendered.
2025    fn write_fstring(&mut self, body: &str, prefix: &str) {
2026        let mut candidates: Vec<&str> =
2027            ["'", "\"", "\"\"\"", "'''"].into_iter().filter(|q| !body.contains(*q)).collect();
2028        self.write(prefix);
2029        let Some(&first) = candidates.first() else {
2030            // Body contains every quote form (pathological); best-effort triple-single.
2031            self.write("'''");
2032            self.write(body);
2033            self.write("'''");
2034            return;
2035        };
2036        // Stable: quotes whose first char ≠ body's last char sort first (avoids escaping a final quote).
2037        candidates.sort_by_key(|q| body.chars().last() == q.chars().next());
2038        let quote = *candidates.first().unwrap_or(&first);
2039        self.write(quote);
2040        if quote.len() == 3 && body.chars().last() == quote.chars().next() {
2041            let mut escaped = body.to_owned();
2042            let last = escaped.pop().unwrap_or(' ');
2043            escaped.push('\\');
2044            escaped.push(last);
2045            self.write(&escaped);
2046        } else {
2047            self.write(body);
2048        }
2049        self.write(quote);
2050    }
2051
2052    /// One f-string element → its rendered chunk. A `{x=}` debug interpolation prepends the source
2053    /// text (`leading + <expr source> + trailing`) as a literal, exactly as CPython bakes it.
2054    fn element_str(&mut self, element: &ast::InterpolatedStringElement) -> String {
2055        match element {
2056            ast::InterpolatedStringElement::Literal(lit) => fstring_literal_escaped(&lit.value),
2057            ast::InterpolatedStringElement::Interpolation(interp) => {
2058                let mut out = String::new();
2059                if let Some(dbg) = &interp.debug_text {
2060                    let range = interp.expression.range();
2061                    let expr_src =
2062                        self.src.get(usize::from(range.start())..usize::from(range.end())).unwrap_or("");
2063                    out.push_str(&fstring_literal_escaped(&format!("{}{expr_src}{}", dbg.leading, dbg.trailing)));
2064                }
2065                out.push_str(&self.interpolation(interp));
2066                out
2067            }
2068        }
2069    }
2070
2071    fn interpolation(&mut self, interp: &ast::InterpolatedElement) -> String {
2072        let mut out = String::from("{");
2073        let value = self.buffered_expr(&interp.expression, next_prec(prec::TEST));
2074        if value.starts_with('{') {
2075            out.push(' ');
2076        }
2077        out.push_str(&value);
2078        let mut conversion = match interp.conversion {
2079            ast::ConversionFlag::Str => Some('s'),
2080            ast::ConversionFlag::Ascii => Some('a'),
2081            ast::ConversionFlag::Repr => Some('r'),
2082            ast::ConversionFlag::None => None,
2083        };
2084        // A bare `{x=}` (debug, no explicit conversion / format spec) implies `!r`, like CPython.
2085        if conversion.is_none() && interp.debug_text.is_some() && interp.format_spec.is_none() {
2086            conversion = Some('r');
2087        }
2088        if let Some(c) = conversion {
2089            out.push('!');
2090            out.push(c);
2091        }
2092        if let Some(spec) = &interp.format_spec {
2093            out.push(':');
2094            for element in &spec.elements {
2095                match element {
2096                    ast::InterpolatedStringElement::Literal(lit) => {
2097                        let mut v = lit.value.replace('{', "{{").replace('}', "}}");
2098                        v = v.replace('\\', "\\\\").replace('\'', "\\'").replace('"', "\\\"").replace('\n', "\\n");
2099                        out.push_str(&v);
2100                    }
2101                    ast::InterpolatedStringElement::Interpolation(inner) => {
2102                        out.push_str(&self.interpolation(inner));
2103                    }
2104                }
2105            }
2106        }
2107        out.push('}');
2108        out
2109    }
2110
2111    fn pattern(&mut self, pat: &ast::Pattern) {
2112        match pat {
2113            ast::Pattern::MatchValue(p) => self.expr(&p.value, prec::TEST),
2114            ast::Pattern::MatchSingleton(p) => self.write(match p.value {
2115                ast::Singleton::None => "None",
2116                ast::Singleton::True => "True",
2117                ast::Singleton::False => "False",
2118            }),
2119            ast::Pattern::MatchSequence(p) => {
2120                self.write("[");
2121                let mut first = true;
2122                for x in &p.patterns {
2123                    if !first {
2124                        self.write(", ");
2125                    }
2126                    first = false;
2127                    self.pattern(x);
2128                }
2129                self.write("]");
2130            }
2131            ast::Pattern::MatchMapping(p) => {
2132                self.write("{");
2133                let mut first = true;
2134                for (k, v) in p.keys.iter().zip(p.patterns.iter()) {
2135                    if !first {
2136                        self.write(", ");
2137                    }
2138                    first = false;
2139                    self.expr(k, prec::TEST);
2140                    self.write(": ");
2141                    self.pattern(v);
2142                }
2143                if let Some(rest) = &p.rest {
2144                    if !p.keys.is_empty() {
2145                        self.write(", ");
2146                    }
2147                    self.write(&format!("**{}", rest.id.as_str()));
2148                }
2149                self.write("}");
2150            }
2151            ast::Pattern::MatchClass(p) => {
2152                self.expr(&p.cls, prec::ATOM);
2153                self.write("(");
2154                let mut first = true;
2155                for x in &p.arguments.patterns {
2156                    if !first {
2157                        self.write(", ");
2158                    }
2159                    first = false;
2160                    self.pattern(x);
2161                }
2162                for kw in &p.arguments.keywords {
2163                    if !first {
2164                        self.write(", ");
2165                    }
2166                    first = false;
2167                    self.write(&format!("{}=", kw.attr.id.as_str()));
2168                    self.pattern(&kw.pattern);
2169                }
2170                self.write(")");
2171            }
2172            ast::Pattern::MatchStar(p) => {
2173                let name = p.name.as_ref().map_or("_", |n| n.id.as_str());
2174                self.write(&format!("*{name}"));
2175            }
2176            ast::Pattern::MatchAs(p) => match (&p.pattern, &p.name) {
2177                (None, None) => self.write("_"),
2178                (None, Some(name)) => self.write(name.id.as_str()),
2179                (Some(inner), name) => {
2180                    self.pattern(inner);
2181                    if let Some(name) = name {
2182                        self.write(&format!(" as {}", name.id.as_str()));
2183                    }
2184                }
2185            },
2186            ast::Pattern::MatchOr(p) => {
2187                let mut first = true;
2188                for x in &p.patterns {
2189                    if !first {
2190                        self.write(" | ");
2191                    }
2192                    first = false;
2193                    self.pattern(x);
2194                }
2195            }
2196        }
2197    }
2198}
2199
2200/// Unparse the alpha-renamed function and split into stripped, non-empty lines (the Type-3 units).
2201fn unparse_lines(stmt: &Stmt, src: &str, locals: &HashSet<String>, map: HashMap<String, u32>) -> Vec<String> {
2202    let mut up = Unparse { out: String::new(), src, locals: Some(locals), map, blanked: false };
2203    up.stmt(stmt);
2204    up.out.lines().map(str::trim).filter(|l| !l.is_empty()).map(ToOwned::to_owned).collect()
2205}
2206
2207// ---------------------------------------------------------------------------
2208// Public entry points (unchanged signatures).
2209// ---------------------------------------------------------------------------
2210
2211/// CPython-`ast.dump`-shaped canonical of the leading def in `text` (names preserved, docstrings
2212/// stripped), or the raw text if it does not parse / has no statements. Single-text entry point.
2213#[must_use]
2214pub fn ast_canonical(text: &str) -> String {
2215    cluster_canonical(text)
2216}
2217
2218/// CPython-`ast.dump`-shaped canonical of the leading def in `text` (names preserved, docstrings
2219/// stripped), or the raw text if it does not parse / has no statements. Used by the clustering pass.
2220fn cluster_canonical(text: &str) -> String {
2221    let Ok(parsed) = parse_module(text) else {
2222        return text.to_string();
2223    };
2224    let module = parsed.into_syntax();
2225    let Some(stmt) = module.body.first() else {
2226        return text.to_string();
2227    };
2228    Dump::new(text, None).stmt(stmt)
2229}
2230
2231/// Batch canonicalize def texts (functions / classes / …) in parallel — replaces the Python
2232/// `ast_canonical` loop. Returns one canonical string per input, in order.
2233#[must_use]
2234pub fn ast_canonical_many(texts: &[String]) -> Vec<String> {
2235    texts.par_iter().map(|text| cluster_canonical(text)).collect()
2236}
2237
2238/// Name-agnostic forms of one function: `(alpha-renamed canonical, per-statement renamed lines,
2239/// node count)`, or `None` if `text` is not a single function definition.
2240fn normalize_one(text: &str) -> Option<(String, Vec<String>, usize)> {
2241    let parsed = parse_module(text).ok()?;
2242    let module = parsed.into_syntax();
2243    let stmt = module.body.first()?;
2244    let Stmt::FunctionDef(func) = stmt else {
2245        return None; // ruff folds async into StmtFunctionDef; classes/others are out of scope here
2246    };
2247
2248    let mut collect = Collect::default();
2249    collect.add_params(&func.parameters); // top fn's params (nested defs' params are not locals)
2250    collect.visit_stmt(stmt);
2251    let locals = collect.bound;
2252
2253    let mut dump = Dump::new(text, Some(&locals));
2254    let canonical = dump.stmt(stmt);
2255    let size = dump.count;
2256
2257    // Type-3 shingle units: `ast.unparse(normalized_fn).splitlines()` stripped — reproduced exactly
2258    // (CPython 3.14 unparse) so the ECScan IDF/cosine matches the CPython `ast` reference bit-for-bit. The
2259    // rename map from the dump pass is reused so the `_v{n}` numbering is identical to the canonical.
2260    let lines = unparse_lines(stmt, text, &locals, dump.map);
2261    Some((canonical, lines, size))
2262}
2263
2264/// Batch alpha-rename canonicalize function texts in parallel — replaces the Python `_analyze`
2265/// (cross-name + Type-3 canonicalization). `None` entries are non-function texts.
2266#[must_use]
2267pub fn normalize_functions(texts: &[String]) -> Vec<Option<(String, Vec<String>, usize)>> {
2268    texts.par_iter().map(|text| normalize_one(text)).collect()
2269}
2270
2271/// Full dup-defs analysis of one function FROM AN AST NODE (no re-parse): `(cluster_canonical,
2272/// xname_canonical, lines, size)`, or `None` if `stmt` is not a function def. `src` is the source the
2273/// node's ranges index into (for f-string `{x=}` debug text). The node's own (top) decorators are
2274/// excluded — matching the decorator-stripped def *text* the text-based path canonicalized — so a
2275/// canonical built from a file's AST node is byte-identical to one built by re-parsing the def text.
2276pub(crate) fn analyze_stmt(stmt: &Stmt, src: &str) -> Option<AnalyzedFn> {
2277    let Stmt::FunctionDef(func) = stmt else {
2278        return None;
2279    };
2280    let cluster_canonical = Dump::new(src, None).stmt(stmt);
2281
2282    let mut collect = Collect::default();
2283    collect.add_params(&func.parameters);
2284    collect.visit_stmt(stmt);
2285    let locals = collect.bound;
2286
2287    let mut dump = Dump::new(src, Some(&locals));
2288    let xname_canonical = dump.stmt(stmt);
2289    let size = dump.count;
2290    let lines = unparse_lines(stmt, src, &locals, dump.map);
2291    Some((cluster_canonical, xname_canonical, lines, size))
2292}
2293
2294/// Names-preserved cluster canonical of any def node (functions AND classes), decorators of the top
2295/// node excluded. Node-based counterpart of `cluster_canonical`, used by the scan to canonicalize
2296/// classes without a re-parse.
2297pub(crate) fn cluster_canonical_node(stmt: &Stmt, src: &str) -> String {
2298    Dump::new(src, None).stmt(stmt)
2299}
2300
2301fn analyze_one(text: &str) -> Option<AnalyzedFn> {
2302    let module = parse_module(text).ok()?.into_syntax();
2303    let stmt = module.body.first()?;
2304    analyze_stmt(stmt, text)
2305}
2306
2307/// Batch `analyze_one` in parallel — one parse per function, all dup-defs canonical forms at once.
2308#[must_use]
2309pub fn analyze_functions(texts: &[String]) -> Vec<Option<AnalyzedFn>> {
2310    texts.par_iter().map(|text| analyze_one(text)).collect()
2311}