opendp_tooling/codegen/
python.rs

1use std::collections::HashMap;
2use std::fs;
3use std::path::PathBuf;
4
5use crate::{Argument, Function, TypeRecipe, Value};
6
7use crate::codegen::tab_py;
8
9use super::flatten_type_recipe;
10
11/// Top-level function to generate Python bindings, including all modules.
12pub fn generate_bindings(modules: &HashMap<String, Vec<Function>>) -> HashMap<PathBuf, String> {
13    let typemap: HashMap<String, String> =
14        serde_json::from_str(&include_str!("python_typemap.json")).unwrap();
15    let hierarchy: HashMap<String, Vec<String>> =
16        serde_json::from_str(&include_str!("type_hierarchy.json")).unwrap();
17
18    modules
19        .into_iter()
20        .map(|(module_name, module)| {
21            (
22                PathBuf::from(format!(
23                    "{}.py",
24                    if ["data", "internal"].contains(&module_name.as_str()) {
25                        format!("_{module_name}")
26                    } else {
27                        module_name.clone()
28                    }
29                )),
30                generate_module(module_name, module, &typemap, &hierarchy),
31            )
32        })
33        .collect()
34}
35
36/// Generates all code for an opendp python module.
37/// Each call corresponds to one python file.
38fn generate_module(
39    module_name: &str,
40    module: &Vec<Function>,
41    typemap: &HashMap<String, String>,
42    hierarchy: &HashMap<String, Vec<String>>,
43) -> String {
44    let all = module
45        .iter()
46        .filter(|func| func.has_ffi)
47        .map(|func| format!("    \"{}\"", func.name))
48        .chain(
49            module
50                .iter()
51                .filter(|func| func.supports_partial && func.has_ffi)
52                .map(|func| format!("    \"{}\"", func.name.replacen("make_", "then_", 1))),
53        )
54        .collect::<Vec<_>>()
55        .join(",\n");
56    let functions = module
57        .into_iter()
58        .filter(|func| func.has_ffi)
59        .map(|func| generate_function(&module_name, &func, typemap, hierarchy))
60        .collect::<Vec<String>>()
61        .join("\n");
62
63    // the comb module needs access to core functions for type introspection on measurements/transformations
64    let constructor_mods = ["combinators", "measurements", "transformations", "internal"];
65
66    let extra_imports = if constructor_mods.contains(&module_name) {
67        r#"from opendp.core import *
68from opendp.domains import *
69from opendp.metrics import *
70from opendp.measures import *"#
71    } else {
72        ""
73    };
74
75    fn boilerplate(name: String) -> String {
76        format!(
77            "
78For more context, see :ref:`{name} in the User Guide <{name}-user-guide>`.
79
80For convenience, all the functions of this module are also available from :py:mod:`opendp.prelude`.
81We suggest importing under the conventional name ``dp``:
82
83.. code:: python
84
85    >>> import opendp.prelude as dp"
86        )
87    }
88
89    fn special_boilerplate(name: String) -> String {
90        let initial = name.chars().nth(0);
91        format!(
92            "{}\n\nThe methods of this module will then be accessible at ``dp.{}``.",
93            boilerplate(name),
94            match initial {
95                Some(s) => s.to_string(),
96                None => "".to_string(),
97            }
98        )
99    }
100
101    let module_docs = match module_name {
102        "accuracy" => format!(
103            "{}{}",
104            "The ``accuracy`` module provides functions for converting between accuracy and scale parameters.",
105            boilerplate("accuracy".to_string())),
106        "combinators" => format!(
107            "{}{}",
108            "The ``combinators`` module provides functions for combining transformations and measurements.",
109            special_boilerplate("combinators".to_string())),
110        "core" => format!(
111            "{}{}",
112            "The ``core`` module provides functions for accessing the fields of transformations and measurements.".to_string(),
113            boilerplate("core".to_string())),
114        "domains" => format!(
115            "{}{}",
116            "The ``domains`` module provides functions for creating and using domains.",
117            boilerplate("domains".to_string())),
118        "measurements" => format!(
119            "{}{}",
120            "The ``measurements`` module provides functions that apply calibrated noise to data to ensure differential privacy.",
121            special_boilerplate("measurements".to_string())),
122        "measures" => format!(
123            "{}{}",
124            "The ``measures`` module provides functions that measure the distance between probability distributions.",
125            boilerplate("measures".to_string())),
126        "metrics" => format!(
127            "{}{}",
128            "The ``metrics`` module provides fuctions that measure the distance between two elements of a domain.",
129            boilerplate("metrics".to_string())),
130        "transformations" => format!(
131            "{}{}",
132            "The ``transformations`` module provides functions that deterministicly transform datasets.",
133            special_boilerplate("transformations".to_string())),
134        "internal" => "The ``internal`` module provides functions that can be used to construct library primitives without the use of the \"honest-but-curious\" flag.".to_string(),
135        _ => "TODO!".to_string()
136    };
137
138    format!(
139        r#"# Auto-generated. Do not edit!
140'''
141{module_docs}
142'''
143from deprecated.sphinx import deprecated # noqa: F401 (Not every file actually has deprecated functions.)
144
145from opendp._convert import *
146from opendp._lib import *
147from opendp.mod import *
148from opendp.typing import *
149{extra_imports}
150__all__ = [
151{all}
152]
153
154{functions}"#
155    )
156}
157
158pub(crate) fn generate_function(
159    module_name: &str,
160    func: &Function,
161    typemap: &HashMap<String, String>,
162    hierarchy: &HashMap<String, Vec<String>>,
163) -> String {
164    println!("generating: {}", func.name);
165    let mut args = func
166        .args
167        .iter()
168        .map(|arg| generate_input_argument(arg, func, hierarchy))
169        .collect::<Vec<_>>();
170    args.sort_by(|(_, l_is_default), (_, r_is_default)| l_is_default.cmp(r_is_default));
171    let args = args.into_iter().map(|v| v.0).collect::<Vec<_>>();
172
173    let sig_return = func
174        .ret
175        .python_type_hint(hierarchy)
176        .map(|v| format!(" -> {}", v))
177        .unwrap_or_else(String::new);
178
179    let docstring = tab_py(generate_docstring(module_name, func, hierarchy));
180    let body = tab_py(generate_body(module_name, func, typemap));
181
182    let example_path = format!("src/{}/code/{}.rst", &module_name, func.name);
183    let example = match fs::read_to_string(example_path) {
184        Ok(string) => tab_py(format!("\n\n:example:\n\n{string}\n")),
185        Err(_) => "".to_string(),
186    };
187
188    let then_name = func.name.replacen("make_", "then_", 1);
189    let then_func = if func.supports_partial {
190        format!(
191            r#"
192
193def {then_name}(
194{then_args}
195):  
196    r"""partial constructor of {func_name}
197
198    .. seealso:: 
199      Delays application of `input_domain` and `input_metric` in :py:func:`opendp.{module_name}.{func_name}`
200
201{doc_params}{example}
202    """
203    output = _PartialConstructor(lambda {dom_met}: {name}(
204{args}))
205    output.__opendp_dict__ = {{
206            '__function__': '{then_name}',
207            '__module__': '{module_name}',
208            '__kwargs__': {{
209                {func_args}
210            }},
211        }}
212    return output
213"#,
214            func_name = func.name,
215            doc_params = tab_py(
216                func.args
217                    .iter()
218                    .skip(2)
219                    .map(|v| generate_docstring_arg(v, hierarchy))
220                    .collect::<Vec<String>>()
221                    .join("\n")
222            ),
223            then_args = tab_py(args[2..].join(",\n")),
224            dom_met = func.args[..2]
225                .iter()
226                .map(|arg| arg.name())
227                .collect::<Vec<_>>()
228                .join(", "),
229            name = func.name,
230            args = tab_py(tab_py(
231                func.args
232                    .iter()
233                    .map(|arg| format!("{name}={name}", name = arg.name()))
234                    .collect::<Vec<_>>()
235                    .join(",\n")
236            )),
237            func_args = func
238                .args
239                .iter()
240                .skip(2)
241                .map(|v| format!(r"'{name}': {name}", name = v.name()))
242                .collect::<Vec<String>>()
243                .join(", "),
244        )
245    } else {
246        String::new()
247    };
248
249    let deprecated_decorator = func
250        .deprecation
251        .as_ref()
252        .map(|deprecation| {
253            format!(
254                "@deprecated(version=\"{}\", reason=\"{}\")\n",
255                deprecation.since, deprecation.note
256            )
257        })
258        .unwrap_or_default();
259
260    format!(
261        r#"
262{deprecated_decorator}def {func_name}(
263{args}
264){sig_return}:
265{docstring}
266{body}{then_func}
267"#,
268        func_name = func.name,
269        args = tab_py(args.join(",\n"))
270    )
271}
272
273/// generate an input argument, complete with name, hint and default.
274/// also returns a bool to make it possible to move arguments with defaults to the end of the signature.
275fn generate_input_argument(
276    arg: &Argument,
277    func: &Function,
278    hierarchy: &HashMap<String, Vec<String>>,
279) -> (String, bool) {
280    let default = if let Some(default) = &arg.default {
281        Some(match default {
282            Value::Null => "None".to_string(),
283            Value::Bool(value) => if *value { "True" } else { "False" }.to_string(),
284            Value::Integer(int) => int.to_string(),
285            Value::Float(float) => float.to_string(),
286            Value::String(string) => format!("\"{}\"", string),
287        })
288    } else {
289        // let default value be None if it is a type arg and there is a public example
290        generate_public_example(func, arg).map(|_| "None".to_string())
291    };
292    (
293        format!(
294            r#"{name}{hint}{default}"#,
295            name = arg.name(),
296            hint = arg
297                .python_type_hint(hierarchy)
298                // Add `Optional` annotation only if there is a default and it is `None`.
299                .map(
300                    |hint| if default.as_ref().is_some_and(|v| v.as_str() == "None") {
301                        format!("Optional[{}]", hint)
302                    } else {
303                        hint
304                    }
305                )
306                // don't hint for args that are not converted
307                .filter(|_| !arg.do_not_convert)
308                .map(|hint| format!(": {}", hint))
309                .unwrap_or_else(String::new),
310            default = default
311                .as_ref()
312                .map(|default| format!(" = {}", default))
313                .unwrap_or_else(String::new)
314        ),
315        default.is_some(),
316    )
317}
318
319/// generate a docstring for the current function, with the function description, args, and return
320/// in Sphinx format: https://sphinx-rtd-tutorial.readthedocs.io/en/latest/docstrings.html
321fn generate_docstring(
322    module_name: &str,
323    func: &Function,
324    hierarchy: &HashMap<String, Vec<String>>,
325) -> String {
326    let description = (func.description.as_ref())
327        .map(|v| format!("{}\n", v))
328        .unwrap_or_else(String::new);
329
330    let doc_args = func
331        .args
332        .iter()
333        .map(|v| generate_docstring_arg(v, hierarchy))
334        .collect::<Vec<String>>()
335        .join("\n");
336
337    let raises = format!(
338        r#":raises TypeError: if an argument's type differs from the expected type
339:raises UnknownTypeException: if a type argument fails to parse{opendp_raise}"#,
340        opendp_raise = if func.ret.c_type_origin() == "FfiResult" {
341            "\n:raises OpenDPException: packaged error from the core OpenDP library"
342        } else {
343            ""
344        }
345    );
346
347    let example_path = format!("src/{}/code/{}.rst", &module_name, &func.name);
348    let example = match fs::read_to_string(example_path) {
349        Ok(string) => format!("\n\n:example:\n\n{string}\n"),
350        Err(_) => "".to_string(),
351    };
352
353    format!(
354        r#"r"""{description}
355{doc_args}{ret_arg}
356{raises}{example}
357""""#,
358        description = description,
359        doc_args = doc_args,
360        ret_arg = generate_docstring_return_arg(&func.ret, hierarchy),
361        raises = raises
362    )
363}
364
365/// generate the part of a docstring corresponding to an argument
366fn generate_docstring_arg(arg: &Argument, hierarchy: &HashMap<String, Vec<String>>) -> String {
367    let name = arg.name.clone().unwrap_or_default();
368    format!(
369        r#":param {name}: {description}{type_}"#,
370        name = name,
371        type_ = arg
372            .python_type_hint(hierarchy)
373            .map(|v| if v.as_str() == "RuntimeTypeDescriptor" {
374                ":py:ref:`RuntimeTypeDescriptor`".to_string()
375            } else {
376                v
377            })
378            .map(|v| format!("\n:type {}: {}", name, v))
379            .unwrap_or_default(),
380        description = arg.description.clone().unwrap_or_default()
381    )
382}
383
384/// generate the part of a docstring corresponding to a return argument
385fn generate_docstring_return_arg(
386    arg: &Argument,
387    hierarchy: &HashMap<String, Vec<String>>,
388) -> String {
389    let mut ret = Vec::new();
390    if let Some(description) = &arg.description {
391        ret.push(format!(":return: {description}", description = description));
392    }
393    if let Some(type_) = arg.python_type_hint(hierarchy) {
394        ret.push(format!(":rtype: {type_}", type_ = type_));
395    }
396    if !ret.is_empty() {
397        ret.insert(0, String::new());
398    }
399    ret.join("\n")
400}
401
402/// generate the function body, consisting of type args formatters, data converters, and the call
403/// - type arg formatters make every type arg a RuntimeType, and construct derived RuntimeTypes
404/// - data converters convert from python to c representations according to the formatted type args
405/// - the call constructs and retrieves the ffi function name, sets ctypes,
406///     makes the call, handles errors, and converts the response to python
407fn generate_body(module_name: &str, func: &Function, typemap: &HashMap<String, String>) -> String {
408    format!(
409        r#"{flag_checker}{type_arg_formatter}
410{data_converter}
411{make_call}
412{serialization}
413return output"#,
414        serialization = generate_serialization(module_name, func),
415        flag_checker = generate_flag_check(&func.features),
416        type_arg_formatter = generate_type_arg_formatter(func),
417        data_converter = generate_data_converter(func, typemap),
418        make_call = generate_call(module_name, func, typemap)
419    )
420}
421
/// Generate the Python statement asserting that every feature in `features` is enabled.
///
/// Returns an empty string when no features are required; otherwise an
/// `assert_features("a", "b", ...)` call followed by a blank line.
fn generate_flag_check(features: &Vec<String>) -> String {
    if features.is_empty() {
        return String::new();
    }

    let mut quoted = Vec::with_capacity(features.len());
    for feature in features {
        quoted.push(format!("\"{}\"", feature));
    }
    format!("assert_features({})\n\n", quoted.join(", "))
}
437
438/// generate code that provides an example of the type of the type_arg
439fn generate_public_example(func: &Function, type_arg: &Argument) -> Option<String> {
440    // the json has supplied explicit instructions to find an example
441    if let Some(example) = &type_arg.example {
442        return Some(example.to_python());
443    }
444
445    let type_name = type_arg.name.as_ref().unwrap();
446
447    // rewrite args to remove references to derived types
448    let mut args = func.args.clone();
449    args.iter_mut()
450        .filter(|arg| arg.rust_type.is_some())
451        .for_each(|arg| {
452            arg.rust_type = Some(flatten_type_recipe(
453                arg.rust_type.as_ref().unwrap(),
454                &func.derived_types,
455            ))
456        });
457
458    // code generation
459    args.iter()
460        .filter_map(|arg| match &arg.rust_type {
461            Some(TypeRecipe::Name(name)) => (name == type_name).then(|| arg.name()),
462            Some(TypeRecipe::Nest { origin, args }) => {
463                if origin == "Vec" {
464                    if let TypeRecipe::Name(arg_name) = &args[0] {
465                        if arg_name == type_name {
466                            Some(format!("get_first({name})", name = arg.name()))
467                        } else {
468                            None
469                        }
470                    } else {
471                        None
472                    }
473                } else {
474                    None
475                }
476            }
477            _ => None,
478        })
479        .next()
480}
481
482/// the generated code ensures every type arg is a RuntimeType, and constructs derived RuntimeTypes
483fn generate_type_arg_formatter(func: &Function) -> String {
484    let type_arg_formatter: String = func.args.iter()
485        .filter(|arg| arg.is_type)
486        .map(|type_arg| {
487            let name = type_arg.name.as_ref().expect("type args must be named");
488            let generics = if type_arg.generics.is_empty() {
489                "".to_string()
490            } else {
491                format!(", generics=[{}]", type_arg.generics.iter()
492                    .map(|v| format!("\"{}\"", v))
493                    .collect::<Vec<_>>().join(", "))
494            };
495            if let Some(example) = generate_public_example(func, type_arg) {
496                format!(r#"{name} = RuntimeType.parse_or_infer(type_name={name}, public_example={example}{generics})"#)
497            } else {
498                format!(r#"{name} = RuntimeType.parse(type_name={name}{generics})"#)
499            }
500        })
501        // additional types that are constructed by introspecting existing types
502        .chain(func.derived_types.iter()
503            .map(|type_spec|
504                format!("{name} = {derivation} # type: ignore",
505                        name = type_spec.name(),
506                        derivation = type_spec.rust_type.as_ref().unwrap().to_python())))
507        .chain(func.args.iter()
508            .filter(|arg| !arg.generics.is_empty())
509            .map(|arg|
510                format!("{name} = {name}.substitute({args}) # type: ignore",
511                        name=arg.name.as_ref().unwrap(),
512                        args=arg.generics.iter()
513                            .map(|generic| format!("{generic}={generic}", generic = generic))
514                            .collect::<Vec<_>>().join(", "))))
515        .collect::<Vec<_>>()
516        .join("\n");
517
518    if type_arg_formatter.is_empty() {
519        "# No type arguments to standardize.".to_string()
520    } else {
521        format!(
522            r#"# Standardize type arguments.
523{formatter}
524"#,
525            formatter = type_arg_formatter
526        )
527    }
528}
529
530/// the generated code ensures that all arguments have been converted to their c representations
531fn generate_data_converter(func: &Function, typemap: &HashMap<String, String>) -> String {
532    let data_converter: String = func
533        .args
534        .iter()
535        .map(|arg| {
536            let name = arg.name();
537            if arg.do_not_convert {
538                return format!("c_{name} = {name}");
539            };
540            format!(
541                r#"c_{name} = py_to_c({name}, c_type={c_type}{rust_type_arg})"#,
542                c_type = arg.python_origin_ctype(typemap),
543                rust_type_arg = arg
544                    .rust_type
545                    .as_ref()
546                    .map(|r_type| format!(", type_name={}", r_type.to_python()))
547                    .unwrap_or_else(|| "".to_string())
548            )
549        })
550        .collect::<Vec<_>>()
551        .join("\n");
552
553    if data_converter.is_empty() {
554        "# No arguments to convert to c types.".to_string()
555    } else {
556        format!(
557            r#"# Convert arguments to c types.
558{converter}
559"#,
560            converter = data_converter
561        )
562    }
563}
564
565/// the generated code
566/// - constructs and retrieves the ffi function name
567/// - sets argtypes and restype on the ctypes function
568/// - makes the call assuming that the arguments have been converted to C
569/// - handles errors
570/// - converts the response to python
571fn generate_call(module_name: &str, func: &Function, typemap: &HashMap<String, String>) -> String {
572    let mut call = format!(
573        r#"lib_function({args})"#,
574        args = func
575            .args
576            .iter()
577            .map(|arg| format!("c_{}", arg.name()))
578            .collect::<Vec<_>>()
579            .join(", ")
580    );
581    let ctype_restype = func.ret.python_origin_ctype(typemap);
582    if ctype_restype == "FfiResult" {
583        call = format!(
584            r#"unwrap({call}, {restype})"#,
585            call = call,
586            restype = func.ret.python_unwrapped_ctype(typemap)
587        )
588    }
589    if !func.ret.do_not_convert {
590        call = format!(r#"c_to_py({})"#, call)
591    }
592    format!(
593        r#"# Call library function.
594lib_function = lib.opendp_{module_name}__{func_name}
595lib_function.argtypes = [{ctype_args}]
596lib_function.restype = {ctype_restype}
597
598output = {call}"#,
599        module_name = module_name,
600        func_name = func.name,
601        ctype_args = func
602            .args
603            .iter()
604            .map(|v| v.python_origin_ctype(typemap))
605            .collect::<Vec<_>>()
606            .join(", "),
607        ctype_restype = ctype_restype,
608        call = call
609    )
610}
611
612fn generate_serialization(module_name: &str, func: &Function) -> String {
613    format!(
614        r#"try:
615    output.__opendp_dict__ = {{
616        '__function__': '{func_name}',
617        '__module__': '{module_name}',
618        '__kwargs__': {{
619            {func_args}
620        }},
621    }}
622except AttributeError:  # pragma: no cover
623    pass"#,
624        func_name = func.name,
625        func_args = func
626            .args
627            .iter()
628            .map(|v| format!(r"'{name}': {name}", name = v.name()))
629            .collect::<Vec<String>>()
630            .join(", "),
631    )
632}
633
634impl TypeRecipe {
635    /// translate the abstract derived_types info into python RuntimeType constructors
636    pub fn to_python(&self) -> String {
637        match self {
638            Self::Name(name) => name.clone(),
639            Self::Function { function, params } => format!(
640                "{function}({params})",
641                function = function,
642                params = params
643                    .iter()
644                    .map(|v| v.to_python())
645                    .collect::<Vec<_>>()
646                    .join(", ")
647            ),
648            Self::Nest { origin, args } => format!(
649                "RuntimeType(origin='{origin}', args=[{args}])",
650                origin = origin,
651                args = args
652                    .iter()
653                    .map(|arg| arg.to_python())
654                    .collect::<Vec<_>>()
655                    .join(", ")
656            ),
657            Self::None => "None".to_string(),
658        }
659    }
660}